LPS chunk sizes should be 16 bytes, not 17.
[packages/old-time.git] / Data / ByteString / Fusion.hs
1 {-# OPTIONS_GHC -cpp -fglasgow-exts -fno-warn-orphans #-}
2 -- |
3 -- Module : Data.ByteString.Fusion
4 -- License : BSD-style
5 -- Maintainer : dons@cse.unsw.edu.au
6 -- Stability : experimental
7 -- Portability : portable
8 --
9 -- Functional array fusion for ByteStrings.
10 --
11 -- Originally based on code from the Data Parallel Haskell project,
12 -- <http://www.cse.unsw.edu.au/~chak/project/dph>
13 --
14
15 -- #hide
16 module Data.ByteString.Fusion (
17
18 -- * Fusion utilities
19 loopU, loopL, fuseEFL,
20 NoAcc(NoAcc), loopArr, loopAcc, loopSndAcc, unSP,
21 mapEFL, filterEFL, foldEFL, foldEFL', scanEFL, mapAccumEFL, mapIndexEFL,
22
23 -- ** Alternative Fusion stuff
24 -- | This replaces 'loopU' with 'loopUp'
25 -- and adds several further special cases of loops.
26 loopUp, loopDown, loopNoAcc, loopMap, loopFilter,
27 loopWrapper, sequenceLoops,
28 doUpLoop, doDownLoop, doNoAccLoop, doMapLoop, doFilterLoop,
29
30 -- | These are the special fusion cases for combining each loop form perfectly.
31 fuseAccAccEFL, fuseAccNoAccEFL, fuseNoAccAccEFL, fuseNoAccNoAccEFL,
32 fuseMapAccEFL, fuseAccMapEFL, fuseMapNoAccEFL, fuseNoAccMapEFL,
33 fuseMapMapEFL, fuseAccFilterEFL, fuseFilterAccEFL, fuseNoAccFilterEFL,
34 fuseFilterNoAccEFL, fuseFilterFilterEFL, fuseMapFilterEFL, fuseFilterMapEFL,
35
36 -- * Strict pairs and sums
37 PairS(..), MaybeS(..)
38
39 ) where
40
41 import Data.ByteString.Base
42
43 import Foreign.ForeignPtr
44 import Foreign.Ptr
45 import Foreign.Storable (Storable(..))
46
47 import Data.Word (Word8)
48 import System.IO.Unsafe (unsafePerformIO)
49
50 -- -----------------------------------------------------------------------------
51 --
52 -- Useful macros, until we have bang patterns
53 --
54
55 #define STRICT1(f) f a | a `seq` False = undefined
56 #define STRICT2(f) f a b | a `seq` b `seq` False = undefined
57 #define STRICT3(f) f a b c | a `seq` b `seq` c `seq` False = undefined
58 #define STRICT4(f) f a b c d | a `seq` b `seq` c `seq` d `seq` False = undefined
59 #define STRICT5(f) f a b c d e | a `seq` b `seq` c `seq` d `seq` e `seq` False = undefined
60
61 infixl 2 :*:
62
63 -- |Strict pair
64 data PairS a b = !a :*: !b deriving (Eq,Ord,Show)
65
66 -- |Strict Maybe
67 data MaybeS a = NothingS | JustS !a deriving (Eq,Ord,Show)
68
69 -- |Data type for accumulators which can be ignored. The rewrite rules rely on
70 -- the fact that no bottoms of this type are ever constructed; hence, we can
71 -- assume @(_ :: NoAcc) `seq` x = x@.
72 --
73 data NoAcc = NoAcc
74
75 -- |Type of loop functions
76 type AccEFL acc = acc -> Word8 -> (PairS acc (MaybeS Word8))
77 type NoAccEFL = Word8 -> MaybeS Word8
78 type MapEFL = Word8 -> Word8
79 type FilterEFL = Word8 -> Bool
80
81 infixr 9 `fuseEFL`
82
83 -- |Fuse to flat loop functions
84 fuseEFL :: AccEFL acc1 -> AccEFL acc2 -> AccEFL (PairS acc1 acc2)
85 fuseEFL f g (acc1 :*: acc2) e1 =
86 case f acc1 e1 of
87 acc1' :*: NothingS -> (acc1' :*: acc2) :*: NothingS
88 acc1' :*: JustS e2 ->
89 case g acc2 e2 of
90 acc2' :*: res -> (acc1' :*: acc2') :*: res
91 #if defined(__GLASGOW_HASKELL__)
92 {-# INLINE [1] fuseEFL #-}
93 #endif
94
95 -- | Special forms of loop arguments
96 --
97 -- * These are common special cases for the three function arguments of gen
98 -- and loop; we give them special names to make it easier to trigger RULES
99 -- applying in the special cases represented by these arguments. The
100 -- "INLINE [1]" makes sure that these functions are only inlined in the last
101 -- two simplifier phases.
102 --
103 -- * In the case where the accumulator is not needed, it is better to always
104 -- explicitly return a value `()', rather than just copy the input to the
105 -- output, as the former gives GHC better local information.
106 --
107
108 -- | Element function expressing a mapping only
109 #if !defined(LOOPNOACC_FUSION)
110 mapEFL :: (Word8 -> Word8) -> AccEFL NoAcc
111 mapEFL f = \_ e -> (NoAcc :*: (JustS $ f e))
112 #else
113 mapEFL :: (Word8 -> Word8) -> NoAccEFL
114 mapEFL f = \e -> JustS (f e)
115 #endif
116 #if defined(__GLASGOW_HASKELL__)
117 {-# INLINE [1] mapEFL #-}
118 #endif
119
120 -- | Element function implementing a filter function only
121 #if !defined(LOOPNOACC_FUSION)
122 filterEFL :: (Word8 -> Bool) -> AccEFL NoAcc
123 filterEFL p = \_ e -> if p e then (NoAcc :*: JustS e) else (NoAcc :*: NothingS)
124 #else
125 filterEFL :: (Word8 -> Bool) -> NoAccEFL
126 filterEFL p = \e -> if p e then JustS e else NothingS
127 #endif
128
129 #if defined(__GLASGOW_HASKELL__)
130 {-# INLINE [1] filterEFL #-}
131 #endif
132
133 -- |Element function expressing a reduction only
134 foldEFL :: (acc -> Word8 -> acc) -> AccEFL acc
135 foldEFL f = \a e -> (f a e :*: NothingS)
136 #if defined(__GLASGOW_HASKELL__)
137 {-# INLINE [1] foldEFL #-}
138 #endif
139
140 -- | A strict foldEFL.
141 foldEFL' :: (acc -> Word8 -> acc) -> AccEFL acc
142 foldEFL' f = \a e -> let a' = f a e in a' `seq` (a' :*: NothingS)
143 #if defined(__GLASGOW_HASKELL__)
144 {-# INLINE [1] foldEFL' #-}
145 #endif
146
147 -- | Element function expressing a prefix reduction only
148 --
149 scanEFL :: (Word8 -> Word8 -> Word8) -> AccEFL Word8
150 scanEFL f = \a e -> (f a e :*: JustS a)
151 #if defined(__GLASGOW_HASKELL__)
152 {-# INLINE [1] scanEFL #-}
153 #endif
154
155 -- | Element function implementing a map and fold
156 --
157 mapAccumEFL :: (acc -> Word8 -> (acc, Word8)) -> AccEFL acc
158 mapAccumEFL f = \a e -> case f a e of (a', e') -> (a' :*: JustS e')
159 #if defined(__GLASGOW_HASKELL__)
160 {-# INLINE [1] mapAccumEFL #-}
161 #endif
162
163 -- | Element function implementing a map with index
164 --
165 mapIndexEFL :: (Int -> Word8 -> Word8) -> AccEFL Int
166 mapIndexEFL f = \i e -> let i' = i+1 in i' `seq` (i' :*: JustS (f i e))
167 #if defined(__GLASGOW_HASKELL__)
168 {-# INLINE [1] mapIndexEFL #-}
169 #endif
170
171 -- | Projection functions that are fusion friendly (as in, we determine when
172 -- they are inlined)
173 loopArr :: (PairS acc arr) -> arr
174 loopArr (_ :*: arr) = arr
175 #if defined(__GLASGOW_HASKELL__)
176 {-# INLINE [1] loopArr #-}
177 #endif
178
179 loopAcc :: (PairS acc arr) -> acc
180 loopAcc (acc :*: _) = acc
181 #if defined(__GLASGOW_HASKELL__)
182 {-# INLINE [1] loopAcc #-}
183 #endif
184
185 loopSndAcc :: (PairS (PairS acc1 acc2) arr) -> (PairS acc2 arr)
186 loopSndAcc ((_ :*: acc) :*: arr) = (acc :*: arr)
187 #if defined(__GLASGOW_HASKELL__)
188 {-# INLINE [1] loopSndAcc #-}
189 #endif
190
191 unSP :: (PairS acc arr) -> (acc, arr)
192 unSP (acc :*: arr) = (acc, arr)
193 #if defined(__GLASGOW_HASKELL__)
194 {-# INLINE [1] unSP #-}
195 #endif
196
197 ------------------------------------------------------------------------
198 --
199 -- Loop combinator and fusion rules for flat arrays
200 -- |Iteration over over ByteStrings
201
202 -- | Iteration over over ByteStrings
203 loopU :: AccEFL acc -- ^ mapping & folding, once per elem
204 -> acc -- ^ initial acc value
205 -> ByteString -- ^ input ByteString
206 -> (PairS acc ByteString)
207
208 loopU f start (PS z s i) = unsafePerformIO $ withForeignPtr z $ \a -> do
209 (ps, acc) <- createAndTrim' i $ \p -> do
210 (acc' :*: i') <- go (a `plusPtr` s) p start
211 return (0, i', acc')
212 return (acc :*: ps)
213
214 where
215 go p ma = trans 0 0
216 where
217 STRICT3(trans)
218 trans a_off ma_off acc
219 | a_off >= i = return (acc :*: ma_off)
220 | otherwise = do
221 x <- peekByteOff p a_off
222 let (acc' :*: oe) = f acc x
223 ma_off' <- case oe of
224 NothingS -> return ma_off
225 JustS e -> do pokeByteOff ma ma_off e
226 return $ ma_off + 1
227 trans (a_off+1) ma_off' acc'
228
229 #if defined(__GLASGOW_HASKELL__)
230 {-# INLINE [1] loopU #-}
231 #endif
232
233 {-# RULES
234
235 "FPS loop/loop fusion!" forall em1 em2 start1 start2 arr.
236 loopU em2 start2 (loopArr (loopU em1 start1 arr)) =
237 loopSndAcc (loopU (em1 `fuseEFL` em2) (start1 :*: start2) arr)
238
239 #-}
240
241 --
242 -- Functional list/array fusion for lazy ByteStrings.
243 --
244 loopL :: AccEFL acc -- ^ mapping & folding, once per elem
245 -> acc -- ^ initial acc value
246 -> [ByteString] -- ^ input ByteString
247 -> PairS acc [ByteString]
248 loopL f = loop
249 where loop s [] = (s :*: [])
250 loop s (x:xs)
251 | l == 0 = (s'' :*: ys)
252 | otherwise = (s'' :*: y:ys)
253 where (s' :*: y@(PS _ _ l)) = loopU f s x -- avoid circular dep on P.null
254 (s'' :*: ys) = loop s' xs
255
256 #if defined(__GLASGOW_HASKELL__)
257 {-# INLINE [1] loopL #-}
258 #endif
259
260 {-# RULES
261
262 "FPS lazy loop/loop fusion!" forall em1 em2 start1 start2 arr.
263 loopL em2 start2 (loopArr (loopL em1 start1 arr)) =
264 loopSndAcc (loopL (em1 `fuseEFL` em2) (start1 :*: start2) arr)
265
266 #-}
267
268
269 {-
270
271 Alternate experimental formulation of loopU which partitions it into
272 an allocating wrapper and an imperitive array-mutating loop.
273
274 The point in doing this split is that we might be able to fuse multiple
275 loops into a single wrapper. This would save reallocating another buffer.
276 It should also give better cache locality by reusing the buffer.
277
278 Note that this stuff needs ghc-6.5 from May 26 or later for the RULES to
279 really work reliably.
280
281 -}
282
283 loopUp :: AccEFL acc -> acc -> ByteString -> PairS acc ByteString
284 loopUp f a arr = loopWrapper (doUpLoop f a) arr
285 {-# INLINE loopUp #-}
286
287 loopDown :: AccEFL acc -> acc -> ByteString -> PairS acc ByteString
288 loopDown f a arr = loopWrapper (doDownLoop f a) arr
289 {-# INLINE loopDown #-}
290
291 loopNoAcc :: NoAccEFL -> ByteString -> PairS NoAcc ByteString
292 loopNoAcc f arr = loopWrapper (doNoAccLoop f NoAcc) arr
293 {-# INLINE loopNoAcc #-}
294
295 loopMap :: MapEFL -> ByteString -> PairS NoAcc ByteString
296 loopMap f arr = loopWrapper (doMapLoop f NoAcc) arr
297 {-# INLINE loopMap #-}
298
299 loopFilter :: FilterEFL -> ByteString -> PairS NoAcc ByteString
300 loopFilter f arr = loopWrapper (doFilterLoop f NoAcc) arr
301 {-# INLINE loopFilter #-}
302
303 -- The type of imperitive loops that fill in a destination array by
304 -- reading a source array. They may not fill in the whole of the dest
305 -- array if the loop is behaving as a filter, this is why we return
306 -- the length that was filled in. The loop may also accumulate some
307 -- value as it loops over the source array.
308 --
309 type ImperativeLoop acc =
310 Ptr Word8 -- pointer to the start of the source byte array
311 -> Ptr Word8 -- pointer to ther start of the destination byte array
312 -> Int -- length of the source byte array
313 -> IO (PairS (PairS acc Int) Int) -- result and offset, length of dest that was filled
314
315 loopWrapper :: ImperativeLoop acc -> ByteString -> PairS acc ByteString
316 loopWrapper body (PS srcFPtr srcOffset srcLen) = unsafePerformIO $
317 withForeignPtr srcFPtr $ \srcPtr -> do
318 (ps, acc) <- createAndTrim' srcLen $ \destPtr -> do
319 (acc :*: destOffset :*: destLen) <-
320 body (srcPtr `plusPtr` srcOffset) destPtr srcLen
321 return (destOffset, destLen, acc)
322 return (acc :*: ps)
323
324 doUpLoop :: AccEFL acc -> acc -> ImperativeLoop acc
325 doUpLoop f acc0 src dest len = loop 0 0 acc0
326 where STRICT3(loop)
327 loop src_off dest_off acc
328 | src_off >= len = return (acc :*: 0 :*: dest_off)
329 | otherwise = do
330 x <- peekByteOff src src_off
331 case f acc x of
332 (acc' :*: NothingS) -> loop (src_off+1) dest_off acc'
333 (acc' :*: JustS x') -> pokeByteOff dest dest_off x'
334 >> loop (src_off+1) (dest_off+1) acc'
335
336 doDownLoop :: AccEFL acc -> acc -> ImperativeLoop acc
337 doDownLoop f acc0 src dest len = loop (len-1) (len-1) acc0
338 where STRICT3(loop)
339 loop src_off dest_off acc
340 | src_off < 0 = return (acc :*: dest_off + 1 :*: len - (dest_off + 1))
341 | otherwise = do
342 x <- peekByteOff src src_off
343 case f acc x of
344 (acc' :*: NothingS) -> loop (src_off-1) dest_off acc'
345 (acc' :*: JustS x') -> pokeByteOff dest dest_off x'
346 >> loop (src_off-1) (dest_off-1) acc'
347
348 doNoAccLoop :: NoAccEFL -> noAcc -> ImperativeLoop noAcc
349 doNoAccLoop f noAcc src dest len = loop 0 0
350 where STRICT2(loop)
351 loop src_off dest_off
352 | src_off >= len = return (noAcc :*: 0 :*: dest_off)
353 | otherwise = do
354 x <- peekByteOff src src_off
355 case f x of
356 NothingS -> loop (src_off+1) dest_off
357 JustS x' -> pokeByteOff dest dest_off x'
358 >> loop (src_off+1) (dest_off+1)
359
360 doMapLoop :: MapEFL -> noAcc -> ImperativeLoop noAcc
361 doMapLoop f noAcc src dest len = loop 0
362 where STRICT1(loop)
363 loop n
364 | n >= len = return (noAcc :*: 0 :*: len)
365 | otherwise = do
366 x <- peekByteOff src n
367 pokeByteOff dest n (f x)
368 loop (n+1) -- offset always the same, only pass 1 arg
369
370 doFilterLoop :: FilterEFL -> noAcc -> ImperativeLoop noAcc
371 doFilterLoop f noAcc src dest len = loop 0 0
372 where STRICT2(loop)
373 loop src_off dest_off
374 | src_off >= len = return (noAcc :*: 0 :*: dest_off)
375 | otherwise = do
376 x <- peekByteOff src src_off
377 if f x
378 then pokeByteOff dest dest_off x
379 >> loop (src_off+1) (dest_off+1)
380 else loop (src_off+1) dest_off
381
382 -- run two loops in sequence,
383 -- think of it as: loop1 >> loop2
384 sequenceLoops :: ImperativeLoop acc1
385 -> ImperativeLoop acc2
386 -> ImperativeLoop (PairS acc1 acc2)
387 sequenceLoops loop1 loop2 src dest len0 = do
388 (acc1 :*: off1 :*: len1) <- loop1 src dest len0
389 (acc2 :*: off2 :*: len2) <-
390 let src' = dest `plusPtr` off1
391 dest' = src' -- note that we are using dest == src
392 -- for the second loop as we are
393 -- mutating the dest array in-place!
394 in loop2 src' dest' len1
395 return ((acc1 :*: acc2) :*: off1 + off2 :*: len2)
396
397 -- TODO: prove that this is associative! (I think it is)
398 -- since we can't be sure how the RULES will combine loops.
399
400 #if defined(__GLASGOW_HASKELL__)
401
402 {-# INLINE [1] doUpLoop #-}
403 {-# INLINE [1] doDownLoop #-}
404 {-# INLINE [1] doNoAccLoop #-}
405 {-# INLINE [1] doMapLoop #-}
406 {-# INLINE [1] doFilterLoop #-}
407
408 {-# INLINE [1] loopWrapper #-}
409 {-# INLINE [1] sequenceLoops #-}
410
411 {-# INLINE [1] fuseAccAccEFL #-}
412 {-# INLINE [1] fuseAccNoAccEFL #-}
413 {-# INLINE [1] fuseNoAccAccEFL #-}
414 {-# INLINE [1] fuseNoAccNoAccEFL #-}
415 {-# INLINE [1] fuseMapAccEFL #-}
416 {-# INLINE [1] fuseAccMapEFL #-}
417 {-# INLINE [1] fuseMapNoAccEFL #-}
418 {-# INLINE [1] fuseNoAccMapEFL #-}
419 {-# INLINE [1] fuseMapMapEFL #-}
420 {-# INLINE [1] fuseAccFilterEFL #-}
421 {-# INLINE [1] fuseFilterAccEFL #-}
422 {-# INLINE [1] fuseNoAccFilterEFL #-}
423 {-# INLINE [1] fuseFilterNoAccEFL #-}
424 {-# INLINE [1] fuseFilterFilterEFL #-}
425 {-# INLINE [1] fuseMapFilterEFL #-}
426 {-# INLINE [1] fuseFilterMapEFL #-}
427
428 #endif
429
430 {-# RULES
431
432 "FPS loopArr/loopSndAcc" forall x.
433 loopArr (loopSndAcc x) = loopArr x
434
435 "FPS seq/NoAcc" forall (u::NoAcc) e.
436 u `seq` e = e
437
438 "FPS loop/loop wrapper elimination" forall loop1 loop2 arr.
439 loopWrapper loop2 (loopArr (loopWrapper loop1 arr)) =
440 loopSndAcc (loopWrapper (sequenceLoops loop1 loop2) arr)
441
442 --
443 -- n.b in the following, when reading n/m fusion, recall sequenceLoops
444 -- is monadic, so its really n >> m fusion (i.e. m.n), not n . m fusion.
445 --
446
447 "FPS up/up loop fusion" forall f1 f2 acc1 acc2.
448 sequenceLoops (doUpLoop f1 acc1) (doUpLoop f2 acc2) =
449 doUpLoop (f1 `fuseAccAccEFL` f2) (acc1 :*: acc2)
450
451 "FPS map/map loop fusion" forall f1 f2 acc1 acc2.
452 sequenceLoops (doMapLoop f1 acc1) (doMapLoop f2 acc2) =
453 doMapLoop (f1 `fuseMapMapEFL` f2) (acc1 :*: acc2)
454
455 "FPS filter/filter loop fusion" forall f1 f2 acc1 acc2.
456 sequenceLoops (doFilterLoop f1 acc1) (doFilterLoop f2 acc2) =
457 doFilterLoop (f1 `fuseFilterFilterEFL` f2) (acc1 :*: acc2)
458
459 "FPS map/filter loop fusion" forall f1 f2 acc1 acc2.
460 sequenceLoops (doMapLoop f1 acc1) (doFilterLoop f2 acc2) =
461 doNoAccLoop (f1 `fuseMapFilterEFL` f2) (acc1 :*: acc2)
462
463 "FPS filter/map loop fusion" forall f1 f2 acc1 acc2.
464 sequenceLoops (doFilterLoop f1 acc1) (doMapLoop f2 acc2) =
465 doNoAccLoop (f1 `fuseFilterMapEFL` f2) (acc1 :*: acc2)
466
467 "FPS map/up loop fusion" forall f1 f2 acc1 acc2.
468 sequenceLoops (doMapLoop f1 acc1) (doUpLoop f2 acc2) =
469 doUpLoop (f1 `fuseMapAccEFL` f2) (acc1 :*: acc2)
470
471 "FPS up/map loop fusion" forall f1 f2 acc1 acc2.
472 sequenceLoops (doUpLoop f1 acc1) (doMapLoop f2 acc2) =
473 doUpLoop (f1 `fuseAccMapEFL` f2) (acc1 :*: acc2)
474
475 "FPS filter/up loop fusion" forall f1 f2 acc1 acc2.
476 sequenceLoops (doFilterLoop f1 acc1) (doUpLoop f2 acc2) =
477 doUpLoop (f1 `fuseFilterAccEFL` f2) (acc1 :*: acc2)
478
479 "FPS up/filter loop fusion" forall f1 f2 acc1 acc2.
480 sequenceLoops (doUpLoop f1 acc1) (doFilterLoop f2 acc2) =
481 doUpLoop (f1 `fuseAccFilterEFL` f2) (acc1 :*: acc2)
482
483 "FPS down/down loop fusion" forall f1 f2 acc1 acc2.
484 sequenceLoops (doDownLoop f1 acc1) (doDownLoop f2 acc2) =
485 doDownLoop (f1 `fuseAccAccEFL` f2) (acc1 :*: acc2)
486
487 "FPS map/down fusion" forall f1 f2 acc1 acc2.
488 sequenceLoops (doMapLoop f1 acc1) (doDownLoop f2 acc2) =
489 doDownLoop (f1 `fuseMapAccEFL` f2) (acc1 :*: acc2)
490
491 "FPS down/map loop fusion" forall f1 f2 acc1 acc2.
492 sequenceLoops (doDownLoop f1 acc1) (doMapLoop f2 acc2) =
493 doDownLoop (f1 `fuseAccMapEFL` f2) (acc1 :*: acc2)
494
495 "FPS filter/down fusion" forall f1 f2 acc1 acc2.
496 sequenceLoops (doFilterLoop f1 acc1) (doDownLoop f2 acc2) =
497 doDownLoop (f1 `fuseFilterAccEFL` f2) (acc1 :*: acc2)
498
499 "FPS down/filter loop fusion" forall f1 f2 acc1 acc2.
500 sequenceLoops (doDownLoop f1 acc1) (doFilterLoop f2 acc2) =
501 doDownLoop (f1 `fuseAccFilterEFL` f2) (acc1 :*: acc2)
502
503 "FPS noAcc/noAcc loop fusion" forall f1 f2 acc1 acc2.
504 sequenceLoops (doNoAccLoop f1 acc1) (doNoAccLoop f2 acc2) =
505 doNoAccLoop (f1 `fuseNoAccNoAccEFL` f2) (acc1 :*: acc2)
506
507 "FPS noAcc/up loop fusion" forall f1 f2 acc1 acc2.
508 sequenceLoops (doNoAccLoop f1 acc1) (doUpLoop f2 acc2) =
509 doUpLoop (f1 `fuseNoAccAccEFL` f2) (acc1 :*: acc2)
510
511 "FPS up/noAcc loop fusion" forall f1 f2 acc1 acc2.
512 sequenceLoops (doUpLoop f1 acc1) (doNoAccLoop f2 acc2) =
513 doUpLoop (f1 `fuseAccNoAccEFL` f2) (acc1 :*: acc2)
514
515 "FPS map/noAcc loop fusion" forall f1 f2 acc1 acc2.
516 sequenceLoops (doMapLoop f1 acc1) (doNoAccLoop f2 acc2) =
517 doNoAccLoop (f1 `fuseMapNoAccEFL` f2) (acc1 :*: acc2)
518
519 "FPS noAcc/map loop fusion" forall f1 f2 acc1 acc2.
520 sequenceLoops (doNoAccLoop f1 acc1) (doMapLoop f2 acc2) =
521 doNoAccLoop (f1 `fuseNoAccMapEFL` f2) (acc1 :*: acc2)
522
523 "FPS filter/noAcc loop fusion" forall f1 f2 acc1 acc2.
524 sequenceLoops (doFilterLoop f1 acc1) (doNoAccLoop f2 acc2) =
525 doNoAccLoop (f1 `fuseFilterNoAccEFL` f2) (acc1 :*: acc2)
526
527 "FPS noAcc/filter loop fusion" forall f1 f2 acc1 acc2.
528 sequenceLoops (doNoAccLoop f1 acc1) (doFilterLoop f2 acc2) =
529 doNoAccLoop (f1 `fuseNoAccFilterEFL` f2) (acc1 :*: acc2)
530
531 "FPS noAcc/down loop fusion" forall f1 f2 acc1 acc2.
532 sequenceLoops (doNoAccLoop f1 acc1) (doDownLoop f2 acc2) =
533 doDownLoop (f1 `fuseNoAccAccEFL` f2) (acc1 :*: acc2)
534
535 "FPS down/noAcc loop fusion" forall f1 f2 acc1 acc2.
536 sequenceLoops (doDownLoop f1 acc1) (doNoAccLoop f2 acc2) =
537 doDownLoop (f1 `fuseAccNoAccEFL` f2) (acc1 :*: acc2)
538
539 #-}
540
541 {-
542
543 up = up loop
544 down = down loop
545 map = map special case
546 filter = filter special case
547 noAcc = noAcc undirectional loop (unused)
548
549 heirarchy:
550 up down
551 ^ ^
552 \ /
553 noAcc
554 ^ ^
555 / \
556 map filter
557
558 each is a special case of the things above
559
560 so we get rules that combine things on the same level
561 and rules that combine things on different levels
562 to get something on the higher level
563
564 so all the cases:
565 up/up --> up fuseAccAccEFL
566 down/down --> down fuseAccAccEFL
567 noAcc/noAcc --> noAcc fuseNoAccNoAccEFL
568
569 noAcc/up --> up fuseNoAccAccEFL
570 up/noAcc --> up fuseAccNoAccEFL
571 noAcc/down --> down fuseNoAccAccEFL
572 down/noAcc --> down fuseAccNoAccEFL
573
574 and if we do the map, filter special cases then it adds a load more:
575
576 map/map --> map fuseMapMapEFL
577 filter/filter --> filter fuseFilterFilterEFL
578
579 map/filter --> noAcc fuseMapFilterEFL
580 filter/map --> noAcc fuseFilterMapEFL
581
582 map/noAcc --> noAcc fuseMapNoAccEFL
583 noAcc/map --> noAcc fuseNoAccMapEFL
584
585 map/up --> up fuseMapAccEFL
586 up/map --> up fuseAccMapEFL
587
588 map/down --> down fuseMapAccEFL
589 down/map --> down fuseAccMapEFL
590
591 filter/noAcc --> noAcc fuseNoAccFilterEFL
592 noAcc/filter --> noAcc fuseFilterNoAccEFL
593
594 filter/up --> up fuseFilterAccEFL
595 up/filter --> up fuseAccFilterEFL
596
597 filter/down --> down fuseFilterAccEFL
598 down/filter --> down fuseAccFilterEFL
599 -}
600
601 fuseAccAccEFL :: AccEFL acc1 -> AccEFL acc2 -> AccEFL (PairS acc1 acc2)
602 fuseAccAccEFL f g (acc1 :*: acc2) e1 =
603 case f acc1 e1 of
604 acc1' :*: NothingS -> (acc1' :*: acc2) :*: NothingS
605 acc1' :*: JustS e2 ->
606 case g acc2 e2 of
607 acc2' :*: res -> (acc1' :*: acc2') :*: res
608
609 fuseAccNoAccEFL :: AccEFL acc -> NoAccEFL -> AccEFL (PairS acc noAcc)
610 fuseAccNoAccEFL f g (acc :*: noAcc) e1 =
611 case f acc e1 of
612 acc' :*: NothingS -> (acc' :*: noAcc) :*: NothingS
613 acc' :*: JustS e2 -> (acc' :*: noAcc) :*: g e2
614
615 fuseNoAccAccEFL :: NoAccEFL -> AccEFL acc -> AccEFL (PairS noAcc acc)
616 fuseNoAccAccEFL f g (noAcc :*: acc) e1 =
617 case f e1 of
618 NothingS -> (noAcc :*: acc) :*: NothingS
619 JustS e2 ->
620 case g acc e2 of
621 acc' :*: res -> (noAcc :*: acc') :*: res
622
623 fuseNoAccNoAccEFL :: NoAccEFL -> NoAccEFL -> NoAccEFL
624 fuseNoAccNoAccEFL f g e1 =
625 case f e1 of
626 NothingS -> NothingS
627 JustS e2 -> g e2
628
629 fuseMapAccEFL :: MapEFL -> AccEFL acc -> AccEFL (PairS noAcc acc)
630 fuseMapAccEFL f g (noAcc :*: acc) e1 =
631 case g acc (f e1) of
632 (acc' :*: res) -> (noAcc :*: acc') :*: res
633
634 fuseAccMapEFL :: AccEFL acc -> MapEFL -> AccEFL (PairS acc noAcc)
635 fuseAccMapEFL f g (acc :*: noAcc) e1 =
636 case f acc e1 of
637 (acc' :*: NothingS) -> (acc' :*: noAcc) :*: NothingS
638 (acc' :*: JustS e2) -> (acc' :*: noAcc) :*: JustS (g e2)
639
640 fuseMapMapEFL :: MapEFL -> MapEFL -> MapEFL
641 fuseMapMapEFL f g e1 = g (f e1) -- n.b. perfect fusion
642
643 fuseMapNoAccEFL :: MapEFL -> NoAccEFL -> NoAccEFL
644 fuseMapNoAccEFL f g e1 = g (f e1)
645
646 fuseNoAccMapEFL :: NoAccEFL -> MapEFL -> NoAccEFL
647 fuseNoAccMapEFL f g e1 =
648 case f e1 of
649 NothingS -> NothingS
650 JustS e2 -> JustS (g e2)
651
652 fuseAccFilterEFL :: AccEFL acc -> FilterEFL -> AccEFL (PairS acc noAcc)
653 fuseAccFilterEFL f g (acc :*: noAcc) e1 =
654 case f acc e1 of
655 acc' :*: NothingS -> (acc' :*: noAcc) :*: NothingS
656 acc' :*: JustS e2 ->
657 case g e2 of
658 False -> (acc' :*: noAcc) :*: NothingS
659 True -> (acc' :*: noAcc) :*: JustS e2
660
661 fuseFilterAccEFL :: FilterEFL -> AccEFL acc -> AccEFL (PairS noAcc acc)
662 fuseFilterAccEFL f g (noAcc :*: acc) e1 =
663 case f e1 of
664 False -> (noAcc :*: acc) :*: NothingS
665 True ->
666 case g acc e1 of
667 acc' :*: res -> (noAcc :*: acc') :*: res
668
669 fuseNoAccFilterEFL :: NoAccEFL -> FilterEFL -> NoAccEFL
670 fuseNoAccFilterEFL f g e1 =
671 case f e1 of
672 NothingS -> NothingS
673 JustS e2 ->
674 case g e2 of
675 False -> NothingS
676 True -> JustS e2
677
678 fuseFilterNoAccEFL :: FilterEFL -> NoAccEFL -> NoAccEFL
679 fuseFilterNoAccEFL f g e1 =
680 case f e1 of
681 False -> NothingS
682 True -> g e1
683
684 fuseFilterFilterEFL :: FilterEFL -> FilterEFL -> FilterEFL
685 fuseFilterFilterEFL f g e1 = f e1 && g e1
686
687 fuseMapFilterEFL :: MapEFL -> FilterEFL -> NoAccEFL
688 fuseMapFilterEFL f g e1 =
689 case f e1 of
690 e2 -> case g e2 of
691 False -> NothingS
692 True -> JustS e2
693
694 fuseFilterMapEFL :: FilterEFL -> MapEFL -> NoAccEFL
695 fuseFilterMapEFL f g e1 =
696 case f e1 of
697 False -> NothingS
698 True -> JustS (g e1)
699