344442f3fb7bd50a53f5420b7ce529ca2ea6a530
[ghc.git] / testsuite / tests / dph / words / WordsVect.hs
1
2 -- Break up a string into words in parallel.
3 -- Based on the presentation "Breaking Sequential Habits of Thought", Guy Steele.
4 -- http://groups.csail.mit.edu/mac/users/gjs/6.945/readings/MITApril2009Steele.pdf
5 --
6 -- NOTE: This is a naive implementation, and I haven't benchmarked it.
7 -- Using parallel arrays in Seg probably isn't helpful for performance,
8 -- but it's a stress test for the vectoriser.
9 --
10 -- If we actually cared about performance we wouldn't want to recursively
11 -- subdivide the string right down to individual characters.
12 --
13 {-# LANGUAGE ParallelArrays, ParallelListComp #-}
14 {-# OPTIONS -fvectorise #-}
15
16 module WordsVect
17 ( wordsOfPArray
18 , wordCountOfPArray )
19 where
20 import qualified Data.Array.Parallel.Prelude.Word8 as W
21 import Data.Array.Parallel.Prelude.Word8 (Word8)
22 import Data.Array.Parallel.Prelude.Int as I
23 import Data.Array.Parallel
24
25 import qualified Prelude as Prel
26
27
28 -- We can't use the Prelude Char and String types in vectorised code yet..
29 type Char = Word8
30 char_space = W.fromInt 32
31
32 type String = [: Char :]
33
34
35 -- | Word state
36 data State
37 = Chunk String
38 | Seg String -- initial word chunk
39 [:String:] -- complete words in the middle of the segment
40 String -- final word chunk
41
42
43 -- | Compose two wordstates.
44 plusState :: State -> State -> State
45 plusState str1 str2
46 = case (str1, str2) of
47 (Chunk as, Chunk bs) -> Chunk (as +:+ bs)
48 (Chunk as, Seg bl bss br) -> Seg (as +:+ bl) bss br
49 (Seg al ass ar, Chunk bs) -> Seg al ass (ar +:+ bs)
50 (Seg al ass ar, Seg bl bss br) -> Seg al (ass +:+ joinEmpty [:ar +:+ bl:] +:+ bss) br
51
52 joinEmpty :: [:[:Word8:]:] -> [:[:Word8:]:]
53 joinEmpty ws
54 | lengthP ws I.== 1 && lengthP (ws !: 0) I.== 0 = [::]
55 | otherwise = ws
56
57
58 -- | Convert a single char to a wordstate.
59 stateOfChar :: Char -> State
60 stateOfChar c
61 | c W.== char_space = Seg [::] [::] [::]
62 | otherwise = Chunk [:c:]
63
64
65 -- | Break this string into words.
66 stateOfString :: String -> State
67 stateOfString str
68 = let len = lengthP str
69 result
70 | len I.== 0 = Chunk [::]
71 | len I.== 1 = stateOfChar (str !: 0)
72 | otherwise
73 = let half = len `div` 2
74 s1 = sliceP 0 half str
75 s2 = sliceP half (len I.- half) str
76 in plusState (stateOfString s1) (stateOfString s2)
77 in result
78
79
80 -- | Count the number of words in a string.
81 countWordsOfState :: State -> Int
82 countWordsOfState state
83 = case state of
84 Chunk c -> wordsInChunkArr c
85 Seg c1 ws c2 -> wordsInChunkArr c1 I.+ lengthP ws I.+ wordsInChunkArr c2
86
87 wordsInChunkArr :: [:Word8:] -> Int
88 wordsInChunkArr arr
89 | lengthP arr I.== 0 = 0
90 | otherwise = 1
91
92
93 -- | Flatten a state back to an array of Word8s,
94 -- inserting spaces between the words.
95 flattenState :: State -> [:Word8:]
96 flattenState ss
97 = case ss of
98 Chunk s -> s
99
100 Seg w1 ws w2
101 -> w1
102 +:+ [:char_space:]
103 +:+ concatP [: w +:+ [:char_space:] | w <- ws :]
104 +:+ w2
105
106 -- Interface ------------------------------------------------------------------
107
108 -- | Break up an array of chars into words then flatten it back.
109 wordsOfPArray :: PArray Word8 -> PArray Word8
110 {-# NOINLINE wordsOfPArray #-}
111 wordsOfPArray arr
112 = let str = fromPArrayP arr
113 state = stateOfString str
114 strOut = flattenState state
115 in toPArrayP strOut
116
117
118 -- | Count the number of words in an array
119 wordCountOfPArray :: PArray Word8 -> Int
120 {-# NOINLINE wordCountOfPArray #-}
121 wordCountOfPArray arr
122 = let str = fromPArrayP arr
123 state = stateOfString str
124 in countWordsOfState state
125