Add the k-nucleotide shootout benchmark
[nofib.git] / shootout / k-nucleotide / Main.hs
1 --
2 -- The Computer Language Benchmarks Game
3 -- http://benchmarksgame.alioth.debian.org/
4 --
5 -- contributed by Stephen Blackheath (with some bits taken from Don Stewart's
6 -- version), v1.2
7
8 import Text.Printf
9 import Data.ByteString.Internal
10 import qualified Data.ByteString.Char8 as S
11 import Control.Applicative
12 import Control.Monad
13 import Control.Concurrent
14 import Foreign.Storable
15 import Foreign.Marshal.Alloc
16 import Foreign.Marshal.Array
17 import Foreign.Ptr
18 import Foreign.ForeignPtr
19 import Data.Word
20 import Data.Bits
21 import Data.Char
22 import Data.List
23 import Data.Maybe
24 import Data.IORef
25 import GHC.Exts
26
27
28 main = do
29 genome <- extract (S.pack ">TH")
30 let actions = [
31 do
32 a <- printFreqsBySize genome 1
33 b <- printFreqsBySize genome 2
34 return $ a ++ b
35 ] ++ map (printFreqsSpecific genome) specificSeqs
36 output <- concat <$> parallel actions
37 forM_ output putStrLn
38
39 -- Drop in replacement for sequence
40 parallel :: [IO a] -> IO [a]
41 parallel actions = do
42 vars <- forM actions $ \action -> do
43 var <- newEmptyMVar
44 forkIO $ do
45 answer <- action
46 putMVar var answer
47 return var
48 forM vars takeMVar
49
50 specificSeqs = map S.pack [
51 "GGT","GGTA","GGTATT","GGTATTTTAATT","GGTATTTTAATTTATAGT"]
52
53 extract p = do
54 s <- S.getContents
55 let (_, rem) = S.breakSubstring p s
56 return $! S.map toUpper -- array fusion!
57 . S.filter ((/=) '\n')
58 . S.dropWhile ((/=) '\n') $ rem
59
60 printFreqsBySize :: S.ByteString -> Int -> IO [String]
61 printFreqsBySize genome keySize = do
62 ht0 <- htNew keySize
63 ht <- hashGenome genome keySize ht0
64 l <- htToList ht
65 htFree ht
66 return $ map draw (sortBy sortRule l) ++ [""]
67 where
68 genomeLen = S.length genome
69 draw :: (S.ByteString, Int) -> String
70 draw (key, count) = printf "%s %.3f" (S.unpack key) pct
71 where pct = (100 * (fromIntegral count) / total) :: Double
72 total = fromIntegral (genomeLen - keySize + 1)
73
74 printFreqsSpecific :: S.ByteString -> S.ByteString -> IO [String]
75 printFreqsSpecific genome seq = do
76 let keySize = S.length seq
77 genomeLen = S.length genome
78 ht0 <- htNew keySize
79 ht <- hashGenome genome keySize ht0
80 let (fp, offset, len) = toForeignPtr seq
81 count <- withForeignPtr fp $ \p_ -> do
82 htGet ht (p_ `plusPtr` offset)
83 htFree ht
84 return [show count ++ ('\t' : S.unpack seq)]
85
86 hashGenome :: S.ByteString
87 -> Int
88 -> Hashtable
89 -> IO Hashtable
90 {-# INLINE hashGenome #-}
91 hashGenome genome keySize ht = do
92 let (fp, offset, len) = toForeignPtr genome
93 withForeignPtr fp $ \p_ -> do
94 let p = p_ `plusPtr` offset
95 loop ht idx = do
96 let key = p `plusPtr` idx
97 htInc ht key
98 endIdx = len - keySize + 1
99 foldMe i ht | i == endIdx = return ht
100 foldMe i ht = loop ht i >>= foldMe (i+1)
101 foldMe 0 ht
102
103 sortRule :: (S.ByteString, Int) -> (S.ByteString, Int) -> Ordering
104 sortRule (a1, b1) (a2, b2) =
105 case compare b2 b1 of
106 EQ -> compare a1 a2
107 x -> x
108
109 ------ Hash table implementation ----------------------------------------------
110
111 -- Note: Hash tables are not generally used in functional languages, so there
112 -- are no available library implementations for Haskell. This benchmark
113 -- requires a hash table. This is why I have implemented the hash table here.
114
115 htNew :: Int -> IO Hashtable
116 htNew = htNew' (head primes)
117
118 htNew' :: Int -> Int -> IO Hashtable
119 htNew' slots ksz = do
120 let ssz = spineSize ksz slots
121 sp <- mallocBytes ssz
122 memset sp 0 (fromIntegral ssz)
123 return $ Hashtable {
124 keySize = ksz,
125 noOfSlots = slots,
126 spine = sp
127 }
128
129 primes = [ 1543, 3079, 6151, 12289, 24593,
130 49157, 98317, 196613, 93241, 786433,
131 1572869, 3145739, 6291469, 12582917, 25165843,
132 50331653, 100663319, 201326611, 402653189, 805306457 ]
133
134 htFree :: Hashtable -> IO ()
135 htFree ht = do
136 htTraverse ht $ \isSpine (Entry ePtr) -> when (not isSpine) $ free ePtr
137 free (spine ht)
138
139 htGet :: Hashtable -> Ptr Word8 -> IO Int
140 htGet ht key = do
141 hash <- calcHash ht key
142 htPayload ht hash key >>= peek
143
144 htInc :: Hashtable -> Ptr Word8 -> IO Hashtable
145 {-# INLINE htInc #-}
146 htInc !ht !key = do
147 hash <- calcHash ht key
148 pPayload <- htPayload ht hash key
149 value <- peek pPayload
150 poke pPayload (value+1)
151 if value == 0 && (hash .&. 0x7f) == 0
152 then checkGrow ht
153 else return ht
154
155 htPut_ :: Hashtable -> Ptr Word8 -> Int -> IO ()
156 {-# INLINE htPut_ #-}
157 htPut_ !ht !key !value = do
158 hash <- calcHash ht key
159 pPayload <- htPayload ht hash key
160 poke pPayload value
161
162 checkGrow :: Hashtable -> IO Hashtable
163 checkGrow ht = do
164 let pTotal = totalEntriesOf ht
165 slots = noOfSlots ht
166 total <- (0x200+) <$> peek pTotal
167 poke pTotal total
168 if total >= slots
169 then do
170 let newSlots = head $ dropWhile (<= slots*2) primes
171 ht' <- htNew' newSlots (keySize ht)
172 htTraverse ht $ \_ -> reinsert ht' -- re-insert all the elts
173 htFree ht
174 poke (totalEntriesOf ht') total -- copy the total entry count
175 return ht'
176 else return ht
177 where
178 reinsert :: Hashtable -> Entry -> IO ()
179 reinsert ht entry = do
180 value <- peek (payloadOf entry)
181 htPut_ ht (keyOf entry) value
182
183 htToList :: Hashtable -> IO [(S.ByteString, Int)]
184 htToList ht =
185 htMap (\entry -> do
186 keyStr <- keyString ht (keyOf entry)
187 payload <- peek (payloadOf entry)
188 return (keyStr, payload)) ht
189
190 htMap :: (Entry -> IO a) -> Hashtable -> IO [a]
191 htMap f ht = mapM f =<< htEntries ht
192
193 keyString :: Hashtable -> Ptr Word8 -> IO S.ByteString
194 keyString ht key = S.pack . map w2c <$> peekArray (keySize ht) key
195
196 isEmptySlot :: Entry -> IO Bool
197 {-# INLINE isEmptySlot #-}
198 isEmptySlot entry = do
199 ch <- peek $ keyOf entry
200 return $ ch == 0
201
202 htEntries :: Hashtable -> IO [Entry]
203 htEntries ht = do
204 es <- newIORef []
205 htTraverse ht $ \_ entry -> modifyIORef es $ \l -> entry:l
206 readIORef es
207
208 htTraverse :: Hashtable -> (Bool -> Entry -> IO ()) -> IO ()
209 htTraverse ht f = he 0
210 where
211 slots = noOfSlots ht
212 he i | i == slots = return ()
213 he i = do
214 let entry = indexEntry ht i
215 empty <- isEmptySlot entry
216 if empty
217 then he (i+1)
218 else links True i entry
219 links isSpine i entry = do
220 next <- peek $ nextPtrOf entry
221 f isSpine entry
222 if next == nullPtr
223 then he (i+1)
224 else links False i (Entry next)
225
226 data Hashtable = Hashtable {
227 keySize :: Int,
228 noOfSlots :: Int,
229 spine :: Ptr Word8
230 }
231
232 wordSize :: Int
233 wordSize = max (sizeOf (nullPtr :: Ptr Word8)) (sizeOf (0 :: Int))
234
235 -- Round up to word size
236 roundUp :: Int -> Int
237 {-# INLINE roundUp #-}
238 roundUp !i = (i + wordSize - 1) .&. complement (wordSize - 1)
239
240 slotSize :: Int -> Int
241 {-# INLINE slotSize #-}
242 slotSize !ksz = roundUp ksz + wordSize * 2
243
244 spineSize :: Int -> Int -> Int
245 spineSize ksz slots = slots * slotSize ksz + wordSize
246
247 calcHash :: Hashtable -> Ptr Word8 -> IO Int
248 {-# INLINE calcHash #-}
249 calcHash !ht !key = (`mod` noOfSlots ht) <$> ch 0 0
250 where
251 ksz = keySize ht
252 ch :: Int -> Int -> IO Int
253 ch !i !acc | i == ksz = return acc
254 ch !i !acc = do
255 c <- peek (key `plusPtr` i)
256 ch (i+1) (acc * 131 + fromIntegral (c::Word8))
257
258 newtype Entry = Entry (Ptr Word8)
259
260 -- Count of the total number of hash table entries
261 totalEntriesOf :: Hashtable -> Ptr Int
262 {-# INLINE totalEntriesOf #-}
263 totalEntriesOf ht = castPtr $ spine ht
264
265 indexEntry :: Hashtable -> Int -> Entry
266 {-# INLINE indexEntry #-}
267 indexEntry !ht !hash =
268 let hOffset = wordSize + hash * slotSize (keySize ht)
269 in Entry $ spine ht `plusPtr` hOffset
270
271 entryMatches :: Hashtable -> Entry -> Ptr Word8 -> IO Bool
272 {-# INLINE entryMatches #-}
273 entryMatches !ht !entry !key = do
274 let eKey = keyOf entry
275 c <- memcmp key eKey (fromIntegral $ keySize ht)
276 if c == 0
277 then return True
278 else do
279 empty <- isEmptySlot entry
280 if empty
281 then do
282 memcpy eKey key (fromIntegral $ keySize ht) -- ick
283 return True
284 else
285 return False
286
287 nextPtrOf :: Entry -> Ptr (Ptr Word8)
288 {-# INLINE nextPtrOf #-}
289 nextPtrOf !(Entry ePtr) = castPtr $ ePtr
290
291 payloadOf :: Entry -> Ptr Int
292 {-# INLINE payloadOf #-}
293 payloadOf !(Entry ePtr) = castPtr $ ePtr `plusPtr` wordSize
294
295 keyOf :: Entry -> Ptr Word8
296 {-# INLINE keyOf #-}
297 keyOf !(Entry ePtr) = ePtr `plusPtr` (wordSize*2)
298
299 allocEntry :: Hashtable -> Ptr Word8 -> IO Entry
300 allocEntry !ht !key = do
301 let esz = slotSize $ keySize ht
302 ePtr <- mallocBytes esz
303 memset ePtr 0 (fromIntegral esz)
304 let entry = Entry ePtr
305 memcpy (keyOf entry) key (fromIntegral $ keySize ht)
306 return entry
307
308 htPayload :: Hashtable -> Int -> Ptr Word8 -> IO (Ptr Int)
309 {-# INLINE htPayload #-}
310 htPayload !ht !hash !key = do
311 entry <- findEntry (indexEntry ht hash)
312 return $ payloadOf entry
313 where
314 findEntry :: Entry -> IO Entry
315 findEntry !entry = do
316 match <- entryMatches ht entry key
317 if match
318 then
319 return entry
320 else do
321 let pNext = nextPtrOf entry
322 next <- peek pNext
323 if next == nullPtr
324 then do
325 newEntry@(Entry ePtr) <- allocEntry ht key
326 poke pNext ePtr
327 return newEntry
328 else
329 findEntry (Entry next)