Generate straightline code for inline array allocation
authorMichal Terepeta <michal.terepeta@gmail.com>
Sat, 30 Mar 2019 17:36:36 +0000 (18:36 +0100)
committerMarge Bot <ben+marge-bot@smart-cactus.org>
Mon, 8 Apr 2019 18:29:34 +0000 (14:29 -0400)
GHC has an optimization for allocating arrays when the size is
statically known -- it'll generate the code allocating and initializing
the array inline (instead of a call to a procedure from
`rts/PrimOps.cmm`).

However, the generated code uses a loop to do the initialization. Since
we already check that the requested size is small (we check against
`maxInlineAllocSize`), we can generate faster straightline code instead.
This brings about 15% improvement for `newSmallArray#` in my testing and
slightly simplifies the code in GHC.

Signed-off-by: Michal Terepeta <michal.terepeta@gmail.com>
compiler/codeGen/StgCmmPrim.hs
testsuite/tests/codeGen/should_run/NewSmallArray.hs [new file with mode: 0644]
testsuite/tests/codeGen/should_run/NewSmallArray.stdout [new file with mode: 0644]
testsuite/tests/codeGen/should_run/all.T

index ae73f0a..4a07c78 100644 (file)
@@ -2105,17 +2105,11 @@ doNewArrayOp res_r rep info payload n init = do
 
     -- Initialise all elements of the array
     p <- assignTemp $ cmmOffsetB dflags (CmmReg arr) (hdrSize dflags rep)
-    for <- newBlockId
-    emitLabel for
-    let loopBody =
-            [ mkStore (CmmReg (CmmLocal p)) init
-            , mkAssign (CmmLocal p) (cmmOffsetW dflags (CmmReg (CmmLocal p)) 1)
-            , mkBranch for ]
-    emit =<< mkCmmIfThen
-        (cmmULtWord dflags (CmmReg (CmmLocal p))
-         (cmmOffsetW dflags (CmmReg arr)
-          (hdrSizeW dflags rep + n)))
-        (catAGraphs loopBody)
+    let initialization =
+            [ mkStore (cmmOffsetW dflags (CmmReg (CmmLocal p)) off) init
+            | off <- [0.. n - 1]
+            ]
+    emit (catAGraphs initialization)
 
     emit $ mkAssign (CmmLocal res_r) (CmmReg arr)
 
diff --git a/testsuite/tests/codeGen/should_run/NewSmallArray.hs b/testsuite/tests/codeGen/should_run/NewSmallArray.hs
new file mode 100644 (file)
index 0000000..6ee92ad
--- /dev/null
@@ -0,0 +1,96 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE UnboxedTuples #-}
+
+-- Tests for creating and initializing a @SmallArray#@ including the
+-- optimiziation where GHC inlines the code instead of calling the
+-- @newSmallArray#@ primop if the length is small enough and known at compile
+-- time.
+module Main where
+
+import GHC.Exts
+import GHC.ST
+
+import Control.Monad (forM_)
+
+
+main :: IO ()
+main = do
+    let !a00 = newSmallArrayWith42 0
+        !a01 = newSmallArrayWith42 1
+        !a02 = newSmallArrayWith42 2
+        !a03 = newSmallArrayWith42 3
+        !a04 = newSmallArrayWith42 4
+        !a05 = newSmallArrayWith42 5
+        !a06 = newSmallArrayWith42 6
+        !a07 = newSmallArrayWith42 7
+        !a08 = newSmallArrayWith42 8
+        !a09 = newSmallArrayWith42 9
+        !a10 = newSmallArrayWith42 10
+        !a11 = newSmallArrayWith42 11
+        !a12 = newSmallArrayWith42 12
+        !a13 = newSmallArrayWith42 13
+        !a14 = newSmallArrayWith42 14
+        !a15 = newSmallArrayWith42 15
+        !a16 = newSmallArrayWith42 16
+        !a17 = newSmallArrayWith42 17
+        !a18 = newSmallArrayWith42 18
+        !a19 = newSmallArrayWith42 19
+        !a20 = newSmallArrayWith42 20
+        !a21 = newSmallArrayWith42 21
+        !a22 = newSmallArrayWith42 22
+        !a23 = newSmallArrayWith42 23
+        !a24 = newSmallArrayWith42 24
+        !a25 = newSmallArrayWith42 25
+        !a26 = newSmallArrayWith42 26
+        !a27 = newSmallArrayWith42 27
+        !a28 = newSmallArrayWith42 28
+        !a29 = newSmallArrayWith42 29
+        !a30 = newSmallArrayWith42 30
+        !a31 = newSmallArrayWith42 31
+        !a32 = newSmallArrayWith42 32
+        !a33 = newSmallArrayWith42 33
+        !a34 = newSmallArrayWith42 34
+        !a35 = newSmallArrayWith42 35
+        !a36 = newSmallArrayWith42 36
+        !a37 = newSmallArrayWith42 37
+        !a38 = newSmallArrayWith42 38
+        !a39 = newSmallArrayWith42 39
+        !all = [ a00, a01, a02, a03, a04, a05, a06, a07, a08, a09
+               , a10, a11, a12, a13, a14, a15, a16, a17, a18, a19
+               , a20, a21, a22, a23, a24, a25, a26, a27, a28, a29
+               , a30, a31, a32, a33, a34, a35, a36, a37, a38, a39
+               ]
+    forM_ all (print . toListArray)
+
+
+data Array a = Array { unArray :: SmallArray# a }
+
+newSmallArrayWith42 :: Int -> Array Int
+newSmallArrayWith42 n = (runST (newArray n 42))
+-- inline to make sure the length is known at compile time
+{-# INLINE newSmallArrayWith42 #-}
+
+newArray :: Int -> a -> ST s (Array a)
+newArray (I# n#) a = ST $ \s1# -> case newSmallArray# n# a s1# of
+    (# s2#, marr# #) -> case unsafeFreezeSmallArray# marr# s2# of
+        (# s3#, arr# #) -> (# s3#, Array arr# #)
+-- inline to make sure the length is known at compile time
+{-# INLINE newArray #-}
+
+toListArray :: Array a -> [a]
+toListArray arr = go 0
+  where
+    go i | i >= lengthArray arr = []
+         | otherwise = indexArray arr i : go (i+1)
+
+indexArray :: Array a -> Int -> a
+indexArray arr i@(I# i#)
+  | i < 0 || i >= len =
+      error $ "bounds error, offset " ++ show i ++ ", length " ++ show len
+  | otherwise = case indexSmallArray# (unArray arr) i# of
+      (# a #) -> a
+  where len = lengthArray arr
+
+lengthArray :: Array a -> Int
+lengthArray arr = I# (sizeofSmallArray# (unArray arr))
diff --git a/testsuite/tests/codeGen/should_run/NewSmallArray.stdout b/testsuite/tests/codeGen/should_run/NewSmallArray.stdout
new file mode 100644 (file)
index 0000000..8833ccc
--- /dev/null
@@ -0,0 +1,40 @@
+[]
+[42]
+[42,42]
+[42,42,42]
+[42,42,42,42]
+[42,42,42,42,42]
+[42,42,42,42,42,42]
+[42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
+[42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
index c6a6b27..189fb72 100644 (file)
@@ -137,6 +137,7 @@ test('StaticByteArraySize', normal, compile_and_run, ['-O2'])
 test('CopySmallArray', normal, compile_and_run, [''])
 test('CopySmallArrayStressTest', reqlib('random'), compile_and_run, [''])
 test('SizeOfSmallArray', normal, compile_and_run, [''])
+test('NewSmallArray', normal, compile_and_run, [''])
 test('T9001', normal, compile_and_run, [''])
 test('T9013', omit_ways(['ghci']),  # ghci doesn't support unboxed tuples
      compile_and_run, [''])