Improve accuracy of get/setAllocationCounter
authorBen Gamari <bgamari.foss@gmail.com>
Mon, 19 Mar 2018 16:02:43 +0000 (12:02 -0400)
committerBen Gamari <ben@smart-cactus.org>
Mon, 19 Mar 2018 16:05:12 +0000 (12:05 -0400)
Summary:
get/setAllocationCounter didn't take into account allocations in the
current block. This was known at the time, but it turns out to be
important to have more accuracy when using these in a fine-grained
way.

Test Plan:
New unit test to test incrementally larger allocaitons.  Before I got
results like this:

```
+0
+0
+0
+0
+0
+4096
+0
+0
+0
+0
+0
+4064
+0
+0
+4088
+4056
+0
+0
+0
+4088
+4096
+4056
+4096
```

Notice how the results aren't always monotonically increasing.  After
this patch:

```
+344
+416
+488
+560
+632
+704
+776
+848
+920
+992
+1064
+1136
+1208
+1280
+1352
+1424
+1496
+1568
+1640
+1712
+1784
+1856
+1928
+2000
+2072
+2144
```

Reviewers: hvr, erikd, simonmar, jrtc27, trommler

Reviewed By: simonmar

Subscribers: trommler, jrtc27, rwbarton, thomie, carter

Differential Revision: https://phabricator.haskell.org/D4363

12 files changed:
compiler/codeGen/StgCmmForeign.hs
compiler/prelude/primops.txt.pp
includes/Cmm.h
includes/rts/Threads.h
includes/stg/MiscClosures.h
libraries/base/GHC/Conc/Sync.hs
rts/PrimOps.cmm
rts/RtsSymbols.c
rts/Threads.c
testsuite/tests/rts/all.T
testsuite/tests/rts/alloccounter1.hs [new file with mode: 0644]
testsuite/tests/rts/alloccounter1.stdout [new file with mode: 0644]

index b518c07..c1103e7 100644 (file)
@@ -404,8 +404,8 @@ Opening the nursery corresponds to the following code:
 @
    tso = CurrentTSO;
    cn = CurrentNursery;
-   bdfree = CurrentNuresry->free;
-   bdstart = CurrentNuresry->start;
+   bdfree = CurrentNursery->free;
+   bdstart = CurrentNursery->start;
 
    // We *add* the currently occupied portion of the nursery block to
    // the allocation limit, because we will subtract it again in
index 038d350..e580f98 100644 (file)
@@ -2942,6 +2942,20 @@ primop  TraceMarkerOp "traceMarker#" GenPrimOp
    has_side_effects = True
    out_of_line      = True
 
+primop  GetThreadAllocationCounter "getThreadAllocationCounter#" GenPrimOp
+   State# RealWorld -> (# State# RealWorld, INT64 #)
+   { Retrieves the allocation counter for the current thread. }
+   with
+   has_side_effects = True
+   out_of_line      = True
+
+primop  SetThreadAllocationCounter "setThreadAllocationCounter#" GenPrimOp
+   INT64 -> State# RealWorld -> State# RealWorld
+   { Sets the allocation counter for the current thread to the given value. }
+   with
+   has_side_effects = True
+   out_of_line      = True
+
 ------------------------------------------------------------------------
 section "Safe coercions"
 ------------------------------------------------------------------------
index 57d78cc..18b2aaf 100644 (file)
 
 /* TO_W_(n) converts n to W_ type from a smaller type */
 #if SIZEOF_W == 4
+#define TO_I64(x) %sx64(x)
 #define TO_W_(x) %sx32(x)
 #define HALF_W_(x) %lobits16(x)
 #elif SIZEOF_W == 8
+#define TO_I64(x) (x)
 #define TO_W_(x) %sx64(x)
 #define HALF_W_(x) %lobits32(x)
 #endif
index fceacdc..f72f5ed 100644 (file)
@@ -43,8 +43,6 @@ StgRegTable * resumeThread  (void *);
 //
 int     cmp_thread                       (StgPtr tso1, StgPtr tso2);
 int     rts_getThreadId                  (StgPtr tso);
-HsInt64 rts_getThreadAllocationCounter   (StgPtr tso);
-void    rts_setThreadAllocationCounter   (StgPtr tso, HsInt64 i);
 void    rts_enableThreadAllocationLimit  (StgPtr tso);
 void    rts_disableThreadAllocationLimit (StgPtr tso);
 
index 76cfbd6..1fbfab9 100644 (file)
@@ -468,6 +468,9 @@ RTS_FUN_DECL(stg_traceCcszh);
 RTS_FUN_DECL(stg_clearCCSzh);
 RTS_FUN_DECL(stg_traceEventzh);
 RTS_FUN_DECL(stg_traceMarkerzh);
+RTS_FUN_DECL(stg_getThreadAllocationCounterzh);
+RTS_FUN_DECL(stg_setThreadAllocationCounterzh);
+
 
 /* Other misc stuff */
 // See wiki:Commentary/Compiler/Backends/PprC#Prototypes
index 517c20e..94601f3 100644 (file)
@@ -105,6 +105,7 @@ import Data.Maybe
 import GHC.Base
 import {-# SOURCE #-} GHC.IO.Handle ( hFlush )
 import {-# SOURCE #-} GHC.IO.Handle.FD ( stdout )
+import GHC.Int
 import GHC.IO
 import GHC.IO.Encoding.UTF8
 import GHC.IO.Exception
@@ -194,18 +195,16 @@ instance Ord ThreadId where
 --
 -- @since 4.8.0.0
 setAllocationCounter :: Int64 -> IO ()
-setAllocationCounter i = do
-  ThreadId t <- myThreadId
-  rts_setThreadAllocationCounter t i
+setAllocationCounter (I64# i) = IO $ \s ->
+  case setThreadAllocationCounter# i s of s' -> (# s', () #)
 
 -- | Return the current value of the allocation counter for the
 -- current thread.
 --
 -- @since 4.8.0.0
 getAllocationCounter :: IO Int64
-getAllocationCounter = do
-  ThreadId t <- myThreadId
-  rts_getThreadAllocationCounter t
+getAllocationCounter = IO $ \s ->
+  case getThreadAllocationCounter# s of (# s', ctr #) -> (# s', I64# ctr #)
 
 -- | Enables the allocation counter to be treated as a limit for the
 -- current thread.  When the allocation limit is enabled, if the
@@ -242,16 +241,6 @@ disableAllocationLimit = do
   ThreadId t <- myThreadId
   rts_disableThreadAllocationLimit t
 
--- We cannot do these operations safely on another thread, because on
--- a 32-bit machine we cannot do atomic operations on a 64-bit value.
--- Therefore, we only expose APIs that allow getting and setting the
--- limit of the current thread.
-foreign import ccall unsafe "rts_setThreadAllocationCounter"
-  rts_setThreadAllocationCounter :: ThreadId# -> Int64 -> IO ()
-
-foreign import ccall unsafe "rts_getThreadAllocationCounter"
-  rts_getThreadAllocationCounter :: ThreadId# -> IO Int64
-
 foreign import ccall unsafe "rts_enableThreadAllocationLimit"
   rts_enableThreadAllocationLimit :: ThreadId# -> IO ()
 
index 67a2384..e3f6e4c 100644 (file)
@@ -2491,3 +2491,23 @@ stg_traceMarkerzh ( W_ msg )
     return ();
 }
 
+
+stg_getThreadAllocationCounterzh ()
+{
+    // Account for the allocation in the current block
+    W_ offset;
+    offset = Hp - bdescr_start(CurrentNursery);
+    return (StgTSO_alloc_limit(CurrentTSO) - TO_I64(offset));
+}
+
+stg_setThreadAllocationCounterzh ( I64 counter )
+{
+    // Allocation in the current block will be subtracted by
+    // getThreadAllocationCounter#, so we have to offset any existing
+    // allocation here.  See also openNursery/closeNursery in
+    // compiler/codeGen/StgCmmForeign.hs.
+    W_ offset;
+    offset = Hp - bdescr_start(CurrentNursery);
+    StgTSO_alloc_limit(CurrentTSO) = counter + TO_I64(offset);
+    return ();
+}
index e53a056..d5800fd 100644 (file)
       SymI_HasProto(rts_isProfiled)                                     \
       SymI_HasProto(rts_isDynamic)                                      \
       SymI_HasProto(rts_setInCallCapability)                            \
-      SymI_HasProto(rts_getThreadAllocationCounter)                     \
-      SymI_HasProto(rts_setThreadAllocationCounter)                     \
       SymI_HasProto(rts_enableThreadAllocationLimit)                    \
       SymI_HasProto(rts_disableThreadAllocationLimit)                   \
       SymI_HasProto(rts_setMainThread)                                  \
       SymI_HasProto(stg_traceCcszh)                                     \
       SymI_HasProto(stg_traceEventzh)                                   \
       SymI_HasProto(stg_traceMarkerzh)                                  \
+      SymI_HasProto(stg_getThreadAllocationCounterzh)                   \
+      SymI_HasProto(stg_setThreadAllocationCounterzh)                   \
       SymI_HasProto(getMonotonicNSec)                                   \
       SymI_HasProto(lockFile)                                           \
       SymI_HasProto(unlockFile)                                         \
index b769177..be69622 100644 (file)
@@ -165,19 +165,8 @@ rts_getThreadId(StgPtr tso)
 }
 
 /* ---------------------------------------------------------------------------
- * Getting & setting the thread allocation limit
+ * Enabling and disabling the thread allocation limit
  * ------------------------------------------------------------------------ */
-HsInt64 rts_getThreadAllocationCounter(StgPtr tso)
-{
-    // NB. doesn't take into account allocation in the current nursery
-    // block, so it might be off by up to 4k.
-    return PK_Int64((W_*)&(((StgTSO *)tso)->alloc_limit));
-}
-
-void rts_setThreadAllocationCounter(StgPtr tso, HsInt64 i)
-{
-    ASSIGN_Int64((W_*)&(((StgTSO *)tso)->alloc_limit), i);
-}
 
 void rts_enableThreadAllocationLimit(StgPtr tso)
 {
index cd70132..ffbd05c 100644 (file)
@@ -392,3 +392,9 @@ test('T14702', [ ignore_stdout
 test('T14900', normal, compile_and_run, ['-package ghc-compact'])
 test('InternalCounters', normal, run_command,
   ['$MAKE -s --no-print-directory InternalCounters'])
+test('alloccounter1', normal, compile_and_run,
+  [
+    # avoid allocating stack chunks, which counts as
+    # allocation and messes up the results:
+    '-with-rtsopts=-k1m'
+  ])
diff --git a/testsuite/tests/rts/alloccounter1.hs b/testsuite/tests/rts/alloccounter1.hs
new file mode 100644 (file)
index 0000000..4b81896
--- /dev/null
@@ -0,0 +1,19 @@
+module Main where
+
+import Control.Exception
+import Control.Monad
+import Data.List
+import System.Mem
+
+main = do
+  let
+    testAlloc n = do
+      let start = 999999
+      setAllocationCounter start
+      evaluate (last [1..n])
+      c <- getAllocationCounter
+      -- print (start - c)
+      return (start - c)
+  results <- forM [1..1000] testAlloc
+  print (sort results == results)
+    -- results better be in ascending order
diff --git a/testsuite/tests/rts/alloccounter1.stdout b/testsuite/tests/rts/alloccounter1.stdout
new file mode 100644 (file)
index 0000000..0ca9514
--- /dev/null
@@ -0,0 +1 @@
+True