Fix a profiling bug
authorSimon Marlow <marlowsd@gmail.com>
Wed, 28 Jan 2015 11:25:52 +0000 (11:25 +0000)
committerSimon Marlow <marlowsd@gmail.com>
Wed, 4 Feb 2015 08:17:17 +0000 (08:17 +0000)
Summary:
We were erroneously discarding SCCs on function-typed variables.
These can affect the call stack, so we have to retain them.  The bug
was introduced during the recent SourceNote refactoring.

This is an alternative to the fix proposed in D616.  I also added the
scc005 test from that diff, which works with this change.

While I was here, I also fixed up the other profiling tests, marking a
few as expect_broken_for(10037) where the opt/unopt output differs in
non-fatal ways.

Test Plan: profiling tests

Reviewers: scpmw, ezyang, austin

Subscribers: thomie

Differential Revision: https://phabricator.haskell.org/D636

GHC Trac Issues: #10007

compiler/coreSyn/CoreUtils.hs
testsuite/tests/profiling/should_run/T2552.prof.sample
testsuite/tests/profiling/should_run/all.T
testsuite/tests/profiling/should_run/ioprof.prof.sample
testsuite/tests/profiling/should_run/prof-doc-fib.prof.sample
testsuite/tests/profiling/should_run/scc005.hs [new file with mode: 0644]
testsuite/tests/profiling/should_run/scc005.prof.sample [new file with mode: 0644]

index 28981a3..f6dc2a6 100644 (file)
@@ -297,10 +297,18 @@ mkTick t orig_expr = mkTick' id id orig_expr
          else top $ Tick (mkNoScope t) $ rest $ tickHNFArgs (mkNoCount t) expr
 
     Var x
-      | not (isFunTy (idType x)) && tickishPlace t == PlaceCostCentre
+      | notFunction && tickishPlace t == PlaceCostCentre
       -> orig_expr
-      | canSplit
+      | notFunction && canSplit
       -> top $ Tick (mkNoScope t) $ rest expr
+      where
+        -- SCCs can be eliminated on variables provided the variable
+        -- is not a function.  In these cases the SCC makes no difference:
+        -- the cost of evaluating the variable will be attributed to its
+        -- definition site.  When the variable refers to a function, however,
+        -- an SCC annotation on the variable affects the cost-centre stack
+        -- when the function is called, so we must retain those.
+        notFunction = not (isFunTy (idType x))
 
     Lit{}
       | tickishPlace t == PlaceCostCentre
index fa8f886..2ec702f 100644 (file)
@@ -1,40 +1,36 @@
-       Wed Oct 26 13:31 2011 Time and Allocation Profiling Report  (Final)
+       Wed Jan 28 11:18 2015 Time and Allocation Profiling Report  (Final)
 
-          T2552 +RTS -p -RTS
+          T2552 +RTS -hc -p -RTS
 
-       total time  =        1.02 secs   (51 ticks @ 20 ms)
-       total alloc = 234,535,968 bytes  (excludes profiling overheads)
+       total time  =        0.09 secs   (93 ticks @ 1000 us, 1 processor)
+       total alloc = 123,466,232 bytes  (excludes profiling overheads)
 
 COST CENTRE     MODULE  %time %alloc
 
-fib3'.nfib      Main     27.5   33.3
-fib2'.nfib      Main     27.5   33.3
-fib1.fib1'.nfib Main     27.5   33.3
-fib3            Main      3.9    0.0
-fib2'           Main      3.9    0.0
-fib1            Main      3.9    0.0
-nfib'           Main      2.0    0.0
-fib2            Main      2.0    0.0
-fib1.fib1'      Main      2.0    0.0
+fib2'.nfib      Main     34.4   33.3
+fib3'.nfib      Main     33.3   33.3
+fib1.fib1'.nfib Main     32.3   33.3
 
 
                                                                 individual     inherited
 COST CENTRE           MODULE                  no.     entries  %time %alloc   %time %alloc
 
-MAIN                  MAIN                    102           0    0.0    0.0   100.0  100.0
- CAF                  Main                    203           0    0.0    0.0   100.0  100.0
-  fib3                Main                    212           1    0.0    0.0     0.0    0.0
-  main                Main                    204           1    0.0    0.0   100.0  100.0
-   fib3               Main                    213           0    3.9    0.0    31.4   33.3
-    fib3'             Main                    214           1    0.0    0.0    27.5   33.3
-     fib3'.nfib       Main                    215     1028457   27.5   33.3    27.5   33.3
-   fib2               Main                    209           1    2.0    0.0    33.3   33.3
-    fib2'             Main                    210           1    3.9    0.0    31.4   33.3
-     fib2'.nfib       Main                    211     1028457   27.5   33.3    27.5   33.3
-   fib1               Main                    205           1    3.9    0.0    35.3   33.3
-    fib1.fib1'        Main                    206           1    2.0    0.0    31.4   33.3
-     nfib'            Main                    207           1    2.0    0.0    29.4   33.3
-      fib1.fib1'.nfib Main                    208     1028457   27.5   33.3    27.5   33.3
- CAF                  GHC.Conc.Signal         200           0    0.0    0.0     0.0    0.0
- CAF                  GHC.IO.Encoding.Iconv   190           0    0.0    0.0     0.0    0.0
- CAF                  GHC.IO.Handle.FD        182           0    0.0    0.0     0.0    0.0
+MAIN                  MAIN                     98           0    0.0    0.0   100.0  100.0
+ main                 Main                    197           0    0.0    0.0     0.0    0.0
+ CAF                  Main                    195           0    0.0    0.0   100.0  100.0
+  main                Main                    196           1    0.0    0.0   100.0  100.0
+   fib3               Main                    205           1    0.0    0.0    33.3   33.3
+    fib3'             Main                    206           1    0.0    0.0    33.3   33.3
+     fib3'.nfib       Main                    207     1028457   33.3   33.3    33.3   33.3
+   fib2               Main                    202           1    0.0    0.0    34.4   33.3
+    fib2'             Main                    203           1    0.0    0.0    34.4   33.3
+     fib2'.nfib       Main                    204     1028457   34.4   33.3    34.4   33.3
+   fib1               Main                    198           1    0.0    0.0    32.3   33.3
+    fib1.fib1'        Main                    199           1    0.0    0.0    32.3   33.3
+     nfib'            Main                    200           1    0.0    0.0    32.3   33.3
+      fib1.fib1'.nfib Main                    201     1028457   32.3   33.3    32.3   33.3
+ CAF                  GHC.Conc.Signal         189           0    0.0    0.0     0.0    0.0
+ CAF                  GHC.IO.Encoding         179           0    0.0    0.0     0.0    0.0
+ CAF                  GHC.IO.Encoding.Iconv   177           0    0.0    0.0     0.0    0.0
+ CAF                  GHC.IO.Handle.FD        169           0    0.0    0.0     0.0    0.0
+ CAF                  GHC.IO.Handle.Text      167           0    0.0    0.0     0.0    0.0
index ac70b9f..4e4bd09 100644 (file)
@@ -23,7 +23,10 @@ test('T3001-2',
      compile_and_run, ['-package bytestring'])
 
 test('scc001', [req_profiling,
-                extra_ways(['prof']), only_ways(prof_ways)],
+                extra_ways(['prof']), only_ways(prof_ways),
+                expect_broken_for(10037,['prof'])],
+        # As with ioprof001, the unoptimised profile is different but
+        # not badly wrong (CAF attribution is different).
      compile_and_run,
      ['-fno-state-hack -fno-full-laziness']) # Note [consistent stacks]
 
@@ -43,6 +46,11 @@ test('scc004', [req_profiling,
      compile_and_run,
      [''])
 
+test('scc005', [req_profiling,
+                extra_ways(['prof']), only_ways(prof_ways)],
+     compile_and_run,
+     [''])
+
 test('T5314',
      [ only_ways(prof_ways),
        extra_ways(extra_prof_ways),
@@ -56,7 +64,8 @@ test('T680',
      ['-fno-full-laziness']) # Note [consistent stacks]
 
 test('T2552',
-     [ req_profiling, extra_ways(['prof']), only_ways(prof_ways) ],
+     [ req_profiling, extra_ways(['prof']), only_ways(prof_ways),
+       expect_broken_for(10037,['prof'])],
      compile_and_run,
      [''])
 
@@ -67,7 +76,7 @@ test('T949',
 
 test('ioprof',
      [ req_profiling, extra_ways(['prof']), only_ways(prof_ways),
-       expect_fail_for(['prof']),
+       expect_broken_for(10037,['prof']),
         # The results for 'prof' are fine, but the ordering changes.
         # We care more about getting the optimised results right, so ignoring
         # this for now.
@@ -101,7 +110,9 @@ test('T5559', # unicode in cost centre names
 #       -fno-full-laziness
 
 test('callstack001',
-     [ req_profiling, extra_ways(['prof']), only_ways(prof_ways) ],
+     [ req_profiling, extra_ways(['prof']), only_ways(prof_ways),
+       expect_broken_for(10037,['prof'])],
+     # unoptimised results are different w.r.t. CAF attribution
      compile_and_run, ['-fprof-auto-calls -fno-full-laziness -fno-state-hack'])
 
 test('callstack002',
index 07257e2..35f3ea4 100644 (file)
@@ -1,37 +1,36 @@
-       Mon Apr 28 15:29 2014 Time and Allocation Profiling Report  (Final)
+       Wed Jan 28 11:06 2015 Time and Allocation Profiling Report  (Final)
 
           ioprof +RTS -hc -p -RTS
 
        total time  =        0.00 secs   (0 ticks @ 1000 us, 1 processor)
-       total alloc =      52,208 bytes  (excludes profiling overheads)
+       total alloc =      52,192 bytes  (excludes profiling overheads)
 
 COST CENTRE MODULE           %time %alloc
 
 MAIN        MAIN               0.0    1.4
+CAF         GHC.IO.Handle.FD   0.0   66.1
 CAF         GHC.IO.Encoding    0.0    6.3
 CAF         GHC.Conc.Signal    0.0    1.3
-CAF         GHC.IO.Handle.FD   0.0   66.2
 main        Main               0.0   16.7
-errorM.\    Main               0.0    7.0
+errorM.\    Main               0.0    6.9
 
 
                                                             individual     inherited
 COST CENTRE       MODULE                  no.     entries  %time %alloc   %time %alloc
 
-MAIN              MAIN                     44           0    0.0    1.4     0.0  100.0
- main             Main                     89           0    0.0   16.5     0.0   16.5
- CAF              Main                     87           0    0.0    0.0     0.0    7.4
-  main            Main                     88           1    0.0    0.2     0.0    7.4
-   runM           Main                     90           1    0.0    0.2     0.0    7.2
-    bar           Main                     91           1    0.0    0.0     0.0    7.1
-     errorM       Main                     93           1    0.0    0.0     0.0    0.0
-     >>=          Main                     92           1    0.0    0.0     0.0    7.0
-      >>=.\       Main                     94           1    0.0    0.0     0.0    7.0
-       foo        Main                     95           1    0.0    0.0     0.0    7.0
-        errorM    Main                     96           0    0.0    0.0     0.0    7.0
-         errorM.\ Main                     97           1    0.0    7.0     0.0    7.0
- CAF              GHC.IO.Handle.FD         84           0    0.0   66.2     0.0   66.2
- CAF              GHC.Conc.Signal          82           0    0.0    1.3     0.0    1.3
- CAF              GHC.Conc.Sync            81           0    0.0    0.4     0.0    0.4
- CAF              GHC.IO.Encoding          74           0    0.0    6.3     0.0    6.3
- CAF              GHC.IO.Encoding.Iconv    56           0    0.0    0.4     0.0    0.4
+MAIN              MAIN                     98           0    0.0    1.4     0.0  100.0
+ main             Main                    197           0    0.0   16.6     0.0   16.6
+ CAF              Main                    195           0    0.0    0.0     0.0    7.5
+  main            Main                    196           1    0.0    0.2     0.0    7.5
+   runM           Main                    198           1    0.0    0.3     0.0    7.3
+    bar           Main                    199           1    0.0    0.0     0.0    7.0
+     >>=          Main                    200           1    0.0    0.0     0.0    6.9
+      >>=.\       Main                    201           1    0.0    0.0     0.0    6.9
+       foo        Main                    202           1    0.0    0.0     0.0    6.9
+        errorM    Main                    203           1    0.0    0.0     0.0    6.9
+         errorM.\ Main                    204           1    0.0    6.9     0.0    6.9
+ CAF              GHC.Conc.Signal         189           0    0.0    1.3     0.0    1.3
+ CAF              GHC.Conc.Sync           188           0    0.0    0.4     0.0    0.4
+ CAF              GHC.IO.Encoding         179           0    0.0    6.3     0.0    6.3
+ CAF              GHC.IO.Encoding.Iconv   177           0    0.0    0.4     0.0    0.4
+ CAF              GHC.IO.Handle.FD        169           0    0.0   66.1     0.0   66.1
index 9059410..919d7b2 100644 (file)
@@ -1,9 +1,9 @@
-       Thu Oct 27 09:29 2011 Time and Allocation Profiling Report  (Final)
+       Wed Jan 28 11:24 2015 Time and Allocation Profiling Report  (Final)
 
-          fib +RTS -p -RTS
+          prof-doc-fib +RTS -hc -p -RTS
 
-       total time  =        0.76 secs   (38 ticks @ 20 ms)
-       total alloc = 247,940,020 bytes  (excludes profiling overheads)
+       total time  =        0.08 secs   (83 ticks @ 1000 us, 1 processor)
+       total alloc = 107,829,264 bytes  (excludes profiling overheads)
 
 COST CENTRE MODULE  %time %alloc
 
@@ -13,13 +13,16 @@ fib         Main    100.0  100.0
                                                       individual     inherited
 COST CENTRE MODULE                  no.     entries  %time %alloc   %time %alloc
 
-MAIN        MAIN                    102           0    0.0    0.0   100.0  100.0
- CAF        Main                    203           0    0.0    0.0   100.0  100.0
-  main      Main                    204           1    0.0    0.0   100.0  100.0
-   main.g   Main                    207           1    0.0    0.0     0.0    0.1
-    fib     Main                    208        1973    0.0    0.1     0.0    0.1
-   main.f   Main                    205           1    0.0    0.0   100.0   99.9
-    fib     Main                    206     2692537  100.0   99.9   100.0   99.9
- CAF        GHC.Conc.Signal         201           0    0.0    0.0     0.0    0.0
- CAF        GHC.IO.Encoding.Iconv   191           0    0.0    0.0     0.0    0.0
- CAF        GHC.IO.Handle.FD        183           0    0.0    0.0     0.0    0.0
+MAIN        MAIN                     98           0    0.0    0.0   100.0  100.0
+ main       Main                    197           0    0.0    0.0     0.0    0.0
+ CAF        Main                    195           0    0.0    0.0   100.0  100.0
+  main      Main                    196           1    0.0    0.0   100.0  100.0
+   main.f   Main                    200           1    0.0    0.0   100.0   99.9
+    fib     Main                    201     2692537  100.0   99.9   100.0   99.9
+   main.g   Main                    198           1    0.0    0.0     0.0    0.1
+    fib     Main                    199        1973    0.0    0.1     0.0    0.1
+ CAF        GHC.Conc.Signal         189           0    0.0    0.0     0.0    0.0
+ CAF        GHC.IO.Encoding         179           0    0.0    0.0     0.0    0.0
+ CAF        GHC.IO.Encoding.Iconv   177           0    0.0    0.0     0.0    0.0
+ CAF        GHC.IO.Handle.FD        169           0    0.0    0.0     0.0    0.0
+ CAF        GHC.IO.Handle.Text      167           0    0.0    0.0     0.0    0.0
diff --git a/testsuite/tests/profiling/should_run/scc005.hs b/testsuite/tests/profiling/should_run/scc005.hs
new file mode 100644 (file)
index 0000000..b75a8da
--- /dev/null
@@ -0,0 +1,10 @@
+f :: Int -> Int -> Int
+{-# NOINLINE f #-}
+f x _ = x
+
+g :: Int -> Int -> Int
+{-# NOINLINE g #-}
+g x = f (1+1) -- slightly non-trivial so it will float
+
+main :: IO Int
+main = return $! g 3 5
diff --git a/testsuite/tests/profiling/should_run/scc005.prof.sample b/testsuite/tests/profiling/should_run/scc005.prof.sample
new file mode 100644 (file)
index 0000000..5d720f3
--- /dev/null
@@ -0,0 +1,27 @@
+       Wed Jan 28 10:59 2015 Time and Allocation Profiling Report  (Final)
+
+          scc005 +RTS -p -RTS
+
+       total time  =        0.00 secs   (0 ticks @ 1000 us, 1 processor)
+       total alloc =      39,104 bytes  (excludes profiling overheads)
+
+COST CENTRE MODULE           %time %alloc
+
+MAIN        MAIN               0.0    1.2
+CAF         GHC.IO.Handle.FD   0.0   88.3
+CAF         GHC.IO.Encoding    0.0    7.1
+CAF         GHC.Conc.Signal    0.0    1.7
+
+
+                                                      individual     inherited
+COST CENTRE MODULE                  no.     entries  %time %alloc   %time %alloc
+
+MAIN        MAIN                     98           0    0.0    1.2     0.0  100.0
+ CAF        Main                    195           0    0.0    0.9     0.0    1.2
+  main      Main                    196           1    0.0    0.2     0.0    0.3
+   g        Main                    197           1    0.0    0.2     0.0    0.2
+    f       Main                    198           1    0.0    0.0     0.0    0.0
+ CAF        GHC.Conc.Signal         189           0    0.0    1.7     0.0    1.7
+ CAF        GHC.IO.Encoding         179           0    0.0    7.1     0.0    7.1
+ CAF        GHC.IO.Encoding.Iconv   177           0    0.0    0.6     0.0    0.6
+ CAF        GHC.IO.Handle.FD        169           0    0.0   88.3     0.0   88.3