More fixes for #5654
authorSimon Marlow <marlowsd@gmail.com>
Tue, 20 Dec 2016 14:32:11 +0000 (14:32 +0000)
committerSimon Marlow <marlowsd@gmail.com>
Fri, 6 Jan 2017 15:53:36 +0000 (15:53 +0000)
* In stg_ap_0_fast, if we're evaluating a thunk, the thunk might
  evaluate to a function in which case we may have to adjust its CCS.

* The interpreter has its own implementation of stg_ap_0_fast, so we
  have to do the same shenanigans with creating empty PAPs and copying
  PAPs there.

* GHCi creates Cost Centres as children of CCS_MAIN, which enterFunCCS()
  wrongly assumed to imply that they were CAFs.  Now we use the is_caf
  flag for this, which we have to correctly initialise when we create a
  Cost Centre in GHCi.

includes/stg/MiscClosures.h
rts/Apply.cmm
rts/Interpreter.c
rts/Printer.c
rts/Profiling.c
rts/StgMiscClosures.cmm
testsuite/tests/codeGen/should_run/cgrun057.stderr
testsuite/tests/profiling/should_run/T680.prof.sample
testsuite/tests/profiling/should_run/all.T
testsuite/tests/profiling/should_run/toplevel_scc_1.prof.sample

index b3f9a69..994b369 100644 (file)
@@ -63,6 +63,7 @@ RTS_RET(stg_maskUninterruptiblezh_ret);
 RTS_RET(stg_maskAsyncExceptionszh_ret);
 RTS_RET(stg_stack_underflow_frame);
 RTS_RET(stg_restore_cccs);
+RTS_RET(stg_restore_cccs_eval);
 
 // RTS_FUN(stg_interp_constr1_entry);
 // RTS_FUN(stg_interp_constr2_entry);
index b18c347..b3a04ca 100644 (file)
@@ -52,6 +52,10 @@ stg_ap_0_fast ( P_ fun )
   The mechanism we use to wrap the function is to create a
   zero-argument PAP as a proxy object to hold the new CCS, and return
   that.
+
+  If the closure we evaluated is itself a PAP, we cannot make a nested
+  PAP, so we copy the original PAP and set the CCS in the new PAP to
+  enterFunCCS(pap->header.prof.ccs).
 */
 
 again:
@@ -122,6 +126,8 @@ again:
                 CCS_ALLOC(BYTES_TO_WDS(SIZEOF_StgPAP), CCS_OVERHEAD);
                 P_ pap;
                 pap = Hp - size + WDS(1);
+                // We'll lose the original PAP, so we should enter its CCS
+                ccall enterFunCCS(BaseReg "ptr", StgHeader_ccs(fun) "ptr");
                 SET_HDR(pap, stg_PAP_info, CCCS);
                 StgPAP_arity(pap) = StgPAP_arity(fun);
                 StgPAP_n_args(pap) = StgPAP_n_args(fun);
@@ -137,6 +143,27 @@ again:
                 goto loop;
             }
         }
+        case AP,
+             AP_STACK,
+             BLACKHOLE,
+             WHITEHOLE,
+             THUNK,
+             THUNK_1_0,
+             THUNK_0_1,
+             THUNK_2_0,
+             THUNK_1_1,
+             THUNK_0_2,
+             THUNK_STATIC,
+             THUNK_SELECTOR:
+        {
+            // The thunk might evaluate to a function, so we have to come
+            // back here again to adjust its CCS if necessary.  The
+            // stg_restore_ccs_eval stack frame does that.
+            STK_CHK_GEN();
+            jump %ENTRY_CODE(info)
+                (stg_restore_cccs_eval_info, CCCS)
+                (UNTAG(fun));
+        }
         default:
         {
             jump %ENTRY_CODE(info) (UNTAG(fun));
index 7221ff6..5a39567 100644 (file)
@@ -214,6 +214,48 @@ void interp_shutdown ( void )
 
 #endif
 
+#ifdef PROFILING
+
+//
+// Build a zero-argument PAP with the current CCS
+// See Note [Evaluating functions with profiling] in Apply.cmm
+//
+STATIC_INLINE
+StgClosure * newEmptyPAP (Capability *cap,
+                          StgClosure *tagged_obj, // a FUN or a BCO
+                          uint32_t arity)
+{
+    StgPAP *pap = (StgPAP *)allocate(cap, sizeofW(StgPAP));
+    SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS);
+    pap->arity = arity;
+    pap->n_args = 0;
+    pap->fun = tagged_obj;
+    return (StgClosure *)pap;
+}
+
+//
+// Make an exact copy of a PAP, except that we combine the current CCS with the
+// CCS in the PAP.  See Note [Evaluating functions with profiling] in Apply.cmm
+//
+STATIC_INLINE
+StgClosure * copyPAP  (Capability *cap, StgPAP *oldpap)
+{
+    uint32_t size = PAP_sizeW(oldpap->n_args);
+    StgPAP *pap = (StgPAP *)allocate(cap, size);
+    enterFunCCS(&cap->r, oldpap->header.prof.ccs);
+    SET_HDR(pap, &stg_PAP_info, cap->r.rCCCS);
+    pap->arity = oldpap->arity;
+    pap->n_args = oldpap->n_args;
+    pap->fun = oldpap->fun;
+    uint32_t i;
+    for (i = 0; i < ((StgPAP *)pap)->n_args; i++) {
+        pap->payload[i] = oldpap->payload[i];
+    }
+    return (StgClosure *)pap;
+}
+
+#endif
+
 static StgWord app_ptrs_itbl[] = {
     (W_)&stg_ap_p_info,
     (W_)&stg_ap_pp_info,
@@ -343,6 +385,8 @@ eval_obj:
     case CONSTR_1_1:
     case CONSTR_0_2:
     case CONSTR_NOCAF:
+        break;
+
     case FUN:
     case FUN_1_0:
     case FUN_0_1:
@@ -350,15 +394,30 @@ eval_obj:
     case FUN_1_1:
     case FUN_0_2:
     case FUN_STATIC:
+#ifdef PROFILING
+        if (cap->r.rCCCS != obj->header.prof.ccs) {
+            tagged_obj =
+                newEmptyPAP(cap, tagged_obj, get_fun_itbl(obj)->f.arity);
+        }
+#endif
+        break;
+
     case PAP:
-        // already in WHNF
+#ifdef PROFILING
+        if (cap->r.rCCCS != obj->header.prof.ccs) {
+            tagged_obj = copyPAP(cap, (StgPAP *)obj);
+        }
+#endif
         break;
 
     case BCO:
-    {
         ASSERT(((StgBCO *)obj)->arity > 0);
+#ifdef PROFILING
+        if (cap->r.rCCCS != obj->header.prof.ccs) {
+            tagged_obj = newEmptyPAP(cap, tagged_obj, ((StgBCO *)obj)->arity);
+        }
+#endif
         break;
-    }
 
     case AP:    /* Copied from stg_AP_entry. */
     {
@@ -380,7 +439,7 @@ eval_obj:
         // restore the CCCS after evaluating the AP
         Sp -= 2;
         Sp[1] = (W_)cap->r.rCCCS;
-        Sp[0] = (W_)&stg_restore_cccs_info;
+        Sp[0] = (W_)&stg_restore_cccs_eval_info;
 #endif
 
         Sp -= sizeofW(StgUpdateFrame);
@@ -425,7 +484,7 @@ eval_obj:
         // restore the CCCS after evaluating the closure
         Sp -= 2;
         Sp[1] = (W_)cap->r.rCCCS;
-        Sp[0] = (W_)&stg_restore_cccs_info;
+        Sp[0] = (W_)&stg_restore_cccs_eval_info;
 #endif
         Sp -= 2;
         Sp[1] = (W_)tagged_obj;
@@ -465,7 +524,8 @@ do_return:
         // NOTE: not using get_itbl().
         info = ((StgClosure *)Sp)->header.info;
 
-        if (info == (StgInfoTable *)&stg_restore_cccs_info) {
+        if (info == (StgInfoTable *)&stg_restore_cccs_info ||
+            info == (StgInfoTable *)&stg_restore_cccs_eval_info) {
             cap->r.rCCCS = (CostCentreStack*)Sp[1];
             Sp += 2;
             goto do_return;
index f23e0b0..87b11e8 100644 (file)
@@ -526,6 +526,11 @@ printStackChunk( StgPtr sp, StgPtr spBottom )
                 fprintCCS(stderr, (CostCentreStack*)sp[1]);
                 debugBelch("\n" );
                 continue;
+            } else if (c == (StgWord)&stg_restore_cccs_eval_info) {
+                debugBelch("stg_restore_cccs_eval_info\n" );
+                fprintCCS(stderr, (CostCentreStack*)sp[1]);
+                debugBelch("\n" );
+                continue;
 #endif
             } else {
                 debugBelch("RET_SMALL (%p)\n", info);
index 952785b..94ec555 100644 (file)
@@ -236,6 +236,10 @@ CostCentre *mkCostCentre (char *label, char *module, char *srcloc)
     cc->label = label;
     cc->module = module;
     cc->srcloc = srcloc;
+    cc->is_caf = 0;
+    cc->mem_alloc = 0;
+    cc->time_ticks = 0;
+    cc->link = NULL;
     return cc;
 }
 
@@ -379,7 +383,7 @@ void enterFunCCS (StgRegTable *reg, CostCentreStack *ccsfn)
     }
 
     // common case 2: the function stack is empty, or just CAF
-    if (ccsfn->prevStack == CCS_MAIN) {
+    if (ccsfn->cc->is_caf) {
         return;
     }
 
index aa22c99..e8a5b8f 100644 (file)
@@ -55,6 +55,16 @@ INFO_TABLE_RET (stg_restore_cccs, RET_SMALL, W_ info_ptr, W_ cccs)
     jump %ENTRY_CODE(Sp(0)) [*]; // NB. all registers live!
 }
 
+
+INFO_TABLE_RET (stg_restore_cccs_eval, RET_SMALL, W_ info_ptr, W_ cccs)
+    return (P_ ret)
+{
+#if defined(PROFILING)
+    CCCS = cccs;
+#endif
+    jump stg_ap_0_fast(ret);
+}
+
 /* ----------------------------------------------------------------------------
    Support for the bytecode interpreter.
    ------------------------------------------------------------------------- */
index 262d749..5d1656d 100644 (file)
@@ -1,4 +1,4 @@
-*** Exception (reporting due to +RTS -xc): (THUNK_2_0), stack trace: 
+*** Exception (reporting due to +RTS -xc): (THUNK_STATIC), stack trace: 
   Main.g,
   called from Main.f,
   called from Main.main,
index 1d52d71..8875bbd 100644 (file)
@@ -1,42 +1,41 @@
-       Thu Dec  8 15:23 2016 Time and Allocation Profiling Report  (Final)
+       Tue Dec 20 13:18 2016 Time and Allocation Profiling Report  (Final)
 
-          T680 +RTS -hc -p -RTS
+          T680 +RTS -p -RTS
 
-       total time  =        0.00 secs   (0 ticks @ 1000 us, 1 processor)
-       total alloc =     753,032 bytes  (excludes profiling overheads)
+       total time  =        0.20 secs   (204 ticks @ 1000 us, 1 processor)
+       total alloc = 449,729,208 bytes  (excludes profiling overheads)
 
-COST CENTRE MODULE           SRC                    %time %alloc
+COST CENTRE MODULE SRC              %time %alloc
 
-CAF         GHC.IO.Handle.FD <entire-module>          0.0    4.6
-main        Main             T680.hs:20:1-14          0.0    1.2
-foo.\       Main             T680.hs:3:12-40          0.0   25.5
-foo.bar     Main             T680.hs:(5,3)-(9,38)     0.0   29.8
-foo.bar.\   Main             T680.hs:(8,11)-(9,38)    0.0   38.2
+foo.\       Main   T680.hs:3:12-40   98.5   99.8
 
 
                                                                                     individual      inherited
 COST CENTRE          MODULE                SRC                   no.     entries  %time %alloc   %time %alloc
 
-MAIN                 MAIN                  <built-in>            104          0    0.0    0.1     0.0  100.0
- CAF                 Main                  <entire-module>       207          0    0.0    0.0     0.0   93.5
-  k                  Main                  T680.hs:12:1-17       211          1    0.0    0.0     0.0    0.0
-   foo               Main                  T680.hs:(2,1)-(9,38)  212          1    0.0    0.0     0.0    0.0
-    foo.bar          Main                  T680.hs:(5,3)-(9,38)  216          1    0.0    0.0     0.0    0.0
-     foo.bar.k'      Main                  T680.hs:6:9-34        220          1    0.0    0.0     0.0    0.0
-      k.\            Main                  T680.hs:12:16         221          1    0.0    0.0     0.0    0.0
-  main               Main                  T680.hs:20:1-14       208          1    0.0    0.0     0.0    0.0
-  r                  Main                  T680.hs:18:1-26       210          1    0.0    0.0     0.0   93.5
-   k                 Main                  T680.hs:12:1-17       213          0    0.0    0.0     0.0   93.5
-    foo              Main                  T680.hs:(2,1)-(9,38)  214          0    0.0    0.0     0.0   93.5
-     foo.\           Main                  T680.hs:3:12-40       215       4001    0.0   25.5     0.0   25.5
-     foo.bar         Main                  T680.hs:(5,3)-(9,38)  217          0    0.0   29.8     0.0   68.0
-      foo.bar.\      Main                  T680.hs:(8,11)-(9,38) 218       4001    0.0   38.2     0.0   38.2
-       foo.bar.\.k'' Main                  T680.hs:8:15-27       219       4000    0.0    0.0     0.0    0.0
-      foo.bar.k'     Main                  T680.hs:6:9-34        222          0    0.0    0.0     0.0    0.0
- CAF                 GHC.Conc.Signal       <entire-module>       201          0    0.0    0.1     0.0    0.1
- CAF                 GHC.IO.Encoding       <entire-module>       191          0    0.0    0.4     0.0    0.4
- CAF                 GHC.IO.Encoding.Iconv <entire-module>       189          0    0.0    0.0     0.0    0.0
- CAF                 GHC.IO.Handle.FD      <entire-module>       181          0    0.0    4.6     0.0    4.6
- CAF                 GHC.IO.Handle.Text    <entire-module>       179          0    0.0    0.0     0.0    0.0
- CAF                 GHC.Show              <entire-module>       165          0    0.0    0.0     0.0    0.0
- main                Main                  T680.hs:20:1-14       209          0    0.0    1.2     0.0    1.2
+MAIN                 MAIN                  <built-in>            108          0    0.0    0.0   100.0  100.0
+ CAF                 Main                  <entire-module>       215          0    0.0    0.0   100.0  100.0
+  k                  Main                  T680.hs:12:1-17       219          1    0.0    0.0     0.0    0.0
+   foo               Main                  T680.hs:(2,1)-(9,38)  220          1    0.0    0.0     0.0    0.0
+    foo.bar          Main                  T680.hs:(5,3)-(9,38)  224          1    0.0    0.0     0.0    0.0
+     foo.bar.k'      Main                  T680.hs:6:9-34        228          1    0.0    0.0     0.0    0.0
+      k.\            Main                  T680.hs:12:16         229          1    0.0    0.0     0.0    0.0
+  main               Main                  T680.hs:20:1-14       216          1    0.0    0.0     0.0    0.0
+  r                  Main                  T680.hs:18:1-26       218          1    0.0    0.0   100.0  100.0
+   k                 Main                  T680.hs:12:1-17       221          0    0.0    0.0   100.0  100.0
+    foo              Main                  T680.hs:(2,1)-(9,38)  222          0    0.0    0.0   100.0  100.0
+     foo.\           Main                  T680.hs:3:12-40       223       4001   98.5   99.8    98.5   99.8
+     foo.bar         Main                  T680.hs:(5,3)-(9,38)  225          0    0.5    0.0     1.5    0.2
+      foo.bar.\      Main                  T680.hs:(8,11)-(9,38) 226       4001    1.0    0.1     1.0    0.1
+       foo.bar.\.k'' Main                  T680.hs:8:15-27       227       4000    0.0    0.0     0.0    0.0
+        foo.bar.k'   Main                  T680.hs:6:9-34        232          0    0.0    0.0     0.0    0.0
+         k.\         Main                  T680.hs:12:16         233          0    0.0    0.0     0.0    0.0
+      foo.bar.k'     Main                  T680.hs:6:9-34        230          0    0.0    0.0     0.0    0.0
+       k.\           Main                  T680.hs:12:16         231          0    0.0    0.0     0.0    0.0
+ CAF                 GHC.Conc.Signal       <entire-module>       206          0    0.0    0.0     0.0    0.0
+ CAF                 GHC.IO.Encoding       <entire-module>       194          0    0.0    0.0     0.0    0.0
+ CAF                 GHC.IO.Encoding.Iconv <entire-module>       192          0    0.0    0.0     0.0    0.0
+ CAF                 GHC.IO.Handle.FD      <entire-module>       184          0    0.0    0.0     0.0    0.0
+ CAF                 GHC.IO.Handle.Text    <entire-module>       182          0    0.0    0.0     0.0    0.0
+ CAF                 GHC.Show              <entire-module>       168          0    0.0    0.0     0.0    0.0
+ main                Main                  T680.hs:20:1-14       217          0    0.0    0.0     0.0    0.0
index 875a98e..d85a2b8 100644 (file)
@@ -97,8 +97,7 @@ test('T5559', [], compile_and_run, [''])
 
 test('callstack001',
      # unoptimised results are different w.r.t. CAF attribution
-     [ expect_broken_for_10037,
-       omit_ways(['ghci-ext-prof']), # produces a different stack
+     [ omit_ways(['ghci-ext-prof']), # produces a different stack
      ], compile_and_run,
      ['-fprof-auto-calls -fno-full-laziness -fno-state-hack'])
 
index 4eec28d..3464946 100644 (file)
@@ -1,30 +1,33 @@
-       Tue Jul 19 08:36 2016 Time and Allocation Profiling Report  (Final)
+       Tue Dec 20 14:22 2016 Time and Allocation Profiling Report  (Final)
 
           toplevel_scc_1 +RTS -p -RTS
 
        total time  =        0.00 secs   (0 ticks @ 1000 us, 1 processor)
-       total alloc =      79,792 bytes  (excludes profiling overheads)
+       total alloc =      75,880 bytes  (excludes profiling overheads)
 
 COST CENTRE MODULE           SRC              %time %alloc
 
+MAIN        MAIN             <built-in>         0.0   24.0
 CAF         GHC.Read         <entire-module>    0.0    1.2
-CAF         GHC.IO.Handle.FD <entire-module>    0.0   64.9
-CAF         GHC.IO.Encoding  <entire-module>    0.0    3.5
-CAF         Main             <entire-module>    0.0   27.6
+CAF         GHC.IO.Handle.FD <entire-module>    0.0   68.3
+CAF         GHC.IO.Encoding  <entire-module>    0.0    3.6
 
 
-                                                                              individual      inherited
-COST CENTRE MODULE                SRC                      no.     entries  %time %alloc   %time %alloc
+                                                                               individual      inherited
+COST CENTRE  MODULE                SRC                      no.     entries  %time %alloc   %time %alloc
 
-MAIN        MAIN                  <built-in>               105          0    0.0    0.4     0.0  100.0
- CAF        Main                  <entire-module>          209          0    0.0   27.6     0.0   27.9
-  Main.f1   Main                  toplevel_scc_1.hs:4:1-2  212          1    0.0    0.0     0.0    0.0
-  Main.foo  Main                  toplevel_scc_1.hs:7:1-2  210          1    0.0    0.2     0.0    0.3
-   Main.bar Main                  toplevel_scc_1.hs:10:5-6 211          1    0.0    0.1     0.0    0.1
- CAF        GHC.Conc.Signal       <entire-module>          203          0    0.0    0.8     0.0    0.8
- CAF        GHC.IO.Encoding       <entire-module>          193          0    0.0    3.5     0.0    3.5
- CAF        GHC.IO.Encoding.Iconv <entire-module>          191          0    0.0    0.3     0.0    0.3
- CAF        GHC.IO.Handle.FD      <entire-module>          183          0    0.0   64.9     0.0   64.9
- CAF        GHC.IO.Handle.Text    <entire-module>          181          0    0.0    0.1     0.0    0.1
- CAF        GHC.Read              <entire-module>          171          0    0.0    1.2     0.0    1.2
- CAF        Text.Read.Lex         <entire-module>          154          0    0.0    0.8     0.0    0.8
+MAIN         MAIN                  <built-in>               105          0    0.0   24.0     0.0  100.0
+ CAF         Main                  <entire-module>          209          0    0.0    0.3     0.0    0.5
+  Main.f1    Main                  toplevel_scc_1.hs:4:1-2  214          1    0.0    0.0     0.0    0.0
+  Main.foo   Main                  toplevel_scc_1.hs:7:1-2  210          1    0.0    0.2     0.0    0.2
+   Main.bar  Main                  toplevel_scc_1.hs:10:5-6 212          1    0.0    0.0     0.0    0.0
+ CAF         GHC.Conc.Signal       <entire-module>          203          0    0.0    0.8     0.0    0.8
+ CAF         GHC.IO.Encoding       <entire-module>          193          0    0.0    3.6     0.0    3.6
+ CAF         GHC.IO.Encoding.Iconv <entire-module>          191          0    0.0    0.3     0.0    0.3
+ CAF         GHC.IO.Handle.FD      <entire-module>          183          0    0.0   68.3     0.0   68.3
+ CAF         GHC.IO.Handle.Text    <entire-module>          181          0    0.0    0.1     0.0    0.1
+ CAF         GHC.Read              <entire-module>          171          0    0.0    1.2     0.0    1.2
+ CAF         Text.Read.Lex         <entire-module>          154          0    0.0    0.9     0.0    0.9
+ Main.f1     Main                  toplevel_scc_1.hs:4:1-2  215          0    0.0    0.0     0.0    0.0
+ Main.foo    Main                  toplevel_scc_1.hs:7:1-2  211          0    0.0    0.1     0.0    0.1
+  Main.bar   Main                  toplevel_scc_1.hs:10:5-6 213          0    0.0    0.1     0.0    0.1