Drop the per-task timing stats, give a summary only (#5897)
authorSimon Marlow <marlowsd@gmail.com>
Fri, 2 Mar 2012 10:53:34 +0000 (10:53 +0000)
committerSimon Marlow <marlowsd@gmail.com>
Fri, 2 Mar 2012 11:44:17 +0000 (11:44 +0000)
We were keeping around the Task struct (216 bytes) for every worker we
ever created, even though we only keep a maximum of 6 workers per
Capability.  These Task structs accumulate and cause a space leak in
programs that do lots of safe FFI calls; this patch frees the Task
struct as soon as a worker exits.

One reason we were keeping the Task structs around is because we print
out per-Task timing stats in +RTS -s, but that isn't terribly useful.
What is sometimes useful is knowing how *many* Tasks there were.  So
now I'm printing a single-line summary, this is for the program in

  TASKS: 2001 (1 bound, 31 peak workers (2000 total), using -N1)

So although we created 2k tasks overall, there were only 31 workers
active at any one time (which is exactly what we expect: the program
makes 30 safe FFI calls concurrently).

This also gives an indication of how many capabilities were being
used, which is handy if you use +RTS -N without an explicit number.

rts/RtsMain.c
rts/Stats.c
rts/Task.c
rts/Task.h
rts/posix/OSThreads.c
rts/sm/Compact.c

index 2084435..e89445d 100644 (file)
@@ -62,8 +62,7 @@ static void real_main(void)
        Capability *cap = rts_lock();
         rts_evalLazyIO(&cap,progmain_closure, NULL);
        status = rts_getSchedStatus(cap);
-       taskTimeStamp(myTask());
-       rts_unlock(cap);
+        rts_unlock(cap);
     }
 
     /* check the status of the entire Haskell computation */
index 83c43f0..2c7c35d 100644 (file)
@@ -287,18 +287,29 @@ stat_startGC (gc_thread *gct)
 }
 
 void
-stat_gcWorkerThreadStart (gc_thread *gct)
+stat_gcWorkerThreadStart (gc_thread *gct STG_UNUSED)
 {
+#if 0
+    /*
+     * We dont' collect per-thread GC stats any more, but this code
+     * could be used to do that if we want to in the future:
+     */
     if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
     {
         getProcessTimes(&gct->gc_start_cpu, &gct->gc_start_elapsed);
         gct->gc_start_thread_cpu  = getThreadCPUTime();
     }
+#endif
 }
 
 void
-stat_gcWorkerThreadDone (gc_thread *gct)
+stat_gcWorkerThreadDone (gc_thread *gct STG_UNUSED)
 {
+#if 0
+    /*
+     * We dont' collect per-thread GC stats any more, but this code
+     * could be used to do that if we want to in the future:
+     */
     Time thread_cpu, elapsed, gc_cpu, gc_elapsed;
 
     if (RtsFlags.GcFlags.giveStats != NO_GC_STATS)
@@ -311,6 +322,7 @@ stat_gcWorkerThreadDone (gc_thread *gct)
     
         taskDoneGC(gct->cap->running_task, gc_cpu, gc_elapsed);
     }
+#endif
 }
 
 /* -----------------------------------------------------------------------------
@@ -326,17 +338,13 @@ stat_endGC (gc_thread *gct,
         RtsFlags.ProfFlags.doHeapProfile)
         // heap profiling needs GC_tot_time
     {
-        Time cpu, elapsed, thread_gc_cpu, gc_cpu, gc_elapsed;
+        Time cpu, elapsed, gc_cpu, gc_elapsed;
        
         getProcessTimes(&cpu, &elapsed);
         gc_elapsed    = elapsed - gct->gc_start_elapsed;
 
-        thread_gc_cpu = getThreadCPUTime() - gct->gc_start_thread_cpu;
-
         gc_cpu = cpu - gct->gc_start_cpu;
 
-        taskDoneGC(gct->cap->running_task, thread_gc_cpu, gc_elapsed);
-
         if (RtsFlags.GcFlags.giveStats == VERBOSE_GC_STATS) {
            nat faults = getPageFaults();
            
@@ -629,22 +637,10 @@ stat_exit(int alloc)
             statsPrintf("\n");
 
 #if defined(THREADED_RTS)
-           {
-               nat i;
-               Task *task;
-                statsPrintf("                        MUT time (elapsed)       GC time  (elapsed)\n");
-               for (i = 0, task = all_tasks; 
-                    task != NULL; 
-                    i++, task = task->all_link) {
-                   statsPrintf("  Task %2d %-8s :  %6.2fs    (%6.2fs)     %6.2fs    (%6.2fs)\n",
-                               i,
-                               (task->worker) ? "(worker)" : "(bound)",
-                               TimeToSecondsDbl(task->mut_time),
-                               TimeToSecondsDbl(task->mut_etime),
-                               TimeToSecondsDbl(task->gc_time),
-                               TimeToSecondsDbl(task->gc_etime));
-               }
-           }
+            statsPrintf("  TASKS: %d (%d bound, %d peak workers (%d total), using -N%d)\n",
+                        taskCount, taskCount - workerCount,
+                        peakWorkerCount, workerCount,
+                        n_capabilities);
 
            statsPrintf("\n");
 
index 36dd0a9..f4a37bf 100644 (file)
 // Task lists and global counters.
 // Locks required: all_tasks_mutex.
 Task *all_tasks = NULL;
-static nat taskCount;
+
+nat taskCount;
+nat workerCount;
+nat currentWorkerCount;
+nat peakWorkerCount;
+
 static int tasksInitialized = 0;
 
 static void   freeTask  (Task *task);
@@ -64,8 +69,11 @@ void
 initTaskManager (void)
 {
     if (!tasksInitialized) {
-       taskCount = 0;
-       tasksInitialized = 1;
+        taskCount = 0;
+        workerCount = 0;
+        currentWorkerCount = 0;
+        peakWorkerCount = 0;
+        tasksInitialized = 1;
 #if defined(THREADED_RTS)
 #if !defined(MYTASK_USE_TLV)
        newThreadLocalKey(&currentTaskKey);
@@ -87,7 +95,7 @@ freeTaskManager (void)
     ACQUIRE_LOCK(&all_tasks_mutex);
 
     for (task = all_tasks; task != NULL; task = next) {
-        next = task->all_link;
+        next = task->all_next;
         if (task->stopped) {
             freeTask(task);
         } else {
@@ -164,9 +172,6 @@ freeTask (Task *task)
 static Task*
 newTask (rtsBool worker)
 {
-#if defined(THREADED_RTS)
-    Time currentElapsedTime, currentUserTime;
-#endif
     Task *task;
 
 #define ROUND_TO_CACHE_LINE(x) ((((x)+63) / 64) * 64)
@@ -186,26 +191,25 @@ newTask (rtsBool worker)
     task->wakeup = rtsFalse;
 #endif
 
-#if defined(THREADED_RTS)
-    currentUserTime = getThreadCPUTime();
-    currentElapsedTime = getProcessElapsedTime();
-    task->mut_time = 0;
-    task->mut_etime = 0;
-    task->gc_time = 0;
-    task->gc_etime = 0;
-    task->muttimestart = currentUserTime;
-    task->elapsedtimestart = currentElapsedTime;
-#endif
-
     task->next = NULL;
 
     ACQUIRE_LOCK(&all_tasks_mutex);
 
-    task->all_link = all_tasks;
+    task->all_prev = NULL;
+    task->all_next = all_tasks;
+    if (all_tasks != NULL) {
+        all_tasks->all_prev = task;
+    }
     all_tasks = task;
 
     taskCount++;
-
+    if (worker) {
+        workerCount++;
+        currentWorkerCount++;
+        if (currentWorkerCount > peakWorkerCount) {
+            peakWorkerCount = currentWorkerCount;
+        }
+    }
     RELEASE_LOCK(&all_tasks_mutex);
 
     return task;
@@ -314,14 +318,15 @@ discardTasksExcept (Task *keep)
     // Wipe the task list, except the current Task.
     ACQUIRE_LOCK(&all_tasks_mutex);
     for (task = all_tasks; task != NULL; task=next) {
-        next = task->all_link;
+        next = task->all_next;
         if (task != keep) {
             debugTrace(DEBUG_sched, "discarding task %ld", (long)TASK_ID(task));
             freeTask(task);
         }
     }
     all_tasks = keep;
-    keep->all_link = NULL;
+    keep->all_next = NULL;
+    keep->all_prev = NULL;
     RELEASE_LOCK(&all_tasks_mutex);
 }
 
@@ -337,7 +342,7 @@ void updateCapabilityRefs (void)
 
     ACQUIRE_LOCK(&all_tasks_mutex);
 
-    for (task = all_tasks; task != NULL; task=task->all_link) {
+    for (task = all_tasks; task != NULL; task=task->all_next) {
         if (task->cap != NULL) {
             task->cap = &capabilities[task->cap->no];
         }
@@ -353,34 +358,6 @@ void updateCapabilityRefs (void)
 }
 
 
-void
-taskTimeStamp (Task *task USED_IF_THREADS)
-{
-#if defined(THREADED_RTS)
-    Time currentElapsedTime, currentUserTime;
-
-    currentUserTime = getThreadCPUTime();
-    currentElapsedTime = getProcessElapsedTime();
-
-    task->mut_time =
-       currentUserTime - task->muttimestart - task->gc_time;
-    task->mut_etime = 
-        currentElapsedTime - task->elapsedtimestart - task->gc_etime;
-
-    if (task->gc_time   < 0) { task->gc_time   = 0; }
-    if (task->gc_etime  < 0) { task->gc_etime  = 0; }
-    if (task->mut_time  < 0) { task->mut_time  = 0; }
-    if (task->mut_etime < 0) { task->mut_etime = 0; }
-#endif
-}
-
-void
-taskDoneGC (Task *task, Time cpu_time, Time elapsed_time)
-{
-    task->gc_time  += cpu_time;
-    task->gc_etime += elapsed_time;
-}
-
 #if defined(THREADED_RTS)
 
 void
@@ -391,9 +368,22 @@ workerTaskStop (Task *task)
     ASSERT(task->id == id);
     ASSERT(myTask() == task);
 
-    task->cap = NULL;
-    taskTimeStamp(task);
-    task->stopped = rtsTrue;
+    ACQUIRE_LOCK(&all_tasks_mutex);
+
+    if (task->all_prev) {
+        task->all_prev->all_next = task->all_next;
+    } else {
+        all_tasks = task->all_next;
+    }
+    if (task->all_next) {
+        task->all_next->all_prev = task->all_prev;
+    }
+
+    currentWorkerCount--;
+
+    RELEASE_LOCK(&all_tasks_mutex);
+
+    freeTask(task);
 }
 
 #endif
@@ -491,7 +481,7 @@ void
 printAllTasks(void)
 {
     Task *task;
-    for (task = all_tasks; task != NULL; task = task->all_link) {
+    for (task = all_tasks; task != NULL; task = task->all_next) {
        debugBelch("task %p is %s, ", taskId(task), task->stopped ? "stopped" : "alive");
        if (!task->stopped) {
            if (task->cap) {
index 59a316b..ab47a07 100644 (file)
@@ -143,25 +143,13 @@ typedef struct Task_ {
     // So that we can detect when a finalizer illegally calls back into Haskell
     rtsBool running_finalizers;
 
-    // Stats that we collect about this task
-    // ToDo: we probably want to put this in a separate TaskStats
-    // structure, so we can share it between multiple Tasks.  We don't
-    // really want separate stats for each call in a nested chain of
-    // foreign->haskell->foreign->haskell calls, but we'll get a
-    // separate Task for each of the haskell calls.
-    Time       elapsedtimestart;
-    Time       muttimestart;
-    Time       mut_time;
-    Time       mut_etime;
-    Time       gc_time;
-    Time       gc_etime;
-
     // Links tasks on the returning_tasks queue of a Capability, and
     // on spare_workers.
     struct Task_ *next;
 
     // Links tasks on the all_tasks list
-    struct Task_ *all_link;
+    struct Task_ *all_next;
+    struct Task_ *all_prev;
 
 } Task;
 
@@ -201,15 +189,6 @@ void boundTaskExiting (Task *task);
 void workerTaskStop (Task *task);
 #endif
 
-// Record the time spent in this Task.
-// This is called by workerTaskStop() but not by boundTaskExiting(),
-// because it would impose an extra overhead on call-in.
-//
-void taskTimeStamp (Task *task);
-
-// The current Task has finished a GC, record the amount of time spent.
-void taskDoneGC (Task *task, Time cpu_time, Time elapsed_time);
-
 // Put the task back on the free list, mark it stopped.  Used by
 // forkProcess().
 //
@@ -240,6 +219,11 @@ void interruptWorkerTask (Task *task);
 //
 void updateCapabilityRefs (void);
 
+// For stats
+extern nat taskCount;
+extern nat workerCount;
+extern nat peakWorkerCount;
+
 // -----------------------------------------------------------------------------
 // INLINE functions... private from here on down:
 
index b1be93d..c294548 100644 (file)
@@ -198,7 +198,6 @@ forkOS_createThreadWrapper ( void * entry )
     Capability *cap;
     cap = rts_lock();
     rts_evalStableIO(&cap, (HsStablePtr) entry, NULL);
-    taskTimeStamp(myTask());
     rts_unlock(cap);
     return NULL;
 }
index 4f71026..987f78b 100644 (file)
@@ -979,7 +979,7 @@ compact(StgClosure *static_objects)
     {
        Task *task;
         InCall *incall;
-       for (task = all_tasks; task != NULL; task = task->all_link) {
+        for (task = all_tasks; task != NULL; task = task->all_next) {
             for (incall = task->incall; incall != NULL; 
                  incall = incall->prev_stack) {
                 if (incall->tso) {