Fix deadlock between STM and throwTo
authorSimon Marlow <marlowsd@gmail.com>
Thu, 12 Jul 2018 14:13:47 +0000 (10:13 -0400)
committerBen Gamari <ben@smart-cactus.org>
Sat, 14 Jul 2018 18:22:20 +0000 (14:22 -0400)
There was a lock-order reversal between lockTSO() and the TVar lock,
see #15136 for the details.

It turns out we can fix this pretty easily by just deleting all the
locking code(!).  The principle for unblocking a `BlockedOnSTM` thread
then becomes the same as for other kinds of blocking: if the TSO
belongs to this capability then we do it directly, otherwise we send a
message to the capability that owns the TSO. That is, a thread blocked
on STM is owned by its capability, as it should be.

The possible downside of this is that we might send multiple messages
to wake up a thread when the thread is on another capability. This is
safe, it's just not very efficient.  I'll try to do some experiments
to see if this is a problem.

Test Plan: Test case from #15136 doesn't deadlock any more.

Reviewers: bgamari, osa1, erikd

Reviewed By: osa1

Subscribers: rwbarton, thomie, carter

GHC Trac Issues: #15136

Differential Revision: https://phabricator.haskell.org/D4956

(cherry picked from commit 7fc418df856d9b58034eeec48915646e67a7a562)

rts/RaiseAsync.c
rts/SMPClosureOps.h
rts/STM.c
rts/Threads.c
rts/sm/Sanity.c

index f5e96a2..b08acc4 100644 (file)
@@ -416,21 +416,12 @@ check_target:
     }
 
     case BlockedOnSTM:
-        lockTSO(target);
-        // Unblocking BlockedOnSTM threads requires the TSO to be
-        // locked; see STM.c:unpark_tso().
-        if (target->why_blocked != BlockedOnSTM) {
-            unlockTSO(target);
-            goto retry;
-        }
         if ((target->flags & TSO_BLOCKEX) &&
             ((target->flags & TSO_INTERRUPTIBLE) == 0)) {
             blockedThrowTo(cap,target,msg);
-            unlockTSO(target);
             return THROWTO_BLOCKED;
         } else {
             raiseAsync(cap, target, msg->exception, false, NULL);
-            unlockTSO(target);
             return THROWTO_SUCCESS;
         }
 
index 1d18e1b..c73821a 100644 (file)
@@ -124,15 +124,6 @@ EXTERN_INLINE void unlockClosure(StgClosure *p, const StgInfoTable *info)
     p->header.info = info;
 }
 
-// Handy specialised versions of lockClosure()/unlockClosure()
-INLINE_HEADER void lockTSO(StgTSO *tso);
-INLINE_HEADER void lockTSO(StgTSO *tso)
-{ lockClosure((StgClosure *)tso); }
-
-INLINE_HEADER void unlockTSO(StgTSO *tso);
-INLINE_HEADER void unlockTSO(StgTSO *tso)
-{ unlockClosure((StgClosure*)tso, (const StgInfoTable *)&stg_TSO_info); }
-
 #endif /* CMINUSMINUS */
 
 #include "EndPrivate.h"
index 058eec7..abb4417 100644 (file)
--- a/rts/STM.c
+++ b/rts/STM.c
@@ -332,24 +332,7 @@ static void unpark_tso(Capability *cap, StgTSO *tso) {
     // queues: it's up to the thread itself to remove it from the wait queues
     // if it decides to do so when it is scheduled.
 
-    // Unblocking a TSO from BlockedOnSTM is done under the TSO lock,
-    // to avoid multiple CPUs unblocking the same TSO, and also to
-    // synchronise with throwTo(). The first time the TSO is unblocked
-    // we mark this fact by setting block_info.closure == STM_AWOKEN.
-    // This way we can avoid sending further wakeup messages in the
-    // future.
-    lockTSO(tso);
-    if (tso->why_blocked == BlockedOnSTM &&
-        tso->block_info.closure == &stg_STM_AWOKEN_closure) {
-      TRACE("unpark_tso already woken up tso=%p", tso);
-    } else if (tso -> why_blocked == BlockedOnSTM) {
-      TRACE("unpark_tso on tso=%p", tso);
-      tso->block_info.closure = &stg_STM_AWOKEN_closure;
-      tryWakeupThread(cap,tso);
-    } else {
-      TRACE("spurious unpark_tso on tso=%p", tso);
-    }
-    unlockTSO(tso);
+    tryWakeupThread(cap,tso);
 }
 
 static void unpark_waiters_on(Capability *cap, StgTVar *s) {
index be69622..78c5b6c 100644 (file)
@@ -297,8 +297,11 @@ tryWakeupThread (Capability *cap, StgTSO *tso)
         goto unblock;
     }
 
-    case BlockedOnBlackHole:
     case BlockedOnSTM:
+        tso->block_info.closure = &stg_STM_AWOKEN_closure;
+        goto unblock;
+
+    case BlockedOnBlackHole:
     case ThreadMigrating:
         goto unblock;
 
index e5a22fd..8d4171b 100644 (file)
@@ -547,7 +547,8 @@ checkTSO(StgTSO *tso)
     ASSERT(next == END_TSO_QUEUE ||
            info == &stg_MVAR_TSO_QUEUE_info ||
            info == &stg_TSO_info ||
-           info == &stg_WHITEHOLE_info); // happens due to STM doing lockTSO()
+           info == &stg_WHITEHOLE_info); // used to happen due to STM doing
+                                         // lockTSO(), might not happen now
 
     if (   tso->why_blocked == BlockedOnMVar
         || tso->why_blocked == BlockedOnMVarRead