Hadrian: bump Cabal submodule, install extra dynamic flavours of RTS
[ghc.git] / rts / RaiseAsync.c
index c8a3856..72f5dff 100644 (file)
 #include "STM.h"
 #include "sm/Sanity.h"
 #include "Profiling.h"
+#include "Messages.h"
 #if defined(mingw32_HOST_OS)
 #include "win32/IOManager.h"
 #endif
 
-static void raiseAsync (Capability *cap,
-                       StgTSO *tso,
-                       StgClosure *exception, 
-                       rtsBool stop_at_atomically,
-                       StgUpdateFrame *stop_here);
+static void blockedThrowTo (Capability *cap,
+                            StgTSO *target, MessageThrowTo *msg);
 
 static void removeFromQueues(Capability *cap, StgTSO *tso);
 
-static void blockedThrowTo (Capability *cap, StgTSO *source, StgTSO *target);
+static void removeFromMVarBlockedQueue (StgTSO *tso);
 
-static void performBlockedException (Capability *cap, 
-                                    StgTSO *source, StgTSO *target);
+static void throwToSendMsg (Capability *cap USED_IF_THREADS,
+                            Capability *target_cap USED_IF_THREADS,
+                            MessageThrowTo *msg USED_IF_THREADS);
 
 /* -----------------------------------------------------------------------------
    throwToSingleThreaded
 
    This version of throwTo is safe to use if and only if one of the
    following holds:
-   
+
      - !THREADED_RTS
 
      - all the other threads in the system are stopped (eg. during GC).
@@ -52,39 +51,92 @@ static void performBlockedException (Capability *cap,
    has been raised.
    -------------------------------------------------------------------------- */
 
-void
-throwToSingleThreaded(Capability *cap, StgTSO *tso, StgClosure *exception)
-{
-    throwToSingleThreaded_(cap, tso, exception, rtsFalse);
-}
-
-void
-throwToSingleThreaded_(Capability *cap, StgTSO *tso, StgClosure *exception, 
-                      rtsBool stop_at_atomically)
+static void
+throwToSingleThreaded__ (Capability *cap, StgTSO *tso, StgClosure *exception,
+                         bool stop_at_atomically, StgUpdateFrame *stop_here)
 {
     // Thread already dead?
     if (tso->what_next == ThreadComplete || tso->what_next == ThreadKilled) {
-       return;
+        return;
     }
 
     // Remove it from any blocking queues
     removeFromQueues(cap,tso);
 
-    raiseAsync(cap, tso, exception, stop_at_atomically, NULL);
+    raiseAsync(cap, tso, exception, stop_at_atomically, stop_here);
 }
 
 void
-suspendComputation(Capability *cap, StgTSO *tso, StgUpdateFrame *stop_here)
+throwToSingleThreaded (Capability *cap, StgTSO *tso, StgClosure *exception)
 {
-    // Thread already dead?
-    if (tso->what_next == ThreadComplete || tso->what_next == ThreadKilled) {
-       return;
-    }
+    throwToSingleThreaded__(cap, tso, exception, false, NULL);
+}
 
-    // Remove it from any blocking queues
-    removeFromQueues(cap,tso);
+void
+throwToSingleThreaded_ (Capability *cap, StgTSO *tso, StgClosure *exception,
+                        bool stop_at_atomically)
+{
+    throwToSingleThreaded__ (cap, tso, exception, stop_at_atomically, NULL);
+}
+
+void // cannot return a different TSO
+suspendComputation (Capability *cap, StgTSO *tso, StgUpdateFrame *stop_here)
+{
+    throwToSingleThreaded__ (cap, tso, NULL, false, stop_here);
+}
+
+/* -----------------------------------------------------------------------------
+   throwToSelf
+
+   Useful for throwing an async exception in a thread from the
+   runtime.  It handles unlocking the throwto message returned by
+   throwTo().
 
-    raiseAsync(cap, tso, NULL, rtsFalse, stop_here);
+   Note [Throw to self when masked]
+   
+   When a StackOverflow occurs when the thread is masked, we want to
+   defer the exception to when the thread becomes unmasked/hits an
+   interruptible point.  We already have a mechanism for doing this,
+   the blocked_exceptions list, but the use here is a bit unusual,
+   because an exception is normally only added to this list upon
+   an asynchronous 'throwTo' call (with all of the relevant
+   multithreaded nonsense). Morally, a stack overflow should be an
+   asynchronous exception sent by a thread to itself, and it should
+   have the same semantics.  But there are a few key differences:
+   
+   - If you actually tried to send an asynchronous exception to
+     yourself using throwTo, the exception would actually immediately
+     be delivered.  This is because throwTo itself is considered an
+     interruptible point, so the exception is always deliverable. Thus,
+     ordinarily, we never end up with a message to oneself in the
+     blocked_exceptions queue.
+   
+   - In the case of a StackOverflow, we don't actually care about the
+     wakeup semantics; when an exception is delivered, the thread that
+     originally threw the exception should be woken up, since throwTo
+     blocks until the exception is successfully thrown.  Fortunately,
+     it is harmless to wakeup a thread that doesn't actually need waking
+     up, e.g. ourselves.
+   
+   - No synchronization is necessary, because we own the TSO and the
+     capability.  You can observe this by tracing through the execution
+     of throwTo.  We skip synchronizing the message and inter-capability
+     communication.
+   
+   We think this doesn't break any invariants, but do be careful!
+   -------------------------------------------------------------------------- */
+
+void
+throwToSelf (Capability *cap, StgTSO *tso, StgClosure *exception)
+{
+    MessageThrowTo *m;
+
+    m = throwTo(cap, tso, tso, exception);
+
+    if (m != NULL) {
+        // throwTo leaves it locked
+        unlockClosure((StgClosure*)m, &stg_MSG_THROWTO_info);
+    }
 }
 
 /* -----------------------------------------------------------------------------
@@ -96,203 +148,214 @@ suspendComputation(Capability *cap, StgTSO *tso, StgUpdateFrame *stop_here)
    may be blocked and could be woken up at any point by another CPU.
    We have some delicate synchronisation to do.
 
-   There is a completely safe fallback scheme: it is always possible
-   to just block the source TSO on the target TSO's blocked_exceptions
-   queue.  This queue is locked using lockTSO()/unlockTSO().  It is
-   checked at regular intervals: before and after running a thread
-   (schedule() and threadPaused() respectively), and just before GC
-   (scheduleDoGC()).  Activating a thread on this queue should be done
-   using maybePerformBlockedException(): this is done in the context
-   of the target thread, so the exception can be raised eagerly.
-
-   This fallback scheme works even if the target thread is complete or
-   killed: scheduleDoGC() will discover the blocked thread before the
-   target is GC'd.
-
-   Blocking the source thread on the target thread's blocked_exception
-   queue is also employed when the target thread is currently blocking
-   exceptions (ie. inside Control.Exception.block).
-
-   We could use the safe fallback scheme exclusively, but that
-   wouldn't be ideal: most calls to throwTo would block immediately,
-   possibly until the next GC, which might require the deadlock
-   detection mechanism to kick in.  So we try to provide promptness
-   wherever possible.
-
-   We can promptly deliver the exception if the target thread is:
-
-     - runnable, on the same Capability as the source thread (because
-       we own the run queue and therefore the target thread).
-   
-     - blocked, and we can obtain exclusive access to it.  Obtaining
-       exclusive access to the thread depends on how it is blocked.
+   The underlying scheme when multiple Capabilities are in use is
+   message passing: when the target of a throwTo is on another
+   Capability, we send a message (a MessageThrowTo closure) to that
+   Capability.
+
+   If the throwTo needs to block because the target TSO is masking
+   exceptions (the TSO_BLOCKEX flag), then the message is placed on
+   the blocked_exceptions queue attached to the target TSO.  When the
+   target TSO enters the unmasked state again, it must check the
+   queue.  The blocked_exceptions queue is not locked; only the
+   Capability owning the TSO may modify it.
+
+   To make things simpler for throwTo, we always create the message
+   first before deciding what to do.  The message may get sent, or it
+   may get attached to a TSO's blocked_exceptions queue, or the
+   exception may get thrown immediately and the message dropped,
+   depending on the current state of the target.
+
+   Currently we send a message if the target belongs to another
+   Capability, and it is
+
+     - NotBlocked, BlockedOnMsgThrowTo,
+       BlockedOnCCall_Interruptible
+
+     - or it is masking exceptions (TSO_BLOCKEX)
 
-   We must also be careful to not trip over threadStackOverflow(),
-   which might be moving the TSO to enlarge its stack.
-   lockTSO()/unlockTSO() are used here too.
+   Currently, if the target is BlockedOnMVar, BlockedOnSTM, or
+   BlockedOnBlackHole then we acquire ownership of the TSO by locking
+   its parent container (e.g. the MVar) and then raise the exception.
+   We might change these cases to be more message-passing-like in the
+   future.
 
-   Returns: 
+   Returns:
 
-   THROWTO_SUCCESS    exception was raised, ok to continue
+   NULL               exception was raised, ok to continue
 
-   THROWTO_BLOCKED    exception was not raised; block the source
-                      thread then call throwToReleaseTarget() when
-                     the source thread is properly tidied away.
+   MessageThrowTo *   exception was not raised; the source TSO
+                      should now put itself in the state
+                      BlockedOnMsgThrowTo, and when it is ready
+                      it should unlock the message using
+                      unlockClosure(msg, &stg_MSG_THROWTO_info);
+                      If it decides not to raise the exception after
+                      all, it can revoke it safely with
+                      unlockClosure(msg, &stg_MSG_NULL_info);
 
    -------------------------------------------------------------------------- */
 
-nat
-throwTo (Capability *cap,      // the Capability we hold 
-        StgTSO *source,        // the TSO sending the exception
-        StgTSO *target,        // the TSO receiving the exception
-        StgClosure *exception, // the exception closure
-        /*[out]*/ void **out USED_IF_THREADS)
+MessageThrowTo *
+throwTo (Capability *cap,       // the Capability we hold
+         StgTSO *source,        // the TSO sending the exception (or NULL)
+         StgTSO *target,        // the TSO receiving the exception
+         StgClosure *exception) // the exception closure
 {
-    StgWord status;
+    MessageThrowTo *msg;
 
-    ASSERT(target != END_TSO_QUEUE);
+    msg = (MessageThrowTo *) allocate(cap, sizeofW(MessageThrowTo));
+    // the message starts locked; see below
+    SET_HDR(msg, &stg_WHITEHOLE_info, CCS_SYSTEM);
+    msg->source      = source;
+    msg->target      = target;
+    msg->exception   = exception;
 
-    // follow ThreadRelocated links in the target first
-    while (target->what_next == ThreadRelocated) {
-       target = target->_link;
-       // No, it might be a WHITEHOLE:
-       // ASSERT(get_itbl(target)->type == TSO);
+    switch (throwToMsg(cap, msg))
+    {
+    case THROWTO_SUCCESS:
+        // unlock the message now, otherwise we leave a WHITEHOLE in
+        // the heap (#6103)
+        SET_HDR(msg, &stg_MSG_THROWTO_info, CCS_SYSTEM);
+        return NULL;
+
+    case THROWTO_BLOCKED:
+    default:
+        // the caller will unlock the message when it is ready.  We
+        // cannot unlock it yet, because the calling thread will need
+        // to tidy up its state first.
+        return msg;
     }
+}
 
-    debugTrace(DEBUG_sched, "throwTo: from thread %lu to thread %lu",
-              (unsigned long)source->id, (unsigned long)target->id);
 
-#ifdef DEBUG
-    traceThreadStatus(DEBUG_sched, target);
-#endif
+uint32_t
+throwToMsg (Capability *cap, MessageThrowTo *msg)
+{
+    StgWord status;
+    StgTSO *target = msg->target;
+    Capability *target_cap;
 
     goto check_target;
+
 retry:
+    write_barrier();
     debugTrace(DEBUG_sched, "throwTo: retrying...");
 
 check_target:
     ASSERT(target != END_TSO_QUEUE);
 
     // Thread already dead?
-    if (target->what_next == ThreadComplete 
-       || target->what_next == ThreadKilled) {
-       return THROWTO_SUCCESS;
+    if (target->what_next == ThreadComplete
+        || target->what_next == ThreadKilled) {
+        return THROWTO_SUCCESS;
+    }
+
+    debugTraceCap(DEBUG_sched, cap,
+                  "throwTo: from thread %lu to thread %lu",
+                  (unsigned long)msg->source->id,
+                  (unsigned long)msg->target->id);
+
+#if defined(DEBUG)
+    traceThreadStatus(DEBUG_sched, target);
+#endif
+
+    target_cap = target->cap;
+    if (target->cap != cap) {
+        throwToSendMsg(cap, target_cap, msg);
+        return THROWTO_BLOCKED;
     }
 
     status = target->why_blocked;
-    
+
     switch (status) {
     case NotBlocked:
-       /* if status==NotBlocked, and target->cap == cap, then
-          we own this TSO and can raise the exception.
-          
-          How do we establish this condition?  Very carefully.
-
-          Let 
-              P = (status == NotBlocked)
-              Q = (tso->cap == cap)
-              
-          Now, if P & Q are true, then the TSO is locked and owned by
-          this capability.  No other OS thread can steal it.
-
-          If P==0 and Q==1: the TSO is blocked, but attached to this
-          capabilty, and it can be stolen by another capability.
-          
-          If P==1 and Q==0: the TSO is runnable on another
-          capability.  At any time, the TSO may change from runnable
-          to blocked and vice versa, while it remains owned by
-          another capability.
-
-          Suppose we test like this:
-
-             p = P
-             q = Q
-             if (p && q) ...
-
-           this is defeated by another capability stealing a blocked
-           TSO from us to wake it up (Schedule.c:unblockOne()).  The
-           other thread is doing
-
-             Q = 0
-             P = 1
-
-           assuming arbitrary reordering, we could see this
-           interleaving:
-
-             start: P==0 && Q==1 
-             P = 1
-             p = P
-             q = Q
-             Q = 0
-             if (p && q) ...
-              
-           so we need a memory barrier:
-
-             p = P
-             mb()
-             q = Q
-             if (p && q) ...
-
-           this avoids the problematic case.  There are other cases
-           to consider, but this is the tricky one.
-
-           Note that we must be sure that unblockOne() does the
-           writes in the correct order: Q before P.  The memory
-           barrier ensures that if we have seen the write to P, we
-           have also seen the write to Q.
-       */
     {
-       Capability *target_cap;
-
-       write_barrier();
-       target_cap = target->cap;
-       if (target_cap == cap && (target->flags & TSO_BLOCKEX) == 0) {
-           // It's on our run queue and not blocking exceptions
-           raiseAsync(cap, target, exception, rtsFalse, NULL);
-           return THROWTO_SUCCESS;
-       } else {
-           // Otherwise, just block on the blocked_exceptions queue
-           // of the target thread.  The queue will get looked at
-           // soon enough: it is checked before and after running a
-           // thread, and during GC.
-           lockTSO(target);
-
-           // Avoid race with threadStackOverflow, which may have
-           // just moved this TSO.
-           if (target->what_next == ThreadRelocated) {
-               unlockTSO(target);
-               target = target->_link;
-               goto retry;
-           }
-            // check again for ThreadComplete and ThreadKilled.  This
-            // cooperates with scheduleHandleThreadFinished to ensure
-            // that we never miss any threads that are throwing an
-            // exception to a thread in the process of terminating.
-            if (target->what_next == ThreadComplete
-                || target->what_next == ThreadKilled) {
-               unlockTSO(target);
-                return THROWTO_SUCCESS;
+        if ((target->flags & TSO_BLOCKEX) == 0) {
+            // It's on our run queue and not blocking exceptions
+            raiseAsync(cap, target, msg->exception, false, NULL);
+            return THROWTO_SUCCESS;
+        } else {
+            blockedThrowTo(cap,target,msg);
+            return THROWTO_BLOCKED;
+        }
+    }
+
+    case BlockedOnMsgThrowTo:
+    {
+        const StgInfoTable *i;
+        MessageThrowTo *m;
+
+        m = target->block_info.throwto;
+
+        // target is local to this cap, but has sent a throwto
+        // message to another cap.
+        //
+        // The source message is locked.  We need to revoke the
+        // target's message so that we can raise the exception, so
+        // we attempt to lock it.
+
+        // There's a possibility of a deadlock if two threads are both
+        // trying to throwTo each other (or more generally, a cycle of
+        // threads).  To break the symmetry we compare the addresses
+        // of the MessageThrowTo objects, and the one for which m <
+        // msg gets to spin, while the other can only try to lock
+        // once, but must then back off and unlock both before trying
+        // again.
+        if (m < msg) {
+            i = lockClosure((StgClosure *)m);
+        } else {
+            i = tryLockClosure((StgClosure *)m);
+            if (i == NULL) {
+//            debugBelch("collision\n");
+                throwToSendMsg(cap, target->cap, msg);
+                return THROWTO_BLOCKED;
             }
-           blockedThrowTo(cap,source,target);
-           *out = target;
-           return THROWTO_BLOCKED;
-       }
+        }
+
+        if (i == &stg_MSG_NULL_info) {
+            // we know there's a MSG_TRY_WAKEUP on the way, so we
+            // might as well just do it now.  The message will
+            // be a no-op when it arrives.
+            unlockClosure((StgClosure*)m, i);
+            tryWakeupThread(cap, target);
+            goto retry;
+        }
+
+        if (i != &stg_MSG_THROWTO_info) {
+            // not a MSG_THROWTO: the message has been revoked or
+            // replaced (MSG_NULL was already handled above), so retry
+            unlockClosure((StgClosure*)m, i);
+            goto retry;
+        }
+
+        if ((target->flags & TSO_BLOCKEX) &&
+            ((target->flags & TSO_INTERRUPTIBLE) == 0)) {
+            unlockClosure((StgClosure*)m, i);
+            blockedThrowTo(cap,target,msg);
+            return THROWTO_BLOCKED;
+        }
+
+        // nobody else can wake up this TSO after we claim the message
+        doneWithMsgThrowTo(m);
+
+        raiseAsync(cap, target, msg->exception, false, NULL);
+        return THROWTO_SUCCESS;
     }
 
     case BlockedOnMVar:
+    case BlockedOnMVarRead:
     {
-       /*
-         To establish ownership of this TSO, we need to acquire a
-         lock on the MVar that it is blocked on.
-       */
-       StgMVar *mvar;
-       StgInfoTable *info USED_IF_THREADS;
-       
-       mvar = (StgMVar *)target->block_info.closure;
-
-       // ASSUMPTION: tso->block_info must always point to a
-       // closure.  In the threaded RTS it does.
-        switch (get_itbl(mvar)->type) {
+        /*
+          To establish ownership of this TSO, we need to acquire a
+          lock on the MVar that it is blocked on.
+        */
+        StgMVar *mvar;
+        StgInfoTable *info USED_IF_THREADS;
+
+        mvar = (StgMVar *)target->block_info.closure;
+
+        // ASSUMPTION: tso->block_info must always point to a
+        // closure.  In the threaded RTS it does.
+        switch (get_itbl((StgClosure *)mvar)->type) {
         case MVAR_CLEAN:
         case MVAR_DIRTY:
             break;
@@ -300,210 +363,162 @@ check_target:
             goto retry;
         }
 
-       info = lockClosure((StgClosure *)mvar);
-
-       if (target->what_next == ThreadRelocated) {
-           target = target->_link;
-           unlockClosure((StgClosure *)mvar,info);
-           goto retry;
-       }
-       // we have the MVar, let's check whether the thread
-       // is still blocked on the same MVar.
-       if (target->why_blocked != BlockedOnMVar
-           || (StgMVar *)target->block_info.closure != mvar) {
-           unlockClosure((StgClosure *)mvar, info);
-           goto retry;
-       }
-
-       if ((target->flags & TSO_BLOCKEX) &&
-           ((target->flags & TSO_INTERRUPTIBLE) == 0)) {
-           lockClosure((StgClosure *)target);
-           blockedThrowTo(cap,source,target);
-           unlockClosure((StgClosure *)mvar, info);
-           *out = target;
-           return THROWTO_BLOCKED; // caller releases TSO
-       } else {
-           removeThreadFromMVarQueue(cap, mvar, target);
-           raiseAsync(cap, target, exception, rtsFalse, NULL);
-           unblockOne(cap, target);
-           unlockClosure((StgClosure *)mvar, info);
-           return THROWTO_SUCCESS;
-       }
+        info = lockClosure((StgClosure *)mvar);
+
+        // we have the MVar, let's check whether the thread
+        // is still blocked on the same MVar.
+        if ((target->why_blocked != BlockedOnMVar && target->why_blocked != BlockedOnMVarRead)
+            || (StgMVar *)target->block_info.closure != mvar) {
+            unlockClosure((StgClosure *)mvar, info);
+            goto retry;
+        }
+
+        if (target->_link == END_TSO_QUEUE) {
+            // the MVar operation has already completed.  There is a
+            // MSG_TRY_WAKEUP on the way, but we can just wake up the
+            // thread now anyway and ignore the message when it
+            // arrives.
+            unlockClosure((StgClosure *)mvar, info);
+            tryWakeupThread(cap, target);
+            goto retry;
+        }
+
+        if ((target->flags & TSO_BLOCKEX) &&
+            ((target->flags & TSO_INTERRUPTIBLE) == 0)) {
+            blockedThrowTo(cap,target,msg);
+            unlockClosure((StgClosure *)mvar, info);
+            return THROWTO_BLOCKED;
+        } else {
+            // revoke the MVar operation
+            removeFromMVarBlockedQueue(target);
+            raiseAsync(cap, target, msg->exception, false, NULL);
+            unlockClosure((StgClosure *)mvar, info);
+            return THROWTO_SUCCESS;
+        }
     }
 
     case BlockedOnBlackHole:
     {
-       ACQUIRE_LOCK(&sched_mutex);
-       // double checking the status after the memory barrier:
-       if (target->why_blocked != BlockedOnBlackHole) {
-           RELEASE_LOCK(&sched_mutex);
-           goto retry;
-       }
-
-       if (target->flags & TSO_BLOCKEX) {
-           lockTSO(target);
-           blockedThrowTo(cap,source,target);
-           RELEASE_LOCK(&sched_mutex);
-           *out = target;
-           return THROWTO_BLOCKED; // caller releases TSO
-       } else {
-           removeThreadFromQueue(cap, &blackhole_queue, target);
-           raiseAsync(cap, target, exception, rtsFalse, NULL);
-           unblockOne(cap, target);
-           RELEASE_LOCK(&sched_mutex);
-           return THROWTO_SUCCESS;
-       }
+        if (target->flags & TSO_BLOCKEX) {
+            // BlockedOnBlackHole is not interruptible.
+            blockedThrowTo(cap,target,msg);
+            return THROWTO_BLOCKED;
+        } else {
+            // Revoke the message by replacing it with IND. We're not
+            // locking anything here, so we might still get a TRY_WAKEUP
+            // message from the owner of the blackhole some time in the
+            // future, but that doesn't matter.
+            ASSERT(target->block_info.bh->header.info == &stg_MSG_BLACKHOLE_info);
+            OVERWRITE_INFO(target->block_info.bh, &stg_IND_info);
+            raiseAsync(cap, target, msg->exception, false, NULL);
+            return THROWTO_SUCCESS;
+        }
     }
 
-    case BlockedOnException:
-    {
-       StgTSO *target2;
-       StgInfoTable *info;
-
-       /*
-         To obtain exclusive access to a BlockedOnException thread,
-         we must call lockClosure() on the TSO on which it is blocked.
-         Since the TSO might change underneath our feet, after we
-         call lockClosure() we must check that 
-          
-             (a) the closure we locked is actually a TSO
-            (b) the original thread is still  BlockedOnException,
-            (c) the original thread is still blocked on the TSO we locked
-            and (d) the target thread has not been relocated.
-
-         We synchronise with threadStackOverflow() (which relocates
-         threads) using lockClosure()/unlockClosure().
-       */
-       target2 = target->block_info.tso;
-
-       info = lockClosure((StgClosure *)target2);
-       if (info != &stg_TSO_info) {
-           unlockClosure((StgClosure *)target2, info);
-           goto retry;
-       }
-       if (target->what_next == ThreadRelocated) {
-           target = target->_link;
-           unlockTSO(target2);
-           goto retry;
-       }
-       if (target2->what_next == ThreadRelocated) {
-           target->block_info.tso = target2->_link;
-           unlockTSO(target2);
-           goto retry;
-       }
-       if (target->why_blocked != BlockedOnException
-           || target->block_info.tso != target2) {
-           unlockTSO(target2);
-           goto retry;
-       }
-       
-       /* 
-          Now we have exclusive rights to the target TSO...
-
-          If it is blocking exceptions, add the source TSO to its
-          blocked_exceptions queue.  Otherwise, raise the exception.
-       */
-       if ((target->flags & TSO_BLOCKEX) &&
-           ((target->flags & TSO_INTERRUPTIBLE) == 0)) {
-           lockTSO(target);
-           blockedThrowTo(cap,source,target);
-           unlockTSO(target2);
-           *out = target;
-           return THROWTO_BLOCKED;
-       } else {
-           removeThreadFromQueue(cap, &target2->blocked_exceptions, target);
-           raiseAsync(cap, target, exception, rtsFalse, NULL);
-           unblockOne(cap, target);
-           unlockTSO(target2);
-           return THROWTO_SUCCESS;
-       }
-    }  
-
     case BlockedOnSTM:
-       lockTSO(target);
-       // Unblocking BlockedOnSTM threads requires the TSO to be
-       // locked; see STM.c:unpark_tso().
-       if (target->why_blocked != BlockedOnSTM) {
-           unlockTSO(target);
-           goto retry;
-       }
-       if ((target->flags & TSO_BLOCKEX) &&
-           ((target->flags & TSO_INTERRUPTIBLE) == 0)) {
-           blockedThrowTo(cap,source,target);
-           *out = target;
-           return THROWTO_BLOCKED;
-       } else {
-           raiseAsync(cap, target, exception, rtsFalse, NULL);
-           unblockOne(cap, target);
-           unlockTSO(target);
-           return THROWTO_SUCCESS;
-       }
+        if ((target->flags & TSO_BLOCKEX) &&
+            ((target->flags & TSO_INTERRUPTIBLE) == 0)) {
+            blockedThrowTo(cap,target,msg);
+            return THROWTO_BLOCKED;
+        } else {
+            raiseAsync(cap, target, msg->exception, false, NULL);
+            return THROWTO_SUCCESS;
+        }
 
+    case BlockedOnCCall_Interruptible:
+#if defined(THREADED_RTS)
+    {
+        Task *task = NULL;
+        // walk suspended_ccalls to find the correct worker thread
+        InCall *incall;
+        for (incall = cap->suspended_ccalls; incall != NULL; incall = incall->next) {
+            if (incall->suspended_tso == target) {
+                task = incall->task;
+                break;
+            }
+        }
+        if (task != NULL) {
+            blockedThrowTo(cap, target, msg);
+            if (!((target->flags & TSO_BLOCKEX) &&
+                  ((target->flags & TSO_INTERRUPTIBLE) == 0))) {
+                interruptWorkerTask(task);
+            }
+            return THROWTO_BLOCKED;
+        } else {
+            debugTraceCap(DEBUG_sched, cap, "throwTo: could not find worker thread to kill");
+        }
+        // fall to next
+    }
+    FALLTHROUGH;
+#endif
     case BlockedOnCCall:
-    case BlockedOnCCall_NoUnblockExc:
-       // I don't think it's possible to acquire ownership of a
-       // BlockedOnCCall thread.  We just assume that the target
-       // thread is blocking exceptions, and block on its
-       // blocked_exception queue.
-       lockTSO(target);
-       if (target->why_blocked != BlockedOnCCall &&
-           target->why_blocked != BlockedOnCCall_NoUnblockExc) {
-           unlockTSO(target);
-            goto retry;
-       }
-       blockedThrowTo(cap,source,target);
-       *out = target;
-       return THROWTO_BLOCKED;
+        blockedThrowTo(cap,target,msg);
+        return THROWTO_BLOCKED;
 
-#ifndef THREADEDED_RTS
+#if !defined(THREADEDED_RTS)
     case BlockedOnRead:
     case BlockedOnWrite:
     case BlockedOnDelay:
 #if defined(mingw32_HOST_OS)
     case BlockedOnDoProc:
 #endif
-       if ((target->flags & TSO_BLOCKEX) &&
-           ((target->flags & TSO_INTERRUPTIBLE) == 0)) {
-           blockedThrowTo(cap,source,target);
-           return THROWTO_BLOCKED;
-       } else {
-           removeFromQueues(cap,target);
-           raiseAsync(cap, target, exception, rtsFalse, NULL);
-           return THROWTO_SUCCESS;
-       }
+        if ((target->flags & TSO_BLOCKEX) &&
+            ((target->flags & TSO_INTERRUPTIBLE) == 0)) {
+            blockedThrowTo(cap,target,msg);
+            return THROWTO_BLOCKED;
+        } else {
+            removeFromQueues(cap,target);
+            raiseAsync(cap, target, msg->exception, false, NULL);
+            return THROWTO_SUCCESS;
+        }
 #endif
 
+    case ThreadMigrating:
+        // if it is ThreadMigrating and tso->cap is ours, then it
+        // *must* be migrating *to* this capability.  If it were
+        // migrating away from the capability, then tso->cap would
+        // point to the destination.
+        //
+        // There is a MSG_WAKEUP in the message queue for this thread,
+        // but we can just do it preemptively:
+        tryWakeupThread(cap, target);
+        // and now retry, the thread should be runnable.
+        goto retry;
+
     default:
-       barf("throwTo: unrecognised why_blocked value");
+        barf("throwTo: unrecognised why_blocked (%d)", target->why_blocked);
     }
     barf("throwTo");
 }
 
-// Block a TSO on another TSO's blocked_exceptions queue.
-// Precondition: we hold an exclusive lock on the target TSO (this is
-// complex to achieve as there's no single lock on a TSO; see
-// throwTo()).
 static void
-blockedThrowTo (Capability *cap, StgTSO *source, StgTSO *target)
+throwToSendMsg (Capability *cap STG_UNUSED,
+                Capability *target_cap USED_IF_THREADS,
+                MessageThrowTo *msg USED_IF_THREADS)
+
 {
-    debugTrace(DEBUG_sched, "throwTo: blocking on thread %lu", (unsigned long)target->id);
-    setTSOLink(cap, source, target->blocked_exceptions);
-    target->blocked_exceptions = source;
-    dirty_TSO(cap,target); // we modified the blocked_exceptions queue
-    
-    source->block_info.tso = target;
-    write_barrier(); // throwTo_exception *must* be visible if BlockedOnException is.
-    source->why_blocked = BlockedOnException;
-}
+#if defined(THREADED_RTS)
+    debugTraceCap(DEBUG_sched, cap, "throwTo: sending a throwto message to cap %lu", (unsigned long)target_cap->no);
 
+    sendMessage(cap, target_cap, (Message*)msg);
+#endif
+}
 
-#ifdef THREADED_RTS
+// Block a throwTo message on the target TSO's blocked_exceptions
+// queue.  The current Capability must own the target TSO in order to
+// modify the blocked_exceptions queue.
 void
-throwToReleaseTarget (void *tso)
+blockedThrowTo (Capability *cap, StgTSO *target, MessageThrowTo *msg)
 {
-    unlockTSO((StgTSO *)tso);
+    debugTraceCap(DEBUG_sched, cap, "throwTo: blocking on thread %lu",
+                  (unsigned long)target->id);
+
+    ASSERT(target->cap == cap);
+
+    msg->link = target->blocked_exceptions;
+    target->blocked_exceptions = msg;
+    dirty_TSO(cap,target); // we modified the blocked_exceptions queue
 }
-#endif
 
 /* -----------------------------------------------------------------------------
    Waking up threads blocked in throwTo
@@ -525,10 +540,12 @@ throwToReleaseTarget (void *tso)
 int
 maybePerformBlockedException (Capability *cap, StgTSO *tso)
 {
+    MessageThrowTo *msg;
+    const StgInfoTable *i;
     StgTSO *source;
-    
+
     if (tso->what_next == ThreadComplete || tso->what_next == ThreadFinished) {
-        if (tso->blocked_exceptions != END_TSO_QUEUE) {
+        if (tso->blocked_exceptions != END_BLOCKED_EXCEPTIONS_QUEUE) {
             awakenBlockedExceptionQueue(cap,tso);
             return 1;
         } else {
@@ -536,63 +553,58 @@ maybePerformBlockedException (Capability *cap, StgTSO *tso)
         }
     }
 
-    if (tso->blocked_exceptions != END_TSO_QUEUE && 
+    if (tso->blocked_exceptions != END_BLOCKED_EXCEPTIONS_QUEUE &&
         (tso->flags & TSO_BLOCKEX) != 0) {
-        debugTrace(DEBUG_sched, "throwTo: thread %lu has blocked exceptions but is inside block", (unsigned long)tso->id);
+        debugTraceCap(DEBUG_sched, cap, "throwTo: thread %lu has blocked exceptions but is inside block", (unsigned long)tso->id);
     }
 
-    if (tso->blocked_exceptions != END_TSO_QUEUE
-       && ((tso->flags & TSO_BLOCKEX) == 0
-           || ((tso->flags & TSO_INTERRUPTIBLE) && interruptible(tso)))) {
-
-       // Lock the TSO, this gives us exclusive access to the queue
-       lockTSO(tso);
-
-       // Check the queue again; it might have changed before we
-       // locked it.
-       if (tso->blocked_exceptions == END_TSO_QUEUE) {
-           unlockTSO(tso);
-           return 0;
-       }
-
-       // We unblock just the first thread on the queue, and perform
-       // its throw immediately.
-       source = tso->blocked_exceptions;
-       performBlockedException(cap, source, tso);
-       tso->blocked_exceptions = unblockOne_(cap, source, 
-                                             rtsFalse/*no migrate*/);
-       unlockTSO(tso);
+    if (tso->blocked_exceptions != END_BLOCKED_EXCEPTIONS_QUEUE
+        && ((tso->flags & TSO_BLOCKEX) == 0
+            || ((tso->flags & TSO_INTERRUPTIBLE) && interruptible(tso)))) {
+
+        // We unblock just the first thread on the queue, and perform
+        // its throw immediately.
+    loop:
+        msg = tso->blocked_exceptions;
+        if (msg == END_BLOCKED_EXCEPTIONS_QUEUE) return 0;
+        i = lockClosure((StgClosure*)msg);
+        tso->blocked_exceptions = (MessageThrowTo*)msg->link;
+        if (i == &stg_MSG_NULL_info) {
+            unlockClosure((StgClosure*)msg,i);
+            goto loop;
+        }
+
+        throwToSingleThreaded(cap, msg->target, msg->exception);
+        source = msg->source;
+        doneWithMsgThrowTo(msg);
+        tryWakeupThread(cap, source);
         return 1;
     }
     return 0;
 }
 
 // awakenBlockedExceptionQueue(): Just wake up the whole queue of
-// blocked exceptions and let them try again.
+// blocked exceptions.
 
 void
 awakenBlockedExceptionQueue (Capability *cap, StgTSO *tso)
 {
-    lockTSO(tso);
-    awakenBlockedQueue(cap, tso->blocked_exceptions);
-    tso->blocked_exceptions = END_TSO_QUEUE;
-    unlockTSO(tso);
-}    
-
-static void
-performBlockedException (Capability *cap, StgTSO *source, StgTSO *target)
-{
-    StgClosure *exception;
-
-    ASSERT(source->why_blocked == BlockedOnException);
-    ASSERT(source->block_info.tso->id == target->id);
-    ASSERT(source->sp[0] == (StgWord)&stg_block_throwto_info);
-    ASSERT(((StgTSO *)source->sp[1])->id == target->id);
-    // check ids not pointers, because the thread might be relocated
+    MessageThrowTo *msg;
+    const StgInfoTable *i;
+    StgTSO *source;
 
-    exception = (StgClosure *)source->sp[2];
-    throwToSingleThreaded(cap, target, exception);
-    source->sp += 3;
+    for (msg = tso->blocked_exceptions; msg != END_BLOCKED_EXCEPTIONS_QUEUE;
+         msg = (MessageThrowTo*)msg->link) {
+        i = lockClosure((StgClosure *)msg);
+        if (i != &stg_MSG_NULL_info) {
+            source = msg->source;
+            doneWithMsgThrowTo(msg);
+            tryWakeupThread(cap, source);
+        } else {
+            unlockClosure((StgClosure *)msg,i);
+        }
+    }
+    tso->blocked_exceptions = END_BLOCKED_EXCEPTIONS_QUEUE;
 }
 
 /* -----------------------------------------------------------------------------
@@ -606,11 +618,54 @@ performBlockedException (Capability *cap, StgTSO *source, StgTSO *target)
    -------------------------------------------------------------------------- */
 
 static void
+removeFromMVarBlockedQueue (StgTSO *tso)
+{
+    StgMVar *mvar = (StgMVar*)tso->block_info.closure;
+    StgMVarTSOQueue *q = (StgMVarTSOQueue*)tso->_link;
+
+    if (q == (StgMVarTSOQueue*)END_TSO_QUEUE) {
+        // already removed from this MVar
+        return;
+    }
+
+    // Assume the MVar is locked. (not assertable; sometimes it isn't
+    // actually WHITEHOLE'd).
+
+    // We want to remove the MVAR_TSO_QUEUE object from the queue.  It
+    // isn't doubly-linked so we can't actually remove it; instead we
+    // just overwrite it with an IND if possible and let the GC short
+    // it out.  However, we have to be careful to maintain the deque
+    // structure:
+
+    if (mvar->head == q) {
+        mvar->head = q->link;
+        OVERWRITE_INFO(q, &stg_IND_info);
+        if (mvar->tail == q) {
+            mvar->tail = (StgMVarTSOQueue*)END_TSO_QUEUE;
+        }
+    }
+    else if (mvar->tail == q) {
+        // we can't replace it with an IND in this case, because then
+        // we lose the tail pointer when the GC shorts out the IND.
+        // So we use MSG_NULL as a kind of non-dupable indirection;
+        // these are ignored by takeMVar/putMVar.
+        OVERWRITE_INFO(q, &stg_MSG_NULL_info);
+    }
+    else {
+        OVERWRITE_INFO(q, &stg_IND_info);
+    }
+
+    // revoke the MVar operation
+    tso->_link = END_TSO_QUEUE;
+}
+
+static void
 removeFromQueues(Capability *cap, StgTSO *tso)
 {
   switch (tso->why_blocked) {
 
   case NotBlocked:
+  case ThreadMigrating:
       return;
 
   case BlockedOnSTM:
@@ -623,29 +678,26 @@ removeFromQueues(Capability *cap, StgTSO *tso)
     goto done;
 
   case BlockedOnMVar:
-      removeThreadFromMVarQueue(cap, (StgMVar *)tso->block_info.closure, tso);
+  case BlockedOnMVarRead:
+      removeFromMVarBlockedQueue(tso);
       goto done;
 
   case BlockedOnBlackHole:
-      removeThreadFromQueue(cap, &blackhole_queue, tso);
+      // nothing to do
       goto done;
 
-  case BlockedOnException:
-    {
-      StgTSO *target  = tso->block_info.tso;
-
-      // NO: when called by threadPaused(), we probably have this
-      // TSO already locked (WHITEHOLEd) because we just placed
-      // ourselves on its queue.
-      // ASSERT(get_itbl(target)->type == TSO);
-
-      while (target->what_next == ThreadRelocated) {
-         target = target->_link;
-      }
-      
-      removeThreadFromQueue(cap, &target->blocked_exceptions, tso);
-      goto done;
-    }
+  case BlockedOnMsgThrowTo:
+  {
+      MessageThrowTo *m = tso->block_info.throwto;
+      // The message is locked by us, unless we got here via
+      // deleteAllThreads(), in which case we own all the
+      // capabilities.
+      // ASSERT(m->header.info == &stg_WHITEHOLE_info);
+
+      // unlock and revoke it at the same time
+      doneWithMsgThrowTo(m);
+      break;
+  }
 
 #if !defined(THREADED_RTS)
   case BlockedOnRead:
@@ -664,7 +716,7 @@ removeFromQueues(Capability *cap, StgTSO *tso)
 
   case BlockedOnDelay:
         removeThreadFromQueue(cap, &sleeping_queue, tso);
-       goto done;
+        goto done;
 #endif
 
   default:
@@ -672,7 +724,8 @@ removeFromQueues(Capability *cap, StgTSO *tso)
   }
 
  done:
-  unblockOne(cap, tso);
+  tso->why_blocked = NotBlocked;
+  appendToRunQueue(cap, tso);
 }
 
 /* -----------------------------------------------------------------------------
@@ -682,14 +735,15 @@ removeFromQueues(Capability *cap, StgTSO *tso)
  * asynchronous exception in an existing thread.
  *
  * We first remove the thread from any queue on which it might be
- * blocked.  The possible blockages are MVARs and BLACKHOLE_BQs.
+ * blocked.  The possible blockages are MVARs, BLOCKING_QUEUEs, and
+ * TSO blocked_exception queues.
  *
  * We strip the stack down to the innermost CATCH_FRAME, building
- * thunks in the heap for all the active computations, so they can 
+ * thunks in the heap for all the active computations, so they can
  * be restarted if necessary.  When we reach a CATCH_FRAME, we build
  * an application of the handler to the exception, and push it on
  * the top of the stack.
- * 
+ *
  * How exactly do we save all the active computations?  We create an
  * AP_STACK for every UpdateFrame on the stack.  Entering one of these
  * AP_STACKs pushes everything from the corresponding update frame
@@ -700,7 +754,7 @@ removeFromQueues(Capability *cap, StgTSO *tso)
  * exactly as it did when we killed the TSO and we can continue
  * execution by entering the closure on top of the stack.
  *
- * We can also kill a thread entirely - this happens if either (a) the 
+ * We can also kill a thread entirely - this happens if either (a) the
  * exception passed to raiseAsync is NULL, or (b) there's no
  * CATCH_FRAME on the stack.  In either case, we strip the entire
  * stack and replace the thread with a zombie.
@@ -712,38 +766,45 @@ removeFromQueues(Capability *cap, StgTSO *tso)
  *
  * -------------------------------------------------------------------------- */
 
-static void
-raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception, 
-          rtsBool stop_at_atomically, StgUpdateFrame *stop_here)
+StgTSO *
+raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
+           bool stop_at_atomically, StgUpdateFrame *stop_here)
 {
-    StgRetInfoTable *info;
+    const StgRetInfoTable *info;
     StgPtr sp, frame;
     StgClosure *updatee;
-    nat i;
+    uint32_t i;
+    StgStack *stack;
+
+    debugTraceCap(DEBUG_sched, cap,
+                  "raising exception in thread %ld.", (long)tso->id);
 
-    debugTrace(DEBUG_sched,
-              "raising exception in thread %ld.", (long)tso->id);
-    
 #if defined(PROFILING)
-    /* 
+    /*
      * Debugging tool: on raising an  exception, show where we are.
      * See also Exception.cmm:stg_raisezh.
-     * This wasn't done for asynchronous exceptions originally; see #1450 
+     * This wasn't done for asynchronous exceptions originally; see #1450
      */
-    if (RtsFlags.ProfFlags.showCCSOnException)
+    if (RtsFlags.ProfFlags.showCCSOnException && exception != NULL)
     {
-        fprintCCS_stderr(tso->prof.CCCS);
+        fprintCCS_stderr(tso->prof.cccs,exception,tso);
     }
 #endif
+    // ASSUMES: the thread is not already complete or dead
+    // Upper layers should deal with that.
+    ASSERT(tso->what_next != ThreadComplete &&
+           tso->what_next != ThreadKilled);
+
+    // only if we own this TSO (except that deleteThread() calls this)
+    ASSERT(tso->cap == cap);
+
+    stack = tso->stackobj;
 
     // mark it dirty; we're about to change its stack.
     dirty_TSO(cap, tso);
+    dirty_STACK(cap, stack);
 
-    sp = tso->sp;
-    
-    // ASSUMES: the thread is not already complete or dead.  Upper
-    // layers should deal with that.
-    ASSERT(tso->what_next != ThreadComplete && tso->what_next != ThreadKilled);
+    sp = stack->sp;
 
     if (stop_here != NULL) {
         updatee = stop_here->updatee;
@@ -755,66 +816,70 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
     // the top of the stack, so we have to arrange that this is the case...
     //
     if (sp[0] == (W_)&stg_enter_info) {
-       sp++;
+        sp++;
     } else {
-       sp--;
-       sp[0] = (W_)&stg_dummy_ret_closure;
+        sp--;
+        sp[0] = (W_)&stg_dummy_ret_closure;
     }
 
     frame = sp + 1;
     while (stop_here == NULL || frame < (StgPtr)stop_here) {
 
-       // 1. Let the top of the stack be the "current closure"
-       //
-       // 2. Walk up the stack until we find either an UPDATE_FRAME or a
-       // CATCH_FRAME.
-       //
-       // 3. If it's an UPDATE_FRAME, then make an AP_STACK containing the
-       // current closure applied to the chunk of stack up to (but not
-       // including) the update frame.  This closure becomes the "current
-       // closure".  Go back to step 2.
-       //
-       // 4. If it's a CATCH_FRAME, then leave the exception handler on
-       // top of the stack applied to the exception.
-       // 
-       // 5. If it's a STOP_FRAME, then kill the thread.
-        // 
-        // NB: if we pass an ATOMICALLY_FRAME then abort the associated 
+        // 1. Let the top of the stack be the "current closure"
+        //
+        // 2. Walk up the stack until we find either an UPDATE_FRAME or a
+        // CATCH_FRAME.
+        //
+        // 3. If it's an UPDATE_FRAME, then make an AP_STACK containing the
+        // current closure applied to the chunk of stack up to (but not
+        // including) the update frame.  This closure becomes the "current
+        // closure".  Go back to step 2.
+        //
+        // 4. If it's a CATCH_FRAME, then leave the exception handler on
+        // top of the stack applied to the exception.
+        //
+        // 5. If it's a STOP_FRAME, then kill the thread.
+        //
+        // 6. If it's an UNDERFLOW_FRAME, then continue with the next
+        //    stack chunk.
+        //
+        // NB: if we pass an ATOMICALLY_FRAME then abort the associated
         // transaction
-       
-       info = get_ret_itbl((StgClosure *)frame);
-
-       switch (info->i.type) {
-
-       case UPDATE_FRAME:
-       {
-           StgAP_STACK * ap;
-           nat words;
-           
-           // First build an AP_STACK consisting of the stack chunk above the
-           // current update frame, with the top word on the stack as the
-           // fun field.
-           //
-           words = frame - sp - 1;
-           ap = (StgAP_STACK *)allocate(cap,AP_STACK_sizeW(words));
-           
-           ap->size = words;
-           ap->fun  = (StgClosure *)sp[0];
-           sp++;
-           for(i=0; i < (nat)words; ++i) {
-               ap->payload[i] = (StgClosure *)*sp++;
-           }
-           
-           SET_HDR(ap,&stg_AP_STACK_info,
-                   ((StgClosure *)frame)->header.prof.ccs /* ToDo */); 
-           TICK_ALLOC_UP_THK(words+1,0);
-           
-           //IF_DEBUG(scheduler,
-           //       debugBelch("sched: Updating ");
-           //       printPtr((P_)((StgUpdateFrame *)frame)->updatee); 
-           //       debugBelch(" with ");
-           //       printObj((StgClosure *)ap);
-           //  );
+
+        info = get_ret_itbl((StgClosure *)frame);
+
+        switch (info->i.type) {
+
+        case UPDATE_FRAME:
+        {
+            StgAP_STACK * ap;
+            uint32_t words;
+
+            // First build an AP_STACK consisting of the stack chunk above the
+            // current update frame, with the top word on the stack as the
+            // fun field.
+            //
+            words = frame - sp - 1;
+            ap = (StgAP_STACK *)allocate(cap,AP_STACK_sizeW(words));
+
+            ap->size = words;
+            ap->fun  = (StgClosure *)sp[0];
+
+            sp++;
+            for(i=0; i < words; ++i) {
+                ap->payload[i] = (StgClosure *)*sp++;
+            }
+
+            SET_HDR(ap,&stg_AP_STACK_info,
+                    ((StgClosure *)frame)->header.prof.ccs /* ToDo */);
+            TICK_ALLOC_UP_THK(WDS(words+1),0);
+
+            //IF_DEBUG(scheduler,
+            //       debugBelch("sched: Updating ");
+            //       printPtr((P_)((StgUpdateFrame *)frame)->updatee);
+            //       debugBelch(" with ");
+            //       printObj((StgClosure *)ap);
+            //  );
 
             if (((StgUpdateFrame *)frame)->updatee == updatee) {
                 // If this update frame points to the same closure as
@@ -827,70 +892,107 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
                 // Perform the update
                 // TODO: this may waste some work, if the thunk has
                 // already been updated by another thread.
-                UPD_IND(((StgUpdateFrame *)frame)->updatee, (StgClosure *)ap);
+                updateThunk(cap, tso,
+                            ((StgUpdateFrame *)frame)->updatee, (StgClosure *)ap);
             }
 
-           sp += sizeofW(StgUpdateFrame) - 1;
-           sp[0] = (W_)ap; // push onto stack
-           frame = sp + 1;
-           continue; //no need to bump frame
-       }
-
-       case STOP_FRAME:
-       {
-           // We've stripped the entire stack, the thread is now dead.
-           tso->what_next = ThreadKilled;
-           tso->sp = frame + sizeofW(StgStopFrame);
-           return;
-       }
-
-       case CATCH_FRAME:
-           // If we find a CATCH_FRAME, and we've got an exception to raise,
-           // then build the THUNK raise(exception), and leave it on
-           // top of the CATCH_FRAME ready to enter.
-           //
-       {
-#ifdef PROFILING
-           StgCatchFrame *cf = (StgCatchFrame *)frame;
-#endif
-           StgThunk *raise;
-           
-           if (exception == NULL) break;
-
-           // we've got an exception to raise, so let's pass it to the
-           // handler in this frame.
-           //
-           raise = (StgThunk *)allocate(cap,sizeofW(StgThunk)+1);
-           TICK_ALLOC_SE_THK(1,0);
-           SET_HDR(raise,&stg_raise_info,cf->header.prof.ccs);
-           raise->payload[0] = exception;
-           
-           // throw away the stack from Sp up to the CATCH_FRAME.
-           //
-           sp = frame - 1;
-           
-           /* Ensure that async excpetions are blocked now, so we don't get
-            * a surprise exception before we get around to executing the
-            * handler.
-            */
-           tso->flags |= TSO_BLOCKEX | TSO_INTERRUPTIBLE;
-
-           /* Put the newly-built THUNK on top of the stack, ready to execute
-            * when the thread restarts.
-            */
-           sp[0] = (W_)raise;
-           sp[-1] = (W_)&stg_enter_info;
-           tso->sp = sp-1;
-           tso->what_next = ThreadRunGHC;
-           IF_DEBUG(sanity, checkTSO(tso));
-           return;
-       }
-           
-       case ATOMICALLY_FRAME:
-           if (stop_at_atomically) {
-               ASSERT(tso->trec->enclosing_trec == NO_TREC);
-               stmCondemnTransaction(cap, tso -> trec);
-               tso->sp = frame - 2;
+            sp += sizeofW(StgUpdateFrame) - 1;
+            sp[0] = (W_)ap; // push onto stack
+            frame = sp + 1;
+            continue; //no need to bump frame
+        }
+
+        case UNDERFLOW_FRAME:
+        {
+            StgAP_STACK * ap;
+            uint32_t words;
+
+            // First build an AP_STACK consisting of the stack chunk above the
+            // current update frame, with the top word on the stack as the
+            // fun field.
+            //
+            words = frame - sp - 1;
+            ap = (StgAP_STACK *)allocate(cap,AP_STACK_sizeW(words));
+
+            ap->size = words;
+            ap->fun  = (StgClosure *)sp[0];
+            sp++;
+            for(i=0; i < words; ++i) {
+                ap->payload[i] = (StgClosure *)*sp++;
+            }
+
+            SET_HDR(ap,&stg_AP_STACK_NOUPD_info,
+                    ((StgClosure *)frame)->header.prof.ccs /* ToDo */);
+            TICK_ALLOC_SE_THK(WDS(words+1),0);
+
+            stack->sp = sp;
+            threadStackUnderflow(cap,tso);
+            stack = tso->stackobj;
+            sp = stack->sp;
+
+            sp--;
+            sp[0] = (W_)ap;
+            frame = sp + 1;
+            continue;
+        }
+
+        case STOP_FRAME:
+        {
+            // We've stripped the entire stack, the thread is now dead.
+            tso->what_next = ThreadKilled;
+            stack->sp = frame + sizeofW(StgStopFrame);
+            goto done;
+        }
+
+        case CATCH_FRAME:
+            // If we find a CATCH_FRAME, and we've got an exception to raise,
+            // then build the THUNK raise(exception), and leave it on
+            // top of the CATCH_FRAME ready to enter.
+            //
+        {
+            StgCatchFrame *cf = (StgCatchFrame *)frame;
+            StgThunk *raise;
+
+            if (exception == NULL) break;
+
+            // we've got an exception to raise, so let's pass it to the
+            // handler in this frame.
+            //
+            raise = (StgThunk *)allocate(cap,sizeofW(StgThunk)+1);
+            TICK_ALLOC_SE_THK(WDS(1),0);
+            SET_HDR(raise,&stg_raise_info,cf->header.prof.ccs);
+            raise->payload[0] = exception;
+
+            // throw away the stack from Sp up to the CATCH_FRAME.
+            //
+            sp = frame - 1;
+
+            /* Ensure that async exceptions are blocked now, so we don't get
+             * a surprise exception before we get around to executing the
+             * handler.
+             */
+            tso->flags |= TSO_BLOCKEX;
+            if ((cf->exceptions_blocked & TSO_INTERRUPTIBLE) == 0) {
+                tso->flags &= ~TSO_INTERRUPTIBLE;
+            } else {
+                tso->flags |= TSO_INTERRUPTIBLE;
+            }
+
+            /* Put the newly-built THUNK on top of the stack, ready to execute
+             * when the thread restarts.
+             */
+            sp[0] = (W_)raise;
+            sp[-1] = (W_)&stg_enter_info;
+            stack->sp = sp-1;
+            tso->what_next = ThreadRunGHC;
+            goto done;
+        }
+
+        case ATOMICALLY_FRAME:
+            if (stop_at_atomically) {
+                ASSERT(tso->trec->enclosing_trec == NO_TREC);
+                stmCondemnTransaction(cap, tso -> trec);
+                stack->sp = frame - 2;
                 // The ATOMICALLY_FRAME expects to be returned a
                 // result from the transaction, which it stores in the
                 // stack frame.  Hence we arrange to return a dummy
@@ -899,44 +1001,95 @@ raiseAsync(Capability *cap, StgTSO *tso, StgClosure *exception,
                 // ATOMICALLY_FRAME instance for condemned
                 // transactions, but I don't fully understand the
                 // interaction with STM invariants.
-                tso->sp[1] = (W_)&stg_NO_TREC_closure;
-                tso->sp[0] = (W_)&stg_gc_unpt_r1_info;
-               tso->what_next = ThreadRunGHC;
-               return;
-           }
-           // Not stop_at_atomically... fall through and abort the
-           // transaction.
-           
-       case CATCH_STM_FRAME:
-       case CATCH_RETRY_FRAME:
-           // IF we find an ATOMICALLY_FRAME then we abort the
-           // current transaction and propagate the exception.  In
-           // this case (unlike ordinary exceptions) we do not care
-           // whether the transaction is valid or not because its
-           // possible validity cannot have caused the exception
-           // and will not be visible after the abort.
-
-               {
+                stack->sp[1] = (W_)&stg_NO_TREC_closure;
+                stack->sp[0] = (W_)&stg_ret_p_info;
+                tso->what_next = ThreadRunGHC;
+                goto done;
+            }
+            else
+            {
+                // Freezing an STM transaction.  Just aborting the
+                // transaction would be wrong; this is what we used to
+                // do, and it goes wrong if the ATOMICALLY_FRAME ever
+                // gets back onto the stack again, which it will do if
+                // the transaction is inside unsafePerformIO or
+                // unsafeInterleaveIO and hence inside an UPDATE_FRAME.
+                //
+                // So we want to make it so that if the enclosing
+                // computation is resumed, we will re-execute the
+                // transaction.  We therefore:
+                //
+                //   1. abort the current transaction
+                //   2. replace the stack up to and including the
+                //      atomically frame with a closure representing
+                //      a call to "atomically x", where x is the code
+                //      of the transaction.
+                //   3. continue stripping the stack
+                //
+                StgTRecHeader *trec = tso->trec;
+                StgTRecHeader *outer = trec->enclosing_trec;
+
+                StgThunk *atomically;
+                StgAtomicallyFrame *af = (StgAtomicallyFrame*)frame;
+
+                debugTraceCap(DEBUG_stm, cap,
+                              "raiseAsync: freezing atomically frame")
+                stmAbortTransaction(cap, trec);
+                stmFreeAbortedTRec(cap, trec);
+                tso->trec = outer;
+
+                atomically = (StgThunk*)allocate(cap,sizeofW(StgThunk)+1);
+                TICK_ALLOC_SE_THK(1,0);
+                SET_HDR(atomically,&stg_atomically_info,af->header.prof.ccs);
+                atomically->payload[0] = af->code;
+
+                // discard stack up to and including the ATOMICALLY_FRAME
+                frame += sizeofW(StgAtomicallyFrame);
+                sp = frame - 1;
+
+                // replace the ATOMICALLY_FRAME with call to atomically#
+                sp[0] = (W_)atomically;
+                continue;
+            }
+
+        case CATCH_STM_FRAME:
+        case CATCH_RETRY_FRAME:
+            // CATCH frames within an atomically block: abort the
+            // inner transaction and continue.  Eventually we will
+            // hit the outer transaction that will get frozen (see
+            // above).
+            //
+            // In this case (unlike ordinary exceptions) we do not care
+            // whether the transaction is valid or not because its
+            // possible validity cannot have caused the exception
+            // and will not be visible after the abort.
+        {
             StgTRecHeader *trec = tso -> trec;
             StgTRecHeader *outer = trec -> enclosing_trec;
-           debugTrace(DEBUG_stm, 
-                      "found atomically block delivering async exception");
+            debugTraceCap(DEBUG_stm, cap,
+                          "found atomically block delivering async exception");
             stmAbortTransaction(cap, trec);
-           stmFreeAbortedTRec(cap, trec);
+            stmFreeAbortedTRec(cap, trec);
             tso -> trec = outer;
-           break;
-           };
-           
-       default:
-           break;
-       }
-
-       // move on to the next stack frame
-       frame += stack_frame_sizeW((StgClosure *)frame);
+            break;
+        };
+
+        default:
+            break;
+        }
+
+        // move on to the next stack frame
+        frame += stack_frame_sizeW((StgClosure *)frame);
     }
 
-    // if we got here, then we stopped at stop_here
-    ASSERT(stop_here != NULL);
-}
+done:
+    IF_DEBUG(sanity, checkTSO(tso));
 
+    // wake it up
+    if (tso->why_blocked != NotBlocked) {
+        tso->why_blocked = NotBlocked;
+        appendToRunQueue(cap,tso);
+    }
 
+    return tso;
+}