Fix memory leak from #12664
[ghc.git] / rts / RtsAPI.c
index d0bdead..e724307 100644 (file)
@@ -8,29 +8,28 @@
 
 #include "PosixSource.h"
 #include "Rts.h"
-#include "OSThreads.h"
 #include "RtsAPI.h"
-#include "SchedAPI.h"
-#include "RtsFlags.h"
+#include "HsFFI.h"
+
 #include "RtsUtils.h"
 #include "Prelude.h"
 #include "Schedule.h"
 #include "Capability.h"
 #include "Stable.h"
-
-#include <stdlib.h>
+#include "Threads.h"
+#include "Weak.h"
 
 /* ----------------------------------------------------------------------------
    Building Haskell objects from C datatypes.
 
    TODO: Currently this code does not tag created pointers,
-         however it is not unsafe (the contructor code will do it)
+         however it is not unsafe (the constructor code will do it)
          just inefficient.
    ------------------------------------------------------------------------- */
 HaskellObj
 rts_mkChar (Capability *cap, HsChar c)
 {
-  StgClosure *p = (StgClosure *)allocateLocal(cap, CONSTR_sizeW(0,1));
+  StgClosure *p = (StgClosure *)allocate(cap, CONSTR_sizeW(0,1));
   SET_HDR(p, Czh_con_info, CCS_SYSTEM);
   p->payload[0]  = (StgClosure *)(StgWord)(StgChar)c;
   return p;
@@ -39,7 +38,7 @@ rts_mkChar (Capability *cap, HsChar c)
 HaskellObj
 rts_mkInt (Capability *cap, HsInt i)
 {
-  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+  StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
   SET_HDR(p, Izh_con_info, CCS_SYSTEM);
   p->payload[0]  = (StgClosure *)(StgInt)i;
   return p;
@@ -48,7 +47,7 @@ rts_mkInt (Capability *cap, HsInt i)
 HaskellObj
 rts_mkInt8 (Capability *cap, HsInt8 i)
 {
-  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+  StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
   SET_HDR(p, I8zh_con_info, CCS_SYSTEM);
   /* Make sure we mask out the bits above the lowest 8 */
   p->payload[0]  = (StgClosure *)(StgInt)i;
@@ -58,7 +57,7 @@ rts_mkInt8 (Capability *cap, HsInt8 i)
 HaskellObj
 rts_mkInt16 (Capability *cap, HsInt16 i)
 {
-  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+  StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
   SET_HDR(p, I16zh_con_info, CCS_SYSTEM);
   /* Make sure we mask out the relevant bits */
   p->payload[0]  = (StgClosure *)(StgInt)i;
@@ -68,7 +67,7 @@ rts_mkInt16 (Capability *cap, HsInt16 i)
 HaskellObj
 rts_mkInt32 (Capability *cap, HsInt32 i)
 {
-  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+  StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
   SET_HDR(p, I32zh_con_info, CCS_SYSTEM);
   p->payload[0]  = (StgClosure *)(StgInt)i;
   return p;
@@ -77,18 +76,16 @@ rts_mkInt32 (Capability *cap, HsInt32 i)
 HaskellObj
 rts_mkInt64 (Capability *cap, HsInt64 i)
 {
-  llong *tmp;
-  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,2));
+  StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,2));
   SET_HDR(p, I64zh_con_info, CCS_SYSTEM);
-  tmp  = (llong*)&(p->payload[0]);
-  *tmp = (StgInt64)i;
+  ASSIGN_Int64((P_)&(p->payload[0]), i);
   return p;
 }
 
 HaskellObj
 rts_mkWord (Capability *cap, HsWord i)
 {
-  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+  StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
   SET_HDR(p, Wzh_con_info, CCS_SYSTEM);
   p->payload[0]  = (StgClosure *)(StgWord)i;
   return p;
@@ -98,7 +95,7 @@ HaskellObj
 rts_mkWord8 (Capability *cap, HsWord8 w)
 {
   /* see rts_mkInt* comments */
-  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+  StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
   SET_HDR(p, W8zh_con_info, CCS_SYSTEM);
   p->payload[0]  = (StgClosure *)(StgWord)(w & 0xff);
   return p;
@@ -108,7 +105,7 @@ HaskellObj
 rts_mkWord16 (Capability *cap, HsWord16 w)
 {
   /* see rts_mkInt* comments */
-  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+  StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
   SET_HDR(p, W16zh_con_info, CCS_SYSTEM);
   p->payload[0]  = (StgClosure *)(StgWord)(w & 0xffff);
   return p;
@@ -118,7 +115,7 @@ HaskellObj
 rts_mkWord32 (Capability *cap, HsWord32 w)
 {
   /* see rts_mkInt* comments */
-  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+  StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
   SET_HDR(p, W32zh_con_info, CCS_SYSTEM);
   p->payload[0]  = (StgClosure *)(StgWord)(w & 0xffffffff);
   return p;
@@ -127,20 +124,18 @@ rts_mkWord32 (Capability *cap, HsWord32 w)
 HaskellObj
 rts_mkWord64 (Capability *cap, HsWord64 w)
 {
-  ullong *tmp;
-
-  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,2));
+  StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,2));
   /* see mk_Int8 comment */
   SET_HDR(p, W64zh_con_info, CCS_SYSTEM);
-  tmp  = (ullong*)&(p->payload[0]);
-  *tmp = (StgWord64)w;
+  ASSIGN_Word64((P_)&(p->payload[0]), w);
   return p;
 }
 
+
 HaskellObj
 rts_mkFloat (Capability *cap, HsFloat f)
 {
-  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,1));
+  StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,1));
   SET_HDR(p, Fzh_con_info, CCS_SYSTEM);
   ASSIGN_FLT((P_)p->payload, (StgFloat)f);
   return p;
@@ -149,7 +144,7 @@ rts_mkFloat (Capability *cap, HsFloat f)
 HaskellObj
 rts_mkDouble (Capability *cap, HsDouble d)
 {
-  StgClosure *p = (StgClosure *)allocateLocal(cap,CONSTR_sizeW(0,sizeofW(StgDouble)));
+  StgClosure *p = (StgClosure *)allocate(cap,CONSTR_sizeW(0,sizeofW(StgDouble)));
   SET_HDR(p, Dzh_con_info, CCS_SYSTEM);
   ASSIGN_DBL((P_)p->payload, (StgDouble)d);
   return p;
@@ -158,7 +153,7 @@ rts_mkDouble (Capability *cap, HsDouble d)
 HaskellObj
 rts_mkStablePtr (Capability *cap, HsStablePtr s)
 {
-  StgClosure *p = (StgClosure *)allocateLocal(cap,sizeofW(StgHeader)+1);
+  StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1);
   SET_HDR(p, StablePtr_con_info, CCS_SYSTEM);
   p->payload[0]  = (StgClosure *)s;
   return p;
@@ -167,7 +162,7 @@ rts_mkStablePtr (Capability *cap, HsStablePtr s)
 HaskellObj
 rts_mkPtr (Capability *cap, HsPtr a)
 {
-  StgClosure *p = (StgClosure *)allocateLocal(cap,sizeofW(StgHeader)+1);
+  StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1);
   SET_HDR(p, Ptr_con_info, CCS_SYSTEM);
   p->payload[0]  = (StgClosure *)a;
   return p;
@@ -176,7 +171,7 @@ rts_mkPtr (Capability *cap, HsPtr a)
 HaskellObj
 rts_mkFunPtr (Capability *cap, HsFunPtr a)
 {
-  StgClosure *p = (StgClosure *)allocateLocal(cap,sizeofW(StgHeader)+1);
+  StgClosure *p = (StgClosure *)allocate(cap,sizeofW(StgHeader)+1);
   SET_HDR(p, FunPtr_con_info, CCS_SYSTEM);
   p->payload[0]  = (StgClosure *)a;
   return p;
@@ -203,8 +198,11 @@ rts_apply (Capability *cap, HaskellObj f, HaskellObj arg)
 {
     StgThunk *ap;
 
-    ap = (StgThunk *)allocateLocal(cap,sizeofW(StgThunk) + 2);
-    SET_HDR(ap, (StgInfoTable *)&stg_ap_2_upd_info, CCS_SYSTEM);
+    ap = (StgThunk *)allocate(cap,sizeofW(StgThunk) + 2);
+    // Here we don't want to use CCS_SYSTEM, because it's a hidden cost centre,
+    // and evaluating Haskell code under a hidden cost centre leads to
+    // confusing profiling output. (#7753)
+    SET_HDR(ap, (StgInfoTable *)&stg_ap_2_upd_info, CCS_MAIN);
     ap->payload[0] = f;
     ap->payload[1] = arg;
     return (StgClosure *)ap;
@@ -267,13 +265,12 @@ rts_getInt32 (HaskellObj p)
 HsInt64
 rts_getInt64 (HaskellObj p)
 {
-    HsInt64* tmp;
     // See comment above:
     // ASSERT(p->header.info == I64zh_con_info ||
     //        p->header.info == I64zh_static_info);
-    tmp = (HsInt64*)&(UNTAG_CLOSURE(p)->payload[0]);
-    return *tmp;
+    return PK_Int64((P_)&(UNTAG_CLOSURE(p)->payload[0]));
 }
+
 HsWord
 rts_getWord (HaskellObj p)
 {
@@ -310,16 +307,13 @@ rts_getWord32 (HaskellObj p)
     return (HsWord32)(HsWord)(UNTAG_CLOSURE(p)->payload[0]);
 }
 
-
 HsWord64
 rts_getWord64 (HaskellObj p)
 {
-    HsWord64* tmp;
     // See comment above:
     // ASSERT(p->header.info == W64zh_con_info ||
     //        p->header.info == W64zh_static_info);
-    tmp = (HsWord64*)&(UNTAG_CLOSURE(p)->payload[0]);
-    return *tmp;
+    return PK_Word64((P_)&(UNTAG_CLOSURE(p)->payload[0]));
 }
 
 HsFloat
@@ -370,13 +364,13 @@ rts_getFunPtr (HaskellObj p)
 HsBool
 rts_getBool (HaskellObj p)
 {
-    StgInfoTable *info;
+    const StgInfoTable *info;
 
-    info = get_itbl((StgClosure *)UNTAG_CLOSURE(p));
+    info = get_itbl((const StgClosure *)UNTAG_CONST_CLOSURE(p));
     if (info->srt_bitmap == 0) { // srt_bitmap is the constructor tag
-       return 0;
+        return 0;
     } else {
-       return 1;
+        return 1;
     }
 }
 
@@ -385,34 +379,25 @@ rts_getBool (HaskellObj p)
    -------------------------------------------------------------------------- */
 
 INLINE_HEADER void pushClosure   (StgTSO *tso, StgWord c) {
-  tso->sp--;
-  tso->sp[0] = (W_) c;
+  tso->stackobj->sp--;
+  tso->stackobj->sp[0] = (W_) c;
 }
 
 StgTSO *
-createGenThread (Capability *cap, nat stack_size,  StgClosure *closure)
+createGenThread (Capability *cap, W_ stack_size,  StgClosure *closure)
 {
   StgTSO *t;
-#if defined(GRAN)
-  t = createThread (cap, stack_size, NO_PRI);
-#else
   t = createThread (cap, stack_size);
-#endif
   pushClosure(t, (W_)closure);
   pushClosure(t, (W_)&stg_enter_info);
   return t;
 }
 
 StgTSO *
-createIOThread (Capability *cap, nat stack_size,  StgClosure *closure)
+createIOThread (Capability *cap, W_ stack_size,  StgClosure *closure)
 {
   StgTSO *t;
-#if defined(GRAN)
-  t = createThread (cap, stack_size, NO_PRI);
-#else
   t = createThread (cap, stack_size);
-#endif
-  pushClosure(t, (W_)&stg_noforceIO_info);
   pushClosure(t, (W_)&stg_ap_v_info);
   pushClosure(t, (W_)closure);
   pushClosure(t, (W_)&stg_enter_info);
@@ -425,14 +410,10 @@ createIOThread (Capability *cap, nat stack_size,  StgClosure *closure)
  */
 
 StgTSO *
-createStrictIOThread(Capability *cap, nat stack_size,  StgClosure *closure)
+createStrictIOThread(Capability *cap, W_ stack_size,  StgClosure *closure)
 {
   StgTSO *t;
-#if defined(GRAN)
-  t = createThread(cap, stack_size, NO_PRI);
-#else
   t = createThread(cap, stack_size);
-#endif
   pushClosure(t, (W_)&stg_forceIO_info);
   pushClosure(t, (W_)&stg_ap_v_info);
   pushClosure(t, (W_)closure);
@@ -444,36 +425,39 @@ createStrictIOThread(Capability *cap, nat stack_size,  StgClosure *closure)
    Evaluating Haskell expressions
    ------------------------------------------------------------------------- */
 
-Capability *
-rts_eval (Capability *cap, HaskellObj p, /*out*/HaskellObj *ret)
+void rts_eval (/* inout */ Capability **cap,
+               /* in    */ HaskellObj p,
+               /* out */   HaskellObj *ret)
 {
     StgTSO *tso;
-    
-    tso = createGenThread(cap, RtsFlags.GcFlags.initialStkSize, p);
-    return scheduleWaitThread(tso,ret,cap);
+
+    tso = createGenThread(*cap, RtsFlags.GcFlags.initialStkSize, p);
+    scheduleWaitThread(tso,ret,cap);
 }
 
-Capability *
-rts_eval_ (Capability *cap, HaskellObj p, unsigned int stack_size, 
-          /*out*/HaskellObj *ret)
+void rts_eval_ (/* inout */ Capability **cap,
+                /* in    */ HaskellObj p,
+                /* in    */ unsigned int stack_size,
+                /* out   */ HaskellObj *ret)
 {
     StgTSO *tso;
 
-    tso = createGenThread(cap, stack_size, p);
-    return scheduleWaitThread(tso,ret,cap);
+    tso = createGenThread(*cap, stack_size, p);
+    scheduleWaitThread(tso,ret,cap);
 }
 
 /*
  * rts_evalIO() evaluates a value of the form (IO a), forcing the action's
  * result to WHNF before returning.
  */
-Capability *
-rts_evalIO (Capability *cap, HaskellObj p, /*out*/HaskellObj *ret)
+void rts_evalIO (/* inout */ Capability **cap,
+                 /* in    */ HaskellObj p,
+                 /* out */   HaskellObj *ret)
 {
-    StgTSO* tso; 
-    
-    tso = createStrictIOThread(cap, RtsFlags.GcFlags.initialStkSize, p);
-    return scheduleWaitThread(tso,ret,cap);
+    StgTSO* tso;
+
+    tso = createStrictIOThread(*cap, RtsFlags.GcFlags.initialStkSize, p);
+    scheduleWaitThread(tso,ret,cap);
 }
 
 /*
@@ -482,49 +466,50 @@ rts_evalIO (Capability *cap, HaskellObj p, /*out*/HaskellObj *ret)
  * action's result to WHNF before returning.  The result is returned
  * in a StablePtr.
  */
-Capability *
-rts_evalStableIO (Capability *cap, HsStablePtr s, /*out*/HsStablePtr *ret)
+void rts_evalStableIO (/* inout */ Capability **cap,
+                       /* in    */ HsStablePtr s,
+                       /* out */   HsStablePtr *ret)
 {
     StgTSO* tso;
     StgClosure *p, *r;
     SchedulerStatus stat;
-    
+
     p = (StgClosure *)deRefStablePtr(s);
-    tso = createStrictIOThread(cap, RtsFlags.GcFlags.initialStkSize, p);
+    tso = createStrictIOThread(*cap, RtsFlags.GcFlags.initialStkSize, p);
     // async exceptions are always blocked by default in the created
     // thread.  See #1048.
     tso->flags |= TSO_BLOCKEX | TSO_INTERRUPTIBLE;
-    cap = scheduleWaitThread(tso,&r,cap);
-    stat = rts_getSchedStatus(cap);
+    scheduleWaitThread(tso,&r,cap);
+    stat = rts_getSchedStatus(*cap);
 
     if (stat == Success && ret != NULL) {
-       ASSERT(r != NULL);
-       *ret = getStablePtr((StgPtr)r);
+        ASSERT(r != NULL);
+        *ret = getStablePtr((StgPtr)r);
     }
-
-    return cap;
 }
 
 /*
  * Like rts_evalIO(), but doesn't force the action's result.
  */
-Capability *
-rts_evalLazyIO (Capability *cap, HaskellObj p, /*out*/HaskellObj *ret)
+void rts_evalLazyIO (/* inout */ Capability **cap,
+                     /* in    */ HaskellObj p,
+                     /* out */   HaskellObj *ret)
 {
     StgTSO *tso;
 
-    tso = createIOThread(cap, RtsFlags.GcFlags.initialStkSize, p);
-    return scheduleWaitThread(tso,ret,cap);
+    tso = createIOThread(*cap, RtsFlags.GcFlags.initialStkSize, p);
+    scheduleWaitThread(tso,ret,cap);
 }
 
-Capability *
-rts_evalLazyIO_ (Capability *cap, HaskellObj p, unsigned int stack_size, 
-                /*out*/HaskellObj *ret)
+void rts_evalLazyIO_ (/* inout */ Capability **cap,
+                      /* in    */ HaskellObj p,
+                      /* in    */ unsigned int stack_size,
+                      /* out   */ HaskellObj *ret)
 {
     StgTSO *tso;
 
-    tso = createIOThread(cap, stack_size, p);
-    return scheduleWaitThread(tso,ret,cap);
+    tso = createIOThread(*cap, stack_size, p);
+    scheduleWaitThread(tso,ret,cap);
 }
 
 /* Convenience function for decoding the returned status. */
@@ -532,26 +517,35 @@ rts_evalLazyIO_ (Capability *cap, HaskellObj p, unsigned int stack_size,
 void
 rts_checkSchedStatus (char* site, Capability *cap)
 {
-    SchedulerStatus rc = cap->running_task->stat;
+    SchedulerStatus rc = cap->running_task->incall->rstat;
     switch (rc) {
     case Success:
-       return;
+        return;
     case Killed:
-       errorBelch("%s: uncaught exception",site);
-       stg_exit(EXIT_FAILURE);
+        errorBelch("%s: uncaught exception",site);
+        stg_exit(EXIT_FAILURE);
     case Interrupted:
-       errorBelch("%s: interrupted", site);
-       stg_exit(EXIT_FAILURE);
+        errorBelch("%s: interrupted", site);
+#ifdef THREADED_RTS
+        // The RTS is shutting down, and the process will probably
+        // soon exit.  We don't want to preempt the shutdown
+        // by exiting the whole process here, so we just terminate the
+        // current thread.  Don't forget to release the cap first though.
+        rts_unlock(cap);
+        shutdownThread();
+#else
+        stg_exit(EXIT_FAILURE);
+#endif
     default:
-       errorBelch("%s: Return code (%d) not ok",(site),(rc));  
-       stg_exit(EXIT_FAILURE);
+        errorBelch("%s: Return code (%d) not ok",(site),(rc));
+        stg_exit(EXIT_FAILURE);
     }
 }
 
 SchedulerStatus
 rts_getSchedStatus (Capability *cap)
 {
-    return cap->running_task->stat;
+    return cap->running_task->incall->rstat;
 }
 
 Capability *
@@ -560,16 +554,26 @@ rts_lock (void)
     Capability *cap;
     Task *task;
 
-    // ToDo: get rid of this lock in the common case.  We could store
-    // a free Task in thread-local storage, for example.  That would
-    // leave just one lock on the path into the RTS: cap->lock when
-    // acquiring the Capability.
-    ACQUIRE_LOCK(&sched_mutex);
     task = newBoundTask();
-    RELEASE_LOCK(&sched_mutex);
+
+    if (task->running_finalizers) {
+        errorBelch("error: a C finalizer called back into Haskell.\n"
+                   "   This was previously allowed, but is disallowed in GHC 6.10.2 and later.\n"
+                   "   To create finalizers that may call back into Haskell, use\n"
+                   "   Foreign.Concurrent.newForeignPtr instead of Foreign.newForeignPtr.");
+        stg_exit(EXIT_FAILURE);
+    }
 
     cap = NULL;
-    waitForReturnCapability(&cap, task);
+    waitForCapability(&cap, task);
+
+    if (task->incall->prev_stack == NULL) {
+      // This is a new outermost call from C into Haskell land.
+      // Until the corresponding call to rts_unlock, this task
+      // is doing work on behalf of the RTS.
+      traceTaskCreate(task, cap);
+    }
+
     return (Capability *)cap;
 }
 
@@ -603,4 +607,91 @@ rts_unlock (Capability *cap)
     // Finally, we can release the Task to the free list.
     boundTaskExiting(task);
     RELEASE_LOCK(&cap->lock);
+
+    if (task->incall == NULL) {
+      // This is the end of an outermost call from C into Haskell land.
+      // From here on, the task goes back to C land and we should not count
+      // it as doing work on behalf of the RTS.
+      traceTaskDelete(task);
+    }
+}
+
+void rts_done (void)
+{
+    freeMyTask();
+}
+
+/* -----------------------------------------------------------------------------
+   tryPutMVar from outside Haskell
+
+   The C call
+
+      hs_try_putmvar(cap, mvar)
+
+   is equivalent to the Haskell call
+
+      tryPutMVar mvar ()
+
+   but it is
+
+     * non-blocking: takes a bounded, short, amount of time
+     * asynchronous: the actual putMVar may be performed after the
+       call returns.  That's why hs_try_putmvar() doesn't return a
+       result to say whether the put succeeded.
+
+   NOTE: this call transfers ownership of the StablePtr to the RTS, which will
+   free it after the tryPutMVar has taken place.  The reason is that otherwise,
+   it would be very difficult for the caller to arrange to free the StablePtr
+   in all circumstances.
+
+   For more details, see the section "Waking up Haskell threads from C" in the
+   User's Guide.
+   -------------------------------------------------------------------------- */
+
+void hs_try_putmvar (/* in */ int capability,
+                     /* in */ HsStablePtr mvar)
+{
+    Task *task = getTask();
+    Capability *cap;
+
+    if (capability < 0) {
+        capability = task->preferred_capability;
+        if (capability < 0) {
+            capability = 0;
+        }
+    }
+    cap = capabilities[capability % enabled_capabilities];
+
+#if !defined(THREADED_RTS)
+
+    performTryPutMVar(cap, (StgMVar*)deRefStablePtr(mvar), Unit_closure);
+    freeStablePtr(mvar);
+
+#else
+
+    ACQUIRE_LOCK(&cap->lock);
+    // If the capability is free, we can perform the tryPutMVar immediately
+    if (cap->running_task == NULL) {
+        cap->running_task = task;
+        task->cap = cap;
+        RELEASE_LOCK(&cap->lock);
+
+        performTryPutMVar(cap, (StgMVar*)deRefStablePtr(mvar), Unit_closure);
+
+        freeStablePtr(mvar);
+
+        // Wake up the capability, which will start running the thread that we
+        // just awoke (if there was one).
+        releaseCapability(cap);
+    } else {
+        PutMVar *p = stgMallocBytes(sizeof(PutMVar),"hs_try_putmvar");
+        // We cannot deref the StablePtr if we don't have a capability,
+        // so we have to store it and deref it later.
+        p->mvar = mvar;
+        p->link = cap->putMVars;
+        cap->putMVars = p;
+        RELEASE_LOCK(&cap->lock);
+    }
+
+#endif
 }