Fix a cause of very occasional <<loop>> with parallel programs and
[ghc.git] / rts / ThreadPaused.c
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2006
4 *
5 * Tidying up a thread when it stops running
6 *
7 * ---------------------------------------------------------------------------*/
8
9 // #include "PosixSource.h"
10 #include "Rts.h"
11
12 #include "ThreadPaused.h"
13 #include "sm/Storage.h"
14 #include "Updates.h"
15 #include "RaiseAsync.h"
16 #include "Trace.h"
17 #include "Threads.h"
18
19 #include <string.h> // for memmove()
20
21 /* -----------------------------------------------------------------------------
22 * Stack squeezing
23 *
24 * Code largely pinched from old RTS, then hacked to bits. We also do
25 * lazy black holing here.
26 *
27 * -------------------------------------------------------------------------- */
28
29 struct stack_gap { StgWord gap_size; struct stack_gap *next_gap; };
30
31 static void
32 stackSqueeze(Capability *cap, StgTSO *tso, StgPtr bottom)
33 {
34 StgPtr frame;
35 rtsBool prev_was_update_frame;
36 StgClosure *updatee = NULL;
37 StgRetInfoTable *info;
38 StgWord current_gap_size;
39 struct stack_gap *gap;
40
41 // Stage 1:
42 // Traverse the stack upwards, replacing adjacent update frames
43 // with a single update frame and a "stack gap". A stack gap
44 // contains two values: the size of the gap, and the distance
45 // to the next gap (or the stack top).
46
47 frame = tso->stackobj->sp;
48
49 ASSERT(frame < bottom);
50
51 prev_was_update_frame = rtsFalse;
52 current_gap_size = 0;
53 gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame));
54
55 while (frame <= bottom) {
56
57 info = get_ret_itbl((StgClosure *)frame);
58 switch (info->i.type) {
59
60 case UPDATE_FRAME:
61 {
62 StgUpdateFrame *upd = (StgUpdateFrame *)frame;
63
64 if (prev_was_update_frame) {
65
66 TICK_UPD_SQUEEZED();
67 /* wasn't there something about update squeezing and ticky to be
68 * sorted out? oh yes: we aren't counting each enter properly
69 * in this case. See the log somewhere. KSW 1999-04-21
70 *
71 * Check two things: that the two update frames don't point to
72 * the same object, and that the updatee_bypass isn't already an
73 * indirection. Both of these cases only happen when we're in a
74 * block hole-style loop (and there are multiple update frames
75 * on the stack pointing to the same closure), but they can both
76 * screw us up if we don't check.
77 */
78 if (upd->updatee != updatee && !closure_IND(upd->updatee)) {
79 updateThunk(cap, tso, upd->updatee, updatee);
80 }
81
82 // now mark this update frame as a stack gap. The gap
83 // marker resides in the bottom-most update frame of
84 // the series of adjacent frames, and covers all the
85 // frames in this series.
86 current_gap_size += sizeofW(StgUpdateFrame);
87 ((struct stack_gap *)frame)->gap_size = current_gap_size;
88 ((struct stack_gap *)frame)->next_gap = gap;
89
90 frame += sizeofW(StgUpdateFrame);
91 continue;
92 }
93
94 // single update frame, or the topmost update frame in a series
95 else {
96 prev_was_update_frame = rtsTrue;
97 updatee = upd->updatee;
98 frame += sizeofW(StgUpdateFrame);
99 continue;
100 }
101 }
102
103 default:
104 prev_was_update_frame = rtsFalse;
105
106 // we're not in a gap... check whether this is the end of a gap
107 // (an update frame can't be the end of a gap).
108 if (current_gap_size != 0) {
109 gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame));
110 }
111 current_gap_size = 0;
112
113 frame += stack_frame_sizeW((StgClosure *)frame);
114 continue;
115 }
116 }
117
118 if (current_gap_size != 0) {
119 gap = (struct stack_gap *) (frame - sizeofW(StgUpdateFrame));
120 }
121
122 // Now we have a stack with gaps in it, and we have to walk down
123 // shoving the stack up to fill in the gaps. A diagram might
124 // help:
125 //
126 // +| ********* |
127 // | ********* | <- sp
128 // | |
129 // | | <- gap_start
130 // | ......... | |
131 // | stack_gap | <- gap | chunk_size
132 // | ......... | |
133 // | ......... | <- gap_end v
134 // | ********* |
135 // | ********* |
136 // | ********* |
137 // -| ********* |
138 //
139 // 'sp' points the the current top-of-stack
140 // 'gap' points to the stack_gap structure inside the gap
141 // ***** indicates real stack data
142 // ..... indicates gap
143 // <empty> indicates unused
144 //
145 {
146 StgWord8 *sp;
147 StgWord8 *gap_start, *next_gap_start, *gap_end;
148 nat chunk_size;
149
150 next_gap_start = (StgWord8*)gap + sizeof(StgUpdateFrame);
151 sp = next_gap_start;
152
153 while ((StgPtr)gap > tso->stackobj->sp) {
154
155 // we're working in *bytes* now...
156 gap_start = next_gap_start;
157 gap_end = gap_start - gap->gap_size * sizeof(W_);
158
159 gap = gap->next_gap;
160 next_gap_start = (StgWord8*)gap + sizeof(StgUpdateFrame);
161
162 chunk_size = gap_end - next_gap_start;
163 sp -= chunk_size;
164 memmove(sp, next_gap_start, chunk_size);
165 }
166
167 tso->stackobj->sp = (StgPtr)sp;
168 }
169 }
170
171 /* -----------------------------------------------------------------------------
172 * Pausing a thread
173 *
174 * We have to prepare for GC - this means doing lazy black holing
175 * here. We also take the opportunity to do stack squeezing if it's
176 * turned on.
177 * -------------------------------------------------------------------------- */
178 void
179 threadPaused(Capability *cap, StgTSO *tso)
180 {
181 StgClosure *frame;
182 StgRetInfoTable *info;
183 const StgInfoTable *bh_info;
184 const StgInfoTable *cur_bh_info USED_IF_THREADS;
185 StgClosure *bh;
186 StgPtr stack_end;
187 nat words_to_squeeze = 0;
188 nat weight = 0;
189 nat weight_pending = 0;
190 rtsBool prev_was_update_frame = rtsFalse;
191
192 // Check to see whether we have threads waiting to raise
193 // exceptions, and we're not blocking exceptions, or are blocked
194 // interruptibly. This is important; if a thread is running with
195 // TSO_BLOCKEX and becomes blocked interruptibly, this is the only
196 // place we ensure that the blocked_exceptions get a chance.
197 maybePerformBlockedException (cap, tso);
198 if (tso->what_next == ThreadKilled) { return; }
199
200 // NB. Blackholing is *compulsory*, we must either do lazy
201 // blackholing, or eager blackholing consistently. See Note
202 // [upd-black-hole] in sm/Scav.c.
203
204 stack_end = tso->stackobj->stack + tso->stackobj->stack_size;
205
206 frame = (StgClosure *)tso->stackobj->sp;
207
208 while ((P_)frame < stack_end) {
209 info = get_ret_itbl(frame);
210
211 switch (info->i.type) {
212
213 case UPDATE_FRAME:
214
215 // If we've already marked this frame, then stop here.
216 if (frame->header.info == (StgInfoTable *)&stg_marked_upd_frame_info) {
217 if (prev_was_update_frame) {
218 words_to_squeeze += sizeofW(StgUpdateFrame);
219 weight += weight_pending;
220 weight_pending = 0;
221 }
222 goto end;
223 }
224
225 SET_INFO(frame, (StgInfoTable *)&stg_marked_upd_frame_info);
226
227 bh = ((StgUpdateFrame *)frame)->updatee;
228 bh_info = bh->header.info;
229
230 #ifdef THREADED_RTS
231 retry:
232 #endif
233 // If the info table is a WHITEHOLE or a BLACKHOLE, then
234 // another thread has claimed it (via the SET_INFO()
235 // below), or is in the process of doing so. In that case
236 // we want to suspend the work that the current thread has
237 // done on this thunk and wait until the other thread has
238 // finished.
239 //
240 // If eager blackholing is taking place, it could be the
241 // case that the blackhole points to the current
242 // TSO. e.g.:
243 //
244 // this thread other thread
245 // --------------------------------------------------------
246 // c->indirectee = other_tso;
247 // c->header.info = EAGER_BH
248 // threadPaused()
249 // c->indirectee = other_tso;
250 // c->header.info = EAGER_BH
251 // c->header.info = BLACKHOLE
252 // threadPaused()
253 // *** c->header.info is now BLACKHOLE,
254 // c->indirectee points to this TSO
255 //
256 // So in this case do *not* suspend the work of the
257 // current thread, because the current thread will become
258 // deadlocked on itself. See #5226 for an instance of
259 // this bug.
260 //
261 if ((bh_info == &stg_WHITEHOLE_info ||
262 bh_info == &stg_BLACKHOLE_info)
263 &&
264 ((StgInd*)bh)->indirectee != (StgClosure*)tso)
265 {
266 debugTrace(DEBUG_squeeze,
267 "suspending duplicate work: %ld words of stack",
268 (long)((StgPtr)frame - tso->stackobj->sp));
269
270 // If this closure is already an indirection, then
271 // suspend the computation up to this point.
272 // NB. check raiseAsync() to see what happens when
273 // we're in a loop (#2783).
274 suspendComputation(cap,tso,(StgUpdateFrame*)frame);
275
276 // Now drop the update frame, and arrange to return
277 // the value to the frame underneath:
278 tso->stackobj->sp = (StgPtr)frame + sizeofW(StgUpdateFrame) - 2;
279 tso->stackobj->sp[1] = (StgWord)bh;
280 ASSERT(bh->header.info != &stg_TSO_info);
281 tso->stackobj->sp[0] = (W_)&stg_enter_info;
282
283 // And continue with threadPaused; there might be
284 // yet more computation to suspend.
285 frame = (StgClosure *)(tso->stackobj->sp + 2);
286 prev_was_update_frame = rtsFalse;
287 continue;
288 }
289
290
291 // zero out the slop so that the sanity checker can tell
292 // where the next closure is.
293 OVERWRITING_CLOSURE(bh);
294
295 // an EAGER_BLACKHOLE or CAF_BLACKHOLE gets turned into a
296 // BLACKHOLE here.
297 #ifdef THREADED_RTS
298 // first we turn it into a WHITEHOLE to claim it, and if
299 // successful we write our TSO and then the BLACKHOLE info pointer.
300 cur_bh_info = (const StgInfoTable *)
301 cas((StgVolatilePtr)&bh->header.info,
302 (StgWord)bh_info,
303 (StgWord)&stg_WHITEHOLE_info);
304
305 if (cur_bh_info != bh_info) {
306 bh_info = cur_bh_info;
307 goto retry;
308 }
309 #endif
310
311 // The payload of the BLACKHOLE points to the TSO
312 ((StgInd *)bh)->indirectee = (StgClosure *)tso;
313 write_barrier();
314 SET_INFO(bh,&stg_BLACKHOLE_info);
315
316 // .. and we need a write barrier, since we just mutated the closure:
317 recordClosureMutated(cap,bh);
318
319 // We pretend that bh has just been created.
320 LDV_RECORD_CREATE(bh);
321
322 frame = (StgClosure *) ((StgUpdateFrame *)frame + 1);
323 if (prev_was_update_frame) {
324 words_to_squeeze += sizeofW(StgUpdateFrame);
325 weight += weight_pending;
326 weight_pending = 0;
327 }
328 prev_was_update_frame = rtsTrue;
329 break;
330
331 case UNDERFLOW_FRAME:
332 case STOP_FRAME:
333 goto end;
334
335 // normal stack frames; do nothing except advance the pointer
336 default:
337 {
338 nat frame_size = stack_frame_sizeW(frame);
339 weight_pending += frame_size;
340 frame = (StgClosure *)((StgPtr)frame + frame_size);
341 prev_was_update_frame = rtsFalse;
342 }
343 }
344 }
345
346 end:
347 debugTrace(DEBUG_squeeze,
348 "words_to_squeeze: %d, weight: %d, squeeze: %s",
349 words_to_squeeze, weight,
350 weight < words_to_squeeze ? "YES" : "NO");
351
352 // Should we squeeze or not? Arbitrary heuristic: we squeeze if
353 // the number of words we have to shift down is less than the
354 // number of stack words we squeeze away by doing so.
355 if (RtsFlags.GcFlags.squeezeUpdFrames == rtsTrue &&
356 ((weight <= 8 && words_to_squeeze > 0) || weight < words_to_squeeze)) {
357 // threshold above bumped from 5 to 8 as a result of #2797
358 stackSqueeze(cap, tso, (StgPtr)frame);
359 tso->flags |= TSO_SQUEEZED;
360 // This flag tells threadStackOverflow() that the stack was
361 // squeezed, because it may not need to be expanded.
362 } else {
363 tso->flags &= ~TSO_SQUEEZED;
364 }
365 }