/* [ghc.git] rts/sm/GCThread.h — from commit "Detect overly long GC sync" */
1 /* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team 1998-2008
4 *
5 * Generational garbage collector
6 *
7 * Documentation on the architecture of the Garbage Collector can be
8 * found in the online commentary:
9 *
10 * https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/rts/storage/gc
11 *
12 * ---------------------------------------------------------------------------*/
13
14 #pragma once
15
16 #include "WSDeque.h"
17 #include "GetTime.h" // for Ticks
18
19 #include "BeginPrivate.h"
20
21 /* -----------------------------------------------------------------------------
22 General scheme
23
24 ToDo: move this to the wiki when the implementation is done.
25
26 We're only going to try to parallelise the copying GC for now. The
27 Plan is as follows.
28
29 Each thread has a gc_thread structure (see below) which holds its
30 thread-local data. We'll keep a pointer to this in a thread-local
31 variable, or possibly in a register.
32
33 In the gc_thread structure is a gen_workspace for each generation. The
34 primary purpose of the gen_workspace is to hold evacuated objects;
35 when an object is evacuated, it is copied to the "todo" block in
36 the thread's workspace for the appropriate generation. When the todo
37 block is full, it is pushed to the global gen->todos list, which
38 is protected by a lock. (In fact we interpose a one-place buffer
39 here to reduce contention.)
40
41 A thread repeatedly grabs a block of work from one of the
42 gen->todos lists, scavenges it, and keeps the scavenged block on
43 its own ws->scavd_list (this is to avoid unnecessary contention
44 returning the completed buffers back to the generation: we can just
45 collect them all later).
46
47 When there is no global work to do, we start scavenging the todo
48 blocks in the workspaces. This is where the scan_bd field comes
49 in: we can scan the contents of the todo block, when we have
50 scavenged the contents of the todo block (up to todo_bd->free), we
51 don't want to move this block immediately to the scavd_list,
52 because it is probably only partially full. So we remember that we
53 have scanned up to this point by saving the block in ws->scan_bd,
54 with the current scan pointer in ws->scan. Later, when more
55 objects have been copied to this block, we can come back and scan
56 the rest. When we visit this workspace again in the future,
57 scan_bd may still be the same as todo_bd, or it might be different:
58 if enough objects were copied into this block that it filled up,
59 then we will have allocated a new todo block, but *not* pushed the
60 old one to the generation, because it is partially scanned.
61
62 The reason to leave scanning the todo blocks until last is that we
63 want to deal with full blocks as far as possible.
64 ------------------------------------------------------------------------- */
65
66
67 /* -----------------------------------------------------------------------------
68 Generation Workspace
69
70 A generation workspace exists for each generation for each GC
71 thread. The GC thread takes a block from the todos list of the
72 generation into the scanbd and then scans it. Objects referred to
73 by those in the scan block are copied into the todo or scavd blocks
74 of the relevant generation.
75
76 ------------------------------------------------------------------------- */
77
78 typedef struct gen_workspace_ {
79 generation * gen; // the gen for this workspace
80 struct gc_thread_ * my_gct; // the gc_thread that contains this workspace
81
82 // where objects to be scavenged go
83 bdescr * todo_bd;
84 StgPtr todo_free; // free ptr for todo_bd
85 StgPtr todo_lim; // lim for todo_bd
86
87 WSDeque * todo_q;
88 bdescr * todo_overflow;
89 uint32_t n_todo_overflow;
90
91 // where large objects to be scavenged go
92 bdescr * todo_large_objects;
93
94 // Objects that have already been scavenged.
95 bdescr * scavd_list;
96 StgWord n_scavd_blocks; // count of blocks in this list
97 StgWord n_scavd_words;
98
99 // Partially-full, scavenged, blocks
100 bdescr * part_list;
101 StgWord n_part_blocks; // count of above
102 StgWord n_part_words;
103
104 StgWord pad[1];
105
106 } gen_workspace ATTRIBUTE_ALIGNED(64);
107 // align so that computing gct->gens[n] is a shift, not a multiply
108 // fails if the size is <64, which is why we need the pad above
109
110 /* ----------------------------------------------------------------------------
111 GC thread object
112
113 Every GC thread has one of these. It contains all the generation
114 specific workspaces and other GC thread local information. At some
115 later point it may be useful to move this data into the thread-local
116 storage (TLS) of the GC threads.
117 ------------------------------------------------------------------------- */
118
119 /* values for the wakeup field */
120 #define GC_THREAD_INACTIVE 0
121 #define GC_THREAD_STANDING_BY 1
122 #define GC_THREAD_RUNNING 2
123 #define GC_THREAD_WAITING_TO_CONTINUE 3
124
125 typedef struct gc_thread_ {
126 Capability *cap;
127
128 #if defined(THREADED_RTS)
129 OSThreadId id; // The OS thread that this struct belongs to
130 SpinLock gc_spin;
131 SpinLock mut_spin;
132 volatile StgWord wakeup; // NB not StgWord8; only StgWord is guaranteed atomic
133 #endif
134 uint32_t thread_index; // a zero based index identifying the thread
135
136 bdescr * free_blocks; // a buffer of free blocks for this thread
137 // during GC without accessing the block
138 // allocators spin lock.
139
140 // These two lists are chained through the STATIC_LINK() fields of static
141 // objects. Pointers are tagged with the current static_flag, so before
142 // following a pointer, untag it with UNTAG_STATIC_LIST_PTR().
143 StgClosure* static_objects; // live static objects
144 StgClosure* scavenged_static_objects; // static objects scavenged so far
145
146 W_ gc_count; // number of GCs this thread has done
147
148 // block that is currently being scanned
149 bdescr * scan_bd;
150
151 // Remembered sets on this CPU. Each GC thread has its own
152 // private per-generation remembered sets, so it can add an item
153 // to the remembered set without taking a lock. The mut_lists
154 // array on a gc_thread is the same as the one on the
155 // corresponding Capability; we stash it here too for easy access
156 // during GC; see recordMutableGen_GC().
157 bdescr ** mut_lists;
158
159 // --------------------
160 // evacuate flags
161
162 uint32_t evac_gen_no; // Youngest generation that objects
163 // should be evacuated to in
164 // evacuate(). (Logically an
165 // argument to evacuate, but it's
166 // static a lot of the time so we
167 // optimise it into a per-thread
168 // variable).
169
170 bool failed_to_evac; // failure to evacuate an object typically
171 // Causes it to be recorded in the mutable
172 // object list
173
174 bool eager_promotion; // forces promotion to the evac gen
175 // instead of the to-space
176 // corresponding to the object
177
178 W_ thunk_selector_depth; // used to avoid unbounded recursion in
179 // evacuate() for THUNK_SELECTOR
180
181 // -------------------
182 // stats
183
184 W_ copied;
185 W_ scanned;
186 W_ any_work;
187 W_ no_work;
188 W_ scav_find_work;
189
190 Time gc_start_cpu; // process CPU time
191 Time gc_sync_start_elapsed; // start of GC sync
192 Time gc_start_elapsed; // process elapsed time
193 W_ gc_start_faults;
194
195 // -------------------
196 // workspaces
197
198 // array of workspaces, indexed by gen->abs_no. This is placed
199 // directly at the end of the gc_thread structure so that we can get from
200 // the gc_thread pointer to a workspace using only pointer
201 // arithmetic, no memory access. This happens in the inner loop
202 // of the GC, see Evac.c:alloc_for_copy().
203 gen_workspace gens[];
204 } gc_thread;
205
206
207 extern uint32_t n_gc_threads;
208
209 extern gc_thread **gc_threads;
210
211 #if defined(THREADED_RTS) && defined(llvm_CC_FLAVOR)
212 extern ThreadLocalKey gctKey;
213 #endif
214
215 #include "EndPrivate.h"