Revert "rts, base: Refactor stats.c to improve --machine-readable report"
[ghc.git] / rts / Hash.c
1 /*-----------------------------------------------------------------------------
2 *
3 * (c) The AQUA Project, Glasgow University, 1995-1998
4 * (c) The GHC Team, 1999
5 *
6 * Dynamically expanding linear hash tables, as described in
7 * Per-\AAke Larson, ``Dynamic Hash Tables,'' CACM 31(4), April 1988,
8 * pp. 446 -- 457.
9 * -------------------------------------------------------------------------- */
10
11 #include "PosixSource.h"
12 #include "Rts.h"
13
14 #include "Hash.h"
15 #include "RtsUtils.h"
16 #include "xxhash.h"
17
18 #include <string.h>
19
20 #define HSEGSIZE 1024 /* Size of a single hash table segment */
21 /* Also the minimum size of a hash table */
22 #define HDIRSIZE 1024 /* Size of the segment directory */
23 /* Maximum hash table size is HSEGSIZE * HDIRSIZE */
24 #define HLOAD 5 /* Maximum average load of a single hash bucket */
25
26 #define HCHUNK (1024 * sizeof(W_) / sizeof(HashList))
27 /* Number of HashList cells to allocate in one go */
28
29
30 /* Linked list of (key, data) pairs for separate chaining */
31 typedef struct hashlist {
32 StgWord key;
33 const void *data;
34 struct hashlist *next; /* Next cell in bucket chain (same hash value) */
35 } HashList;
36
37 typedef struct chunklist {
38 HashList *chunk;
39 struct chunklist *next;
40 } HashListChunk;
41
42 struct hashtable {
43 int split; /* Next bucket to split when expanding */
44 int max; /* Max bucket of smaller table */
45 int mask1; /* Mask for doing the mod of h_1 (smaller table) */
46 int mask2; /* Mask for doing the mod of h_2 (larger table) */
47 int kcount; /* Number of keys */
48 int bcount; /* Number of buckets */
49 HashList **dir[HDIRSIZE]; /* Directory of segments */
50 HashList *freeList; /* free list of HashLists */
51 HashListChunk *chunks;
52 HashFunction *hash; /* hash function */
53 CompareFunction *compare; /* key comparison function */
54 };
55
56 /* -----------------------------------------------------------------------------
57 * Hash first using the smaller table. If the bucket is less than the
58 * next bucket to be split, re-hash using the larger table.
59 * -------------------------------------------------------------------------- */
60
61 int
62 hashWord(const HashTable *table, StgWord key)
63 {
64 int bucket;
65
66 /* Strip the boring zero bits */
67 key /= sizeof(StgWord);
68
69 /* Mod the size of the hash table (a power of 2) */
70 bucket = key & table->mask1;
71
72 if (bucket < table->split) {
73 /* Mod the size of the expanded hash table (also a power of 2) */
74 bucket = key & table->mask2;
75 }
76 return bucket;
77 }
78
79 int
80 hashStr(const HashTable *table, char *key)
81 {
82 #ifdef x86_64_HOST_ARCH
83 StgWord h = XXH64 (key, strlen(key), 1048583);
84 #else
85 StgWord h = XXH32 (key, strlen(key), 1048583);
86 #endif
87
88 /* Mod the size of the hash table (a power of 2) */
89 int bucket = h & table->mask1;
90
91 if (bucket < table->split) {
92 /* Mod the size of the expanded hash table (also a power of 2) */
93 bucket = h & table->mask2;
94 }
95
96 return bucket;
97 }
98
99 static int
100 compareWord(StgWord key1, StgWord key2)
101 {
102 return (key1 == key2);
103 }
104
105 static int
106 compareStr(StgWord key1, StgWord key2)
107 {
108 return (strcmp((char *)key1, (char *)key2) == 0);
109 }
110
111
112 /* -----------------------------------------------------------------------------
113 * Allocate a new segment of the dynamically growing hash table.
114 * -------------------------------------------------------------------------- */
115
116 static void
117 allocSegment(HashTable *table, int segment)
118 {
119 table->dir[segment] = stgMallocBytes(HSEGSIZE * sizeof(HashList *),
120 "allocSegment");
121 }
122
123
124 /* -----------------------------------------------------------------------------
125 * Expand the larger hash table by one bucket, and split one bucket
126 * from the smaller table into two parts. Only the bucket referenced
127 * by @table->split@ is affected by the expansion.
128 * -------------------------------------------------------------------------- */
129
130 static void
131 expand(HashTable *table)
132 {
133 int oldsegment;
134 int oldindex;
135 int newbucket;
136 int newsegment;
137 int newindex;
138 HashList *hl;
139 HashList *next;
140 HashList *old, *new;
141
142 if (table->split + table->max >= HDIRSIZE * HSEGSIZE)
143 /* Wow! That's big. Too big, so don't expand. */
144 return;
145
146 /* Calculate indices of bucket to split */
147 oldsegment = table->split / HSEGSIZE;
148 oldindex = table->split % HSEGSIZE;
149
150 newbucket = table->max + table->split;
151
152 /* And the indices of the new bucket */
153 newsegment = newbucket / HSEGSIZE;
154 newindex = newbucket % HSEGSIZE;
155
156 if (newindex == 0)
157 allocSegment(table, newsegment);
158
159 if (++table->split == table->max) {
160 table->split = 0;
161 table->max *= 2;
162 table->mask1 = table->mask2;
163 table->mask2 = table->mask2 << 1 | 1;
164 }
165 table->bcount++;
166
167 /* Split the bucket, paying no attention to the original order */
168
169 old = new = NULL;
170 for (hl = table->dir[oldsegment][oldindex]; hl != NULL; hl = next) {
171 next = hl->next;
172 if (table->hash(table, hl->key) == newbucket) {
173 hl->next = new;
174 new = hl;
175 } else {
176 hl->next = old;
177 old = hl;
178 }
179 }
180 table->dir[oldsegment][oldindex] = old;
181 table->dir[newsegment][newindex] = new;
182
183 return;
184 }
185
186 void *
187 lookupHashTable(const HashTable *table, StgWord key)
188 {
189 int bucket;
190 int segment;
191 int index;
192 HashList *hl;
193
194 bucket = table->hash(table, key);
195 segment = bucket / HSEGSIZE;
196 index = bucket % HSEGSIZE;
197
198 CompareFunction *cmp = table->compare;
199 for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
200 if (cmp(hl->key, key))
201 return (void *) hl->data;
202 }
203
204 /* It's not there */
205 return NULL;
206 }
207
208 // Puts up to szKeys keys of the hash table into the given array. Returns the
209 // actual amount of keys that have been retrieved.
210 //
211 // If the table is modified concurrently, the function behavior is undefined.
212 //
213 int keysHashTable(HashTable *table, StgWord keys[], int szKeys) {
214 int segment, index;
215 int k = 0;
216 HashList *hl;
217
218
219 /* The last bucket with something in it is table->max + table->split - 1 */
220 segment = (table->max + table->split - 1) / HSEGSIZE;
221 index = (table->max + table->split - 1) % HSEGSIZE;
222
223 while (segment >= 0 && k < szKeys) {
224 while (index >= 0 && k < szKeys) {
225 hl = table->dir[segment][index];
226 while (hl && k < szKeys) {
227 keys[k] = hl->key;
228 k += 1;
229 hl = hl->next;
230 }
231 index--;
232 }
233 segment--;
234 index = HSEGSIZE - 1;
235 }
236 return k;
237 }
238
239 /* -----------------------------------------------------------------------------
240 * We allocate the hashlist cells in large chunks to cut down on malloc
241 * overhead. Although we keep a free list of hashlist cells, we make
242 * no effort to actually return the space to the malloc arena.
243 * -------------------------------------------------------------------------- */
244
245 static HashList *
246 allocHashList (HashTable *table)
247 {
248 HashList *hl, *p;
249 HashListChunk *cl;
250
251 if ((hl = table->freeList) != NULL) {
252 table->freeList = hl->next;
253 } else {
254 hl = stgMallocBytes(HCHUNK * sizeof(HashList), "allocHashList");
255 cl = stgMallocBytes(sizeof (*cl), "allocHashList: chunkList");
256 cl->chunk = hl;
257 cl->next = table->chunks;
258 table->chunks = cl;
259
260 table->freeList = hl + 1;
261 for (p = table->freeList; p < hl + HCHUNK - 1; p++)
262 p->next = p + 1;
263 p->next = NULL;
264 }
265 return hl;
266 }
267
268 static void
269 freeHashList (HashTable *table, HashList *hl)
270 {
271 hl->next = table->freeList;
272 table->freeList = hl;
273 }
274
275 void
276 insertHashTable(HashTable *table, StgWord key, const void *data)
277 {
278 int bucket;
279 int segment;
280 int index;
281 HashList *hl;
282
283 // Disable this assert; sometimes it's useful to be able to
284 // overwrite entries in the hash table.
285 // ASSERT(lookupHashTable(table, key) == NULL);
286
287 /* When the average load gets too high, we expand the table */
288 if (++table->kcount >= HLOAD * table->bcount)
289 expand(table);
290
291 bucket = table->hash(table, key);
292 segment = bucket / HSEGSIZE;
293 index = bucket % HSEGSIZE;
294
295 hl = allocHashList(table);
296
297 hl->key = key;
298 hl->data = data;
299 hl->next = table->dir[segment][index];
300 table->dir[segment][index] = hl;
301
302 }
303
304 void *
305 removeHashTable(HashTable *table, StgWord key, const void *data)
306 {
307 int bucket;
308 int segment;
309 int index;
310 HashList *hl;
311 HashList *prev = NULL;
312
313 bucket = table->hash(table, key);
314 segment = bucket / HSEGSIZE;
315 index = bucket % HSEGSIZE;
316
317 for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
318 if (table->compare(hl->key,key) && (data == NULL || hl->data == data)) {
319 if (prev == NULL)
320 table->dir[segment][index] = hl->next;
321 else
322 prev->next = hl->next;
323 freeHashList(table,hl);
324 table->kcount--;
325 return (void *) hl->data;
326 }
327 prev = hl;
328 }
329
330 /* It's not there */
331 ASSERT(data == NULL);
332 return NULL;
333 }
334
335 /* -----------------------------------------------------------------------------
336 * When we free a hash table, we are also good enough to free the
337 * data part of each (key, data) pair, as long as our caller can tell
338 * us how to do it.
339 * -------------------------------------------------------------------------- */
340
341 void
342 freeHashTable(HashTable *table, void (*freeDataFun)(void *) )
343 {
344 long segment;
345 long index;
346 HashList *hl;
347 HashList *next;
348 HashListChunk *cl, *cl_next;
349
350 /* The last bucket with something in it is table->max + table->split - 1 */
351 segment = (table->max + table->split - 1) / HSEGSIZE;
352 index = (table->max + table->split - 1) % HSEGSIZE;
353
354 while (segment >= 0) {
355 while (index >= 0) {
356 for (hl = table->dir[segment][index]; hl != NULL; hl = next) {
357 next = hl->next;
358 if (freeDataFun != NULL)
359 (*freeDataFun)((void *) hl->data);
360 }
361 index--;
362 }
363 stgFree(table->dir[segment]);
364 segment--;
365 index = HSEGSIZE - 1;
366 }
367 for (cl = table->chunks; cl != NULL; cl = cl_next) {
368 cl_next = cl->next;
369 stgFree(cl->chunk);
370 stgFree(cl);
371 }
372 stgFree(table);
373 }
374
375 /* -----------------------------------------------------------------------------
376 * Map a function over all the keys/values in a HashTable
377 * -------------------------------------------------------------------------- */
378
379 void
380 mapHashTable(HashTable *table, void *data, MapHashFn fn)
381 {
382 long segment;
383 long index;
384 HashList *hl;
385
386 /* The last bucket with something in it is table->max + table->split - 1 */
387 segment = (table->max + table->split - 1) / HSEGSIZE;
388 index = (table->max + table->split - 1) % HSEGSIZE;
389
390 while (segment >= 0) {
391 while (index >= 0) {
392 for (hl = table->dir[segment][index]; hl != NULL; hl = hl->next) {
393 fn(data, hl->key, hl->data);
394 }
395 index--;
396 }
397 segment--;
398 index = HSEGSIZE - 1;
399 }
400 }
401
402 /* -----------------------------------------------------------------------------
403 * When we initialize a hash table, we set up the first segment as well,
404 * initializing all of the first segment's hash buckets to NULL.
405 * -------------------------------------------------------------------------- */
406
407 HashTable *
408 allocHashTable_(HashFunction *hash, CompareFunction *compare)
409 {
410 HashTable *table;
411 HashList **hb;
412
413 table = stgMallocBytes(sizeof(HashTable),"allocHashTable");
414
415 allocSegment(table, 0);
416
417 for (hb = table->dir[0]; hb < table->dir[0] + HSEGSIZE; hb++)
418 *hb = NULL;
419
420 table->split = 0;
421 table->max = HSEGSIZE;
422 table->mask1 = HSEGSIZE - 1;
423 table->mask2 = 2 * HSEGSIZE - 1;
424 table->kcount = 0;
425 table->bcount = HSEGSIZE;
426 table->freeList = NULL;
427 table->chunks = NULL;
428 table->hash = hash;
429 table->compare = compare;
430
431 return table;
432 }
433
434 HashTable *
435 allocHashTable(void)
436 {
437 return allocHashTable_(hashWord, compareWord);
438 }
439
440 HashTable *
441 allocStrHashTable(void)
442 {
443 return allocHashTable_((HashFunction *)hashStr,
444 (CompareFunction *)compareStr);
445 }
446
447 void
448 exitHashTable(void)
449 {
450 /* nothing to do */
451 }
452
453 int keyCountHashTable (HashTable *table)
454 {
455 return table->kcount;
456 }