@header
Generic hash table library.
-
- @copyright Heng Li
*/
#define AC_VERSION_KHASH_H "0.2.6"
#define __ac_inc(k) ((k) | 1)
#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
static const double __ac_HASH_UPPER = 0.77;
#define KHASH_DECLARE(name, khkey_t, khval_t) \
khint_t inc, k, i, last, mask; \
mask = h->n_buckets - 1; \
k = __hash_func(key); i = k & mask; \
- inc = __ac_inc(k) & mask; last = i; \
+ inc = __ac_inc(k) & mask; last = i; /* inc==1 for linear probing */ \
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
i = (i + inc) & mask; \
if (i == last) return h->n_buckets; \
} else return 0; \
} \
SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
- { \
+ { /* This function uses 0.25*n_bucktes bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
khint32_t *new_flags = 0; \
khint_t j = 1; \
{ \
- new_n_buckets = h->n_buckets? h->n_buckets<<1 : 4; \
- if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \
- else { \
+ kroundup32(new_n_buckets); \
+ if (new_n_buckets < 4) new_n_buckets = 4; \
+ if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \
+ else { /* hash table size to be changed (shrink or expand); rehash */ \
new_flags = (khint32_t*)malloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
- if (h->n_buckets < new_n_buckets) { \
+ if (h->n_buckets < new_n_buckets) { /* expand */ \
h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
- if (kh_is_map) \
- h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
- } \
+ if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
+ } /* otherwise shrink */ \
} \
} \
- if (j) { \
+ if (j) { /* rehashing is needed */ \
for (j = 0; j != h->n_buckets; ++j) { \
if (__ac_iseither(h->flags, j) == 0) { \
khkey_t key = h->keys[j]; \
khval_t val; \
+ khint_t new_mask; \
+ new_mask = new_n_buckets - 1; \
if (kh_is_map) val = h->vals[j]; \
__ac_set_isdel_true(h->flags, j); \
- while (1) { \
- khint_t inc, k, i, new_mask; \
- new_mask = new_n_buckets - 1; \
+ while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
+ khint_t inc, k, i; \
k = __hash_func(key); \
i = k & new_mask; \
inc = __ac_inc(k) & new_mask; \
while (!__ac_isempty(new_flags, i)) i = (i + inc) & new_mask; \
__ac_set_isempty_false(new_flags, i); \
- if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \
+ if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
- __ac_set_isdel_true(h->flags, i); \
- } else { \
+ __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \
+ } else { /* write the element and jump out of the loop */ \
h->keys[i] = key; \
if (kh_is_map) h->vals[i] = val; \
break; \
} \
} \
} \
- if (h->n_buckets > new_n_buckets) { \
+ if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
- if (kh_is_map) \
- h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
+ if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
} \
- free(h->flags); \
+ free(h->flags); /* free the working space */ \
h->flags = new_flags; \
h->n_buckets = new_n_buckets; \
h->n_occupied = h->size; \
SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
{ \
khint_t x; \
- if (h->n_occupied >= h->upper_bound) { \
- if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \
- else kh_resize_##name(h, h->n_buckets + 1); \
- } \
+ if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
+ if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); /* clear "deleted" elements */ \
+ else kh_resize_##name(h, h->n_buckets + 1); /* expand the hash table */ \
+ } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
{ \
khint_t inc, k, i, site, last, mask = h->n_buckets - 1; \
x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
- if (__ac_isempty(h->flags, i)) x = i; \
+ if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
else { \
inc = __ac_inc(k) & mask; last = i; \
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
} \
} \
} \
- if (__ac_isempty(h->flags, x)) { \
+ if (__ac_isempty(h->flags, x)) { /* not present at all */ \
h->keys[x] = key; \
__ac_set_isboth_false(h->flags, x); \
++h->size; ++h->n_occupied; \
*ret = 1; \
- } else if (__ac_isdel(h->flags, x)) { \
+ } else if (__ac_isdel(h->flags, x)) { /* deleted */ \
h->keys[x] = key; \
__ac_set_isboth_false(h->flags, x); \
++h->size; \
*ret = 2; \
- } else *ret = 0; \
+ } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
return x; \
} \
SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \