/* khashl.hpp -- C++ version of the khashl hash table (klib, cpp/khashl.hpp). */
#ifndef __AC_KHASHL_HPP
#define __AC_KHASHL_HPP

#include <functional> // std::equal_to
#include <cstdlib>    // std::calloc, std::realloc, std::free
#include <cstring>    // std::memset
#include <stdint.h>   // uint32_t, uint64_t

namespace klib {

/**
 * Open-addressing hash set using linear probing and a one-bit-per-bucket
 * occupancy bitmap. The number of buckets is always a power of two.
 *
 * Keys live in storage obtained from malloc()/realloc() and are moved around
 * with plain assignment, so T has to be a type for which that is valid
 * (NOTE(review): effectively trivially copyable types -- confirm with callers).
 *
 * @tparam T        key type
 * @tparam Hash     functor; Hash()(key) yields an integer hash. Hashes wider
 *                  than 32 bits are mixed with the 64-bit multiplier, all
 *                  others with the 32-bit one.
 * @tparam Eq       functor; Eq()(a, b) is true when the two keys are equal
 * @tparam khint_t  unsigned integer type used for bucket indices and counts
 */
template<class T, class Hash, class Eq = std::equal_to<T>, typename khint_t = uint32_t>
class KHashL {
	khint_t bits, count; // log2(number of buckets); number of stored keys
	uint32_t *used;      // occupancy bitmap, one bit per bucket
	T *keys;             // bucket array

	// The table owns raw malloc'ed memory; an implicit member-wise copy would
	// free it twice. Copying is therefore disabled (declared, never defined).
	KHashL(const KHashL&);
	KHashL &operator=(const KHashL&);

	static inline bool is_used(const uint32_t *flag, khint_t i) { return (flag[i>>5] >> (i&0x1fU) & 1U) != 0; }
	static inline void set_used(uint32_t *flag, khint_t i) { flag[i>>5] |= 1U<<(i&0x1fU); }
	static inline void set_unused(uint32_t *flag, khint_t i) { flag[i>>5] &= ~(1U<<(i&0x1fU)); }
	// number of 32-bit words needed to hold m occupancy bits
	static inline khint_t flag_words(khint_t m) { return m<32? 1 : m>>5; }
	// multiplicative hashing: keep the top `bits` bits of hash * constant
	static inline khint_t mix32(uint32_t hash, khint_t bits) { return khint_t(hash * 2654435769U >> (32 - bits)); }
	static inline khint_t mix64(uint64_t hash, khint_t bits) { return khint_t(hash * 11400714819323198485ULL >> (64 - bits)); }
	// Map a hash value to its home bucket. Dispatching on the width of the
	// hash type (instead of overloading on uint32_t/uint64_t) keeps the call
	// unambiguous for any integer type the hash functor may return.
	template<typename H>
	static inline khint_t h2b(H hash, khint_t bits) {
		return sizeof(H) > sizeof(uint32_t)? mix64(uint64_t(hash), bits) : mix32(uint32_t(hash), bits);
	}
public:
	KHashL() : bits(0), count(0), used(0), keys(0) {}
	~KHashL() { std::free(used); std::free(keys); }
	/** Number of buckets; 0 before the first allocation. */
	inline khint_t n_buckets() const { return used? khint_t(1) << bits : 0; }
	/** Past-the-end bucket index; get() returns it when a key is absent. */
	inline khint_t end() const { return n_buckets(); }
	/** Number of keys currently stored. */
	inline khint_t size() const { return count; }
	/** Key stored in bucket x; x must be below n_buckets(). */
	inline T &at(khint_t x) { return keys[x]; }
	inline const T &at(khint_t x) const { return keys[x]; }
	/** True if bucket x holds a key; x must be below n_buckets(). */
	inline bool exist(khint_t x) const { return is_used(used, x); }
	/** Remove all keys while keeping the allocated buckets. */
	void clear(void) {
		if (!used) return;
		std::memset(used, 0, flag_words(n_buckets()) * sizeof(uint32_t));
		count = 0;
	}
	/**
	 * Look up a key.
	 * @return the bucket index holding the key, or end() if it is absent
	 */
	khint_t get(const T &key) const {
		khint_t i, last, mask, nb;
		if (keys == 0) return 0; // nothing allocated: end() is 0 as well
		nb = n_buckets();
		mask = nb - khint_t(1);
		i = last = h2b(Hash()(key), bits);
		while (is_used(used, i) && !Eq()(keys[i], key)) {
			i = (i + khint_t(1)) & mask;
			if (i == last) return nb; // probed every bucket
		}
		return !is_used(used, i)? nb : i;
	}
	/**
	 * Rehash into at least new_nb buckets (rounded up to a power of two,
	 * minimum 4). Works for growing and for shrinking.
	 * @return 0 on success, or when the requested size cannot hold the
	 *         current keys at 75% load (table left unchanged);
	 *         -1 if memory allocation failed (table left unchanged)
	 */
	int resize(khint_t new_nb) {
		khint_t j = 0, x = new_nb, nb, new_bits, new_mask;
		while ((x >>= khint_t(1)) != 0) ++j; // j = floor(log2(new_nb))
		if (new_nb & (new_nb - 1)) ++j;      // round up if not a power of two
		new_bits = j > 2? j : 2;
		new_nb = khint_t(1) << new_bits;
		if (count > (new_nb>>1) + (new_nb>>2)) return 0; /* requested size is too small */
		// calloc gives a zeroed bitmap; check the result before anything touches it
		uint32_t *new_used = (uint32_t*)std::calloc(flag_words(new_nb), sizeof(uint32_t));
		if (!new_used) return -1; /* not enough memory */
		nb = n_buckets();
		if (nb < new_nb) { /* expand */
			T *new_keys = (T*)std::realloc(keys, new_nb * sizeof(T));
			if (!new_keys) { std::free(new_used); return -1; }
			keys = new_keys;
		} /* otherwise shrink */
		new_mask = new_nb - 1;
		for (j = 0; j != nb; ++j) {
			if (!is_used(used, j)) continue;
			T key = keys[j];
			set_unused(used, j);
			while (1) { /* kick-out process; sort of like in Cuckoo hashing */
				khint_t i = h2b(Hash()(key), new_bits);
				while (is_used(new_used, i)) i = (i + khint_t(1)) & new_mask;
				set_used(new_used, i);
				if (i < nb && is_used(used, i)) { /* kick out the existing element */
					{ T tmp = keys[i]; keys[i] = key; key = tmp; }
					set_unused(used, i); /* mark it as deleted in the old hash table */
				} else { /* write the element and jump out of the loop */
					keys[i] = key;
					break;
				}
			}
		}
		if (nb > new_nb) { /* shrink the hash table */
			T *shrunk = (T*)std::realloc(keys, new_nb * sizeof(T));
			if (shrunk) keys = shrunk; // on failure the old, larger block is still valid
		}
		std::free(used); /* free the working space */
		used = new_used, bits = new_bits;
		return 0;
	}
	/**
	 * Insert a key unless an equal key is already stored.
	 * @param key     key to insert
	 * @param absent  must be non-null; set to 1 if the key was inserted,
	 *                0 if an equal key was already present (the stored key is
	 *                left untouched), -1 if growing the table failed
	 * @return the bucket index of the key, or end() when growing failed
	 */
	khint_t put(const T &key, int *absent) {
		khint_t nb, i, last, mask;
		nb = n_buckets();
		*absent = -1;
		if (count >= (nb>>1) + (nb>>2)) { /* rehashing */
			if (resize(nb + khint_t(1)) < 0)
				return nb;
			nb = n_buckets();
		} /* TODO: to implement automatically shrinking; resize() already support shrinking */
		mask = nb - 1;
		i = last = h2b(Hash()(key), bits);
		while (is_used(used, i) && !Eq()(keys[i], key)) {
			i = (i + khint_t(1)) & mask;
			if (i == last) break;
		}
		if (!is_used(used, i)) { /* not present at all */
			keys[i] = key;
			set_used(used, i);
			++count;
			*absent = 1;
		} else *absent = 0; /* Don't touch keys[i] if present */
		return i;
	}
	/**
	 * Delete the key stored in bucket i, shifting later keys of the same
	 * probe run backwards so that no tombstones are needed.
	 * @return 1 if a key was removed; 0 if i is out of range (e.g. end())
	 *         or the bucket is empty
	 */
	int del(khint_t i) {
		khint_t j = i, k, mask, nb;
		if (keys == 0) return 0;
		nb = n_buckets();
		if (i >= nb || !is_used(used, i)) return 0; // e.g. del(get(missing_key))
		mask = nb - khint_t(1);
		while (1) {
			j = (j + khint_t(1)) & mask;
			if (j == i || !is_used(used, j)) break; /* j==i only when the table is completely full */
			k = h2b(Hash()(keys[j]), bits);
			// Move keys[j] into the hole at i only if its home bucket k is not
			// cyclically inside (i, j]; the second clause handles the case
			// where the probe run wrapped past the end of the array (j < i).
			if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j)))
				keys[i] = keys[j], i = j;
		}
		set_unused(used, i);
		--count;
		return 1;
	}
};

} // namespace klib

#endif /* __AC_KHASHL_HPP */
1 : (m)>>5) -static kh_inline khint_t __kh_h2b(uint32_t hash, uint32_t bits) { return hash * 2654435769U >> (32 - bits); } +static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); } /******************* * Hash table base * @@ -107,7 +103,7 @@ static kh_inline khint_t __kh_h2b(uint32_t hash, uint32_t bits) { return hash * #define __KHASHL_TYPE(HType, khkey_t) \ typedef struct { \ - khint32_t bits, count; \ + khint_t bits, count; \ khint32_t *used; \ khkey_t *keys; \ } HType; @@ -156,10 +152,9 @@ static kh_inline khint_t __kh_h2b(uint32_t hash, uint32_t bits) { return hash * #define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ SCOPE int prefix##_resize(HType *h, khint_t new_n_buckets) { \ khint32_t *new_used = 0; \ - khint_t j, n_buckets, new_bits, new_mask; \ - kroundup32(new_n_buckets); \ - for (j = 0; j < 32; ++j) \ - if (new_n_buckets>>j&1) break; \ + khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \ + while ((x >>= 1) != 0) ++j; \ + if (new_n_buckets & (new_n_buckets - 1)) ++j; \ new_bits = j > 2? j : 2; \ new_n_buckets = 1U << new_bits; \ if (h->count > (new_n_buckets>>1) + (new_n_buckets>>2)) return 0; /* requested size is too small */ \