aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Thomas Munro <tmunro@postgresql.org> 2020-08-01 12:16:15 +1200
committer: Thomas Munro <tmunro@postgresql.org> 2020-08-01 12:16:15 +1200
commit: 84c0e4b9bce794da914fe9c062753bf21369745f (patch)
tree: 67e2fa3533e5d38527ae5346638e7c910943a379 /src
parent: c79aed4f793086300abfc188def94b5c0bd0b45d (diff)
download: postgresql-84c0e4b9bce794da914fe9c062753bf21369745f.tar.gz
postgresql-84c0e4b9bce794da914fe9c062753bf21369745f.zip
Improve programmer docs for simplehash and dynahash.
When reading the code it's not obvious when one should prefer dynahash over simplehash and vice-versa, so, for programmer-friendliness, add comments to inform that decision. Show sample simplehash method signatures. Author: James Coleman <jtc331@gmail.com> Discussion: https://postgr.es/m/CAAaqYe_dOF39gAJ8rL-a3YO3Qo96MHMRQ2whFjK5ZcU6YvMQSA%40mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/hash/dynahash.c 12
-rw-r--r-- src/include/lib/simplehash.h 73
2 files changed, 80 insertions, 5 deletions
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 5948b01abc3..f4fbccdd7e4 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -1,7 +1,7 @@
/*-------------------------------------------------------------------------
*
* dynahash.c
- * dynamic hash tables
+ * dynamic chained hash tables
*
* dynahash.c supports both local-to-a-backend hash tables and hash tables in
* shared memory. For shared hash tables, it is the caller's responsibility
@@ -41,6 +41,16 @@
* function must be supplied; comparison defaults to memcmp() and key copying
* to memcpy() when a user-defined hashing function is selected.
*
+ * Compared to simplehash, dynahash has the following benefits:
+ *
+ * - It supports partitioning, which is useful for shared memory access using
+ * locks.
+ * - Shared memory hashes are allocated in a fixed size area at startup and
+ * are discoverable by name from other processes.
+ * - Because entries don't need to be moved in the case of hash conflicts,
+ * dynahash has better performance for large entries.
+ * - Guarantees stable pointers to entries.
+ *
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
diff --git a/src/include/lib/simplehash.h b/src/include/lib/simplehash.h
index 90dfa8a695d..96f0c21f606 100644
--- a/src/include/lib/simplehash.h
+++ b/src/include/lib/simplehash.h
@@ -1,10 +1,27 @@
/*
* simplehash.h
*
- * Hash table implementation which will be specialized to user-defined
- * types, by including this file to generate the required code. It's
- * probably not worthwhile to do so for hash tables that aren't performance
- * or space sensitive.
+ * When included this file generates a "templated" (by way of macros)
+ * open-addressing hash table implementation specialized to user-defined
+ * types.
+ *
+ * It's probably not worthwhile to generate such a specialized implementation
+ * for hash tables that aren't performance or space sensitive.
+ *
+ * Compared to dynahash, simplehash has the following benefits:
+ *
+ * - Due to the "templated" code generation, it has known structure sizes
+ * and no indirect function calls (which show up substantially in dynahash
+ * profiles). These features considerably increase speed for small
+ * entries.
+ * - Open addressing has better CPU cache behavior than dynahash's chained
+ * hashtables.
+ * - The generated interface is type-safe and easier to use than dynahash,
+ * though at the cost of more complex setup.
+ * - Allocates memory in a MemoryContext or another allocator with a
+ * malloc/free style interface (which isn't easily usable in a shared
+ * memory context).
+ * - Does not require the overhead of a separate memory context.
*
* Usage notes:
*
@@ -34,6 +51,19 @@
* - SH_STORE_HASH - if defined the hash is stored in the elements
* - SH_GET_HASH(tb, a) - return the field to store the hash in
*
+ * The element type is required to contain a "uint32 status" member.
+ *
+ * While SH_STORE_HASH (and subsequently SH_GET_HASH) are optional, because
+ * the hash table implementation needs to compare hashes to move elements
+ * (particularly when growing the hash), it's preferable, if possible, to
+ * store the element's hash in the element's data type. If the hash is so
+ * stored, the hash table will also compare hashes before calling SH_EQUAL
+ * when comparing two keys.
+ *
+ * For convenience the hash table create functions accept a void pointer
+ * that will be stored in the hash table type's member private_data. This
+ * allows callbacks to reference caller provided data.
+ *
* For examples of usage look at tidbitmap.c (file local definition) and
* execnodes.h/execGrouping.c (exposed declaration, file local
* implementation).
@@ -149,24 +179,59 @@ typedef struct SH_ITERATOR
/* externally visible function prototypes */
#ifdef SH_RAW_ALLOCATOR
+/* <prefix>_hash *<prefix>_create(uint32 nelements, void *private_data) */
SH_SCOPE SH_TYPE *SH_CREATE(uint32 nelements, void *private_data);
#else
+/*
+ * <prefix>_hash *<prefix>_create(MemoryContext ctx, uint32 nelements,
+ * void *private_data)
+ */
SH_SCOPE SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
void *private_data);
#endif
+
+/* void <prefix>_destroy(<prefix>_hash *tb) */
SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
+
+/* void <prefix>_reset(<prefix>_hash *tb) */
SH_SCOPE void SH_RESET(SH_TYPE * tb);
+
+/* void <prefix>_grow(<prefix>_hash *tb, uint32 newsize) */
SH_SCOPE void SH_GROW(SH_TYPE * tb, uint32 newsize);
+
+/* <element> *<prefix>_insert(<prefix>_hash *tb, <key> key, bool *found) */
SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
+
+/*
+ * <element> *<prefix>_insert_hash(<prefix>_hash *tb, <key> key, uint32 hash,
+ * bool *found)
+ */
SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
uint32 hash, bool *found);
+
+/* <element> *<prefix>_lookup(<prefix>_hash *tb, <key> key) */
SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
+
+/* <element> *<prefix>_lookup_hash(<prefix>_hash *tb, <key> key, uint32 hash) */
SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP_HASH(SH_TYPE * tb, SH_KEY_TYPE key,
uint32 hash);
+
+/* bool <prefix>_delete(<prefix>_hash *tb, <key> key) */
SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
+
+/* void <prefix>_start_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
+
+/*
+ * void <prefix>_start_iterate_at(<prefix>_hash *tb, <prefix>_iterator *iter,
+ * uint32 at)
+ */
SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
+
+/* <element> *<prefix>_iterate(<prefix>_hash *tb, <prefix>_iterator *iter) */
SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
+
+/* void <prefix>_stat(<prefix>_hash *tb) */
SH_SCOPE void SH_STAT(SH_TYPE * tb);
#endif /* SH_DECLARE */