aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/storage/ipc/shmem.c4
-rw-r--r--src/backend/utils/hash/dynahash.c281
-rw-r--r--src/include/utils/hsearch.h3
3 files changed, 222 insertions, 66 deletions
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index 895a43fb39e..e453f856794 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -73,6 +73,7 @@
#include "storage/shmem.h"
#include "storage/spin.h"
#include "utils/builtins.h"
+#include "utils/dynahash.h"
static void *ShmemAllocRaw(Size size, Size *allocated_size);
@@ -346,7 +347,8 @@ ShmemInitHash(const char *name, /* table string name for shmem index */
/* look it up in the shmem index */
location = ShmemInitStruct(name,
- hash_get_shared_size(infoP, hash_flags),
+ hash_get_size(infoP, hash_flags,
+ init_size, true),
&found);
/*
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index 3f25929f2d8..2a5d6efed79 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -260,12 +260,39 @@ static long hash_accesses,
hash_expansions;
#endif
+/* access to parts of the hash table, allocated as a single chunk */
+#define HASH_DIRECTORY_PTR(hashp) \
+ (((char *) (hashp)->hctl) + sizeof(HASHHDR))
+
+#define HASH_SEGMENT_OFFSET(hctl, idx) \
+ (sizeof(HASHHDR) + \
+ ((hctl)->dsize * sizeof(HASHSEGMENT)) + \
+ ((hctl)->ssize * (idx) * sizeof(HASHBUCKET)))
+
+#define HASH_SEGMENT_PTR(hashp, idx) \
+ ((char *) (hashp)->hctl + HASH_SEGMENT_OFFSET((hashp)->hctl, (idx)))
+
+#define HASH_SEGMENT_SIZE(hashp) \
+ ((hashp)->ssize * sizeof(HASHBUCKET))
+
+#define HASH_ELEMENTS_PTR(hashp, nsegs) \
+ ((char *) (hashp)->hctl + HASH_SEGMENT_OFFSET((hashp)->hctl, nsegs))
+
+/* Each element has a HASHELEMENT header plus user data. */
+#define HASH_ELEMENT_SIZE(hctl) \
+ (MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN((hctl)->entrysize))
+
+#define HASH_ELEMENT_NEXT(hctl, num, ptr) \
+ ((char *) (ptr) + ((num) * HASH_ELEMENT_SIZE(hctl)))
+
/*
* Private function prototypes
*/
static void *DynaHashAlloc(Size size);
static HASHSEGMENT seg_alloc(HTAB *hashp);
-static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx);
+static HASHELEMENT *element_alloc(HTAB *hashp, int nelem);
+static void element_add(HTAB *hashp, HASHELEMENT *firstElement,
+ int nelem, int freelist_idx);
static bool dir_realloc(HTAB *hashp);
static bool expand_table(HTAB *hashp);
static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx);
@@ -280,6 +307,9 @@ static int next_pow2_int(long num);
static void register_seq_scan(HTAB *hashp);
static void deregister_seq_scan(HTAB *hashp);
static bool has_seq_scans(HTAB *hashp);
+static void compute_buckets_and_segs(long nelem, long num_partitions,
+ long ssize,
+ int *nbuckets, int *nsegments);
/*
@@ -353,6 +383,8 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
{
HTAB *hashp;
HASHHDR *hctl;
+ int nelem_batch;
+ bool prealloc;
/*
* Hash tables now allocate space for key and data, but you have to say
@@ -507,9 +539,31 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
hashp->isshared = false;
}
+ /* Choose the number of entries to allocate at a time. */
+ nelem_batch = choose_nelem_alloc(info->entrysize);
+
+ /*
+ * Decide whether to pre-allocate elements.
+ *
+ * For a private hash table, preallocate the requested number of elements
+ * if it's less than our chosen nelem_alloc. This avoids wasting space if
+ * the caller correctly estimates a small table size.
+ *
+ * For a shared hash table, preallocate the requested number of elements.
+ * This reduces problems with run-time out-of-shared-memory conditions.
+ */
+ prealloc = (flags & HASH_SHARED_MEM) || (nelem < nelem_batch);
+
+ /*
+ * Allocate the memory needed for hash header, directory, segments and
+ * elements together. Use pointer arithmetic to arrive at the start of
+ * each of these structures later.
+ */
if (!hashp->hctl)
{
- hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR));
+ Size size = hash_get_size(info, flags, nelem, prealloc);
+
+ hashp->hctl = (HASHHDR *) hashp->alloc(size);
if (!hashp->hctl)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
@@ -558,6 +612,9 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
hctl->keysize = info->keysize;
hctl->entrysize = info->entrysize;
+ /* remember how many elements to allocate at once */
+ hctl->nelem_alloc = nelem_batch;
+
/* make local copies of heavily-used constant fields */
hashp->keysize = hctl->keysize;
hashp->ssize = hctl->ssize;
@@ -567,14 +624,6 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
if (!init_htab(hashp, nelem))
elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname);
- /*
- * For a shared hash table, preallocate the requested number of elements.
- * This reduces problems with run-time out-of-shared-memory conditions.
- *
- * For a non-shared hash table, preallocate the requested number of
- * elements if it's less than our chosen nelem_alloc. This avoids wasting
- * space if the caller correctly estimates a small table size.
- */
if ((flags & HASH_SHARED_MEM) ||
nelem < hctl->nelem_alloc)
{
@@ -582,6 +631,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
freelist_partitions,
nelem_alloc,
nelem_alloc_first;
+ void *ptr = NULL;
/*
* If hash table is partitioned, give each freelist an equal share of
@@ -606,14 +656,28 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags)
else
nelem_alloc_first = nelem_alloc;
+ /*
+ * Calculate the offset at which to find the first partition of
+ * elements. We have to skip space for the header, segments and
+ * buckets.
+ */
+ ptr = HASH_ELEMENTS_PTR(hashp, hctl->nsegs);
+
+ /*
+ * Assign the correct location of each parition within a pre-allocated
+ * buffer.
+ *
+ * Actual memory allocation happens in ShmemInitHash for shared hash
+ * tables or earlier in this function for non-shared hash tables.
+ *
+ * We just need to split that allocation into per-batch freelists.
+ */
for (i = 0; i < freelist_partitions; i++)
{
int temp = (i == 0) ? nelem_alloc_first : nelem_alloc;
- if (!element_alloc(hashp, temp, i))
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
+ element_add(hashp, (HASHELEMENT *) ptr, temp, i);
+ ptr = HASH_ELEMENT_NEXT(hctl, temp, ptr);
}
}
@@ -702,30 +766,17 @@ init_htab(HTAB *hashp, long nelem)
SpinLockInit(&(hctl->freeList[i].mutex));
/*
- * Allocate space for the next greater power of two number of buckets,
- * assuming a desired maximum load factor of 1.
- */
- nbuckets = next_pow2_int(nelem);
-
- /*
- * In a partitioned table, nbuckets must be at least equal to
- * num_partitions; were it less, keys with apparently different partition
- * numbers would map to the same bucket, breaking partition independence.
- * (Normally nbuckets will be much bigger; this is just a safety check.)
+ * We've already calculated these parameters when we calculated how much
+ * space to allocate in hash_get_size(). Be careful to keep these two
+ * places in sync, so that we get the same parameters.
*/
- while (nbuckets < hctl->num_partitions)
- nbuckets <<= 1;
+ compute_buckets_and_segs(nelem, hctl->num_partitions, hctl->ssize,
+ &nbuckets, &nsegs);
hctl->max_bucket = hctl->low_mask = nbuckets - 1;
hctl->high_mask = (nbuckets << 1) - 1;
/*
- * Figure number of directory segments needed, round up to a power of 2
- */
- nsegs = (nbuckets - 1) / hctl->ssize + 1;
- nsegs = next_pow2_int(nsegs);
-
- /*
* Make sure directory is big enough. If pre-allocated directory is too
* small, choke (caller screwed up).
*/
@@ -737,26 +788,25 @@ init_htab(HTAB *hashp, long nelem)
return false;
}
- /* Allocate a directory */
+ /*
+ * Assign a directory by making it point to the correct location in the
+ * pre-allocated buffer.
+ */
if (!(hashp->dir))
{
CurrentDynaHashCxt = hashp->hcxt;
- hashp->dir = (HASHSEGMENT *)
- hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT));
- if (!hashp->dir)
- return false;
+ hashp->dir = (HASHSEGMENT *) HASH_DIRECTORY_PTR(hashp);
}
- /* Allocate initial segments */
+ /* Assign initial segments, which are also pre-allocated */
+ i = 0;
for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++)
{
- *segp = seg_alloc(hashp);
- if (*segp == NULL)
- return false;
+ *segp = (HASHSEGMENT) HASH_SEGMENT_PTR(hashp, i++);
+ MemSet(*segp, 0, HASH_SEGMENT_SIZE(hashp));
}
- /* Choose number of entries to allocate at a time */
- hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize);
+ Assert(i == nsegs);
#ifdef HASH_DEBUG
fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n",
@@ -846,16 +896,75 @@ hash_select_dirsize(long num_entries)
}
/*
- * Compute the required initial memory allocation for a shared-memory
- * hashtable with the given parameters. We need space for the HASHHDR
- * and for the (non expansible) directory.
+ * hash_get_size -- determine memory needed for a new dynamic hash table
+ *
+ * info: hash table parameters
+ * flags: bitmask indicating which parameters to take from *info
+ * nelem: maximum number of elements expected
+ * prealloc: request preallocation of elements
+ *
+ * Compute the required initial memory allocation for a hashtable with the given
+ * parameters. We need space for the HASHHDR, for the directory, segments and
+ * preallocated elements.
+ *
+ * The hash table may be private or shared. For shared hash tables the directory
+ * size is non-expansive, and we preallocate all elements (nelem). For private
+ * hash tables, we preallocate elements only if the expected number of elements
+ * is small (less than nelem_alloc).
*/
Size
-hash_get_shared_size(HASHCTL *info, int flags)
+hash_get_size(const HASHCTL *info, int flags, long nelem, bool prealloc)
{
- Assert(flags & HASH_DIRSIZE);
- Assert(info->dsize == info->max_dsize);
- return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT);
+ int nbuckets;
+ int nsegs;
+ int num_partitions;
+ long ssize;
+ long dsize;
+ Size elementSize = HASH_ELEMENT_SIZE(info);
+ long nelem_prealloc = 0;
+
+#ifdef USE_ASSERT_CHECKING
+ /* shared hash tables have non-expansive directory */
+ if (flags & HASH_SHARED_MEM)
+ {
+ Assert(flags & HASH_DIRSIZE);
+ Assert(info->dsize == info->max_dsize);
+ Assert(prealloc);
+ }
+#endif
+
+ /* Non-shared hash tables may not specify dir size */
+ if (flags & HASH_DIRSIZE)
+ dsize = info->dsize;
+ else
+ dsize = DEF_DIRSIZE;
+
+ if (flags & HASH_SEGMENT)
+ ssize = info->ssize;
+ else
+ ssize = DEF_SEGSIZE;
+
+ if (flags & HASH_PARTITION)
+ {
+ num_partitions = info->num_partitions;
+
+ /* Number of entries should be atleast equal to the freelists */
+ if (nelem < NUM_FREELISTS)
+ nelem = NUM_FREELISTS;
+ }
+ else
+ num_partitions = 0;
+
+ compute_buckets_and_segs(nelem, num_partitions, ssize,
+ &nbuckets, &nsegs);
+
+ /* initial_elems as false indicates no elements are to be pre-allocated */
+ if (prealloc)
+ nelem_prealloc = nelem;
+
+ return sizeof(HASHHDR) + dsize * sizeof(HASHSEGMENT)
+ + sizeof(HASHBUCKET) * ssize * nsegs
+ + nelem_prealloc * elementSize;
}
@@ -1285,7 +1394,7 @@ get_hash_entry(HTAB *hashp, int freelist_idx)
* Failing because the needed element is in a different freelist is
* not acceptable.
*/
- if (!element_alloc(hashp, hctl->nelem_alloc, freelist_idx))
+ if ((newElement = element_alloc(hashp, hctl->nelem_alloc)) == NULL)
{
int borrow_from_idx;
@@ -1322,6 +1431,7 @@ get_hash_entry(HTAB *hashp, int freelist_idx)
/* no elements available to borrow either, so out of memory */
return NULL;
}
+ element_add(hashp, newElement, hctl->nelem_alloc, freelist_idx);
}
/* remove entry from freelist, bump nentries */
@@ -1700,29 +1810,43 @@ seg_alloc(HTAB *hashp)
}
/*
- * allocate some new elements and link them into the indicated free list
+ * allocate some new elements
*/
-static bool
-element_alloc(HTAB *hashp, int nelem, int freelist_idx)
+static HASHELEMENT *
+element_alloc(HTAB *hashp, int nelem)
{
HASHHDR *hctl = hashp->hctl;
Size elementSize;
- HASHELEMENT *firstElement;
- HASHELEMENT *tmpElement;
- HASHELEMENT *prevElement;
- int i;
+ HASHELEMENT *firstElement = NULL;
if (hashp->isfixed)
- return false;
+ return NULL;
/* Each element has a HASHELEMENT header plus user data. */
- elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize);
-
+ elementSize = HASH_ELEMENT_SIZE(hctl);
CurrentDynaHashCxt = hashp->hcxt;
firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize);
if (!firstElement)
- return false;
+ return NULL;
+
+ return firstElement;
+}
+
+/*
+ * link the elements allocated by element_alloc into the indicated free list
+ */
+static void
+element_add(HTAB *hashp, HASHELEMENT *firstElement, int nelem, int freelist_idx)
+{
+ HASHHDR *hctl = hashp->hctl;
+ Size elementSize;
+ HASHELEMENT *tmpElement;
+ HASHELEMENT *prevElement;
+ int i;
+
+ /* Each element has a HASHELEMENT header plus user data. */
+ elementSize = HASH_ELEMENT_SIZE(hctl);
/* prepare to link all the new entries into the freelist */
prevElement = NULL;
@@ -1744,8 +1868,6 @@ element_alloc(HTAB *hashp, int nelem, int freelist_idx)
if (IS_PARTITIONED(hctl))
SpinLockRelease(&hctl->freeList[freelist_idx].mutex);
-
- return true;
}
/*
@@ -1957,3 +2079,34 @@ AtEOSubXact_HashTables(bool isCommit, int nestDepth)
}
}
}
+
+/*
+ * Calculate the number of buckets and segments to store the given
+ * number of elements in a hash table. Segments contain buckets which
+ * in turn contain elements.
+ */
+static void
+compute_buckets_and_segs(long nelem, long num_partitions, long ssize,
+ int *nbuckets, int *nsegments)
+{
+ /*
+ * Allocate space for the next greater power of two number of buckets,
+ * assuming a desired maximum load factor of 1.
+ */
+ *nbuckets = next_pow2_int(nelem);
+
+ /*
+ * In a partitioned table, nbuckets must be at least equal to
+ * num_partitions; were it less, keys with apparently different partition
+ * numbers would map to the same bucket, breaking partition independence.
+ * (Normally nbuckets will be much bigger; this is just a safety check.)
+ */
+ while ((*nbuckets) < num_partitions)
+ (*nbuckets) <<= 1;
+
+ /*
+ * Figure number of directory segments needed, round up to a power of 2
+ */
+ *nsegments = ((*nbuckets) - 1) / ssize + 1;
+ *nsegments = next_pow2_int(*nsegments);
+}
diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h
index 932cc4f34d9..6c190ce02d4 100644
--- a/src/include/utils/hsearch.h
+++ b/src/include/utils/hsearch.h
@@ -151,7 +151,8 @@ extern void hash_seq_term(HASH_SEQ_STATUS *status);
extern void hash_freeze(HTAB *hashp);
extern Size hash_estimate_size(long num_entries, Size entrysize);
extern long hash_select_dirsize(long num_entries);
-extern Size hash_get_shared_size(HASHCTL *info, int flags);
+extern Size hash_get_size(const HASHCTL *info, int flags,
+ long nelem, bool prealloc);
extern void AtEOXact_HashTables(bool isCommit);
extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth);