diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/storage/ipc/shmem.c | 4 | ||||
-rw-r--r-- | src/backend/utils/hash/dynahash.c | 281 | ||||
-rw-r--r-- | src/include/utils/hsearch.h | 3 |
3 files changed, 222 insertions, 66 deletions
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index 895a43fb39e..e453f856794 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -73,6 +73,7 @@ #include "storage/shmem.h" #include "storage/spin.h" #include "utils/builtins.h" +#include "utils/dynahash.h" static void *ShmemAllocRaw(Size size, Size *allocated_size); @@ -346,7 +347,8 @@ ShmemInitHash(const char *name, /* table string name for shmem index */ /* look it up in the shmem index */ location = ShmemInitStruct(name, - hash_get_shared_size(infoP, hash_flags), + hash_get_size(infoP, hash_flags, + init_size, true), &found); /* diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c index 3f25929f2d8..2a5d6efed79 100644 --- a/src/backend/utils/hash/dynahash.c +++ b/src/backend/utils/hash/dynahash.c @@ -260,12 +260,39 @@ static long hash_accesses, hash_expansions; #endif +/* access to parts of the hash table, allocated as a single chunk */ +#define HASH_DIRECTORY_PTR(hashp) \ + (((char *) (hashp)->hctl) + sizeof(HASHHDR)) + +#define HASH_SEGMENT_OFFSET(hctl, idx) \ + (sizeof(HASHHDR) + \ + ((hctl)->dsize * sizeof(HASHSEGMENT)) + \ + ((hctl)->ssize * (idx) * sizeof(HASHBUCKET))) + +#define HASH_SEGMENT_PTR(hashp, idx) \ + ((char *) (hashp)->hctl + HASH_SEGMENT_OFFSET((hashp)->hctl, (idx))) + +#define HASH_SEGMENT_SIZE(hashp) \ + ((hashp)->ssize * sizeof(HASHBUCKET)) + +#define HASH_ELEMENTS_PTR(hashp, nsegs) \ + ((char *) (hashp)->hctl + HASH_SEGMENT_OFFSET((hashp)->hctl, nsegs)) + +/* Each element has a HASHELEMENT header plus user data. 
*/ +#define HASH_ELEMENT_SIZE(hctl) \ + (MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN((hctl)->entrysize)) + +#define HASH_ELEMENT_NEXT(hctl, num, ptr) \ + ((char *) (ptr) + ((num) * HASH_ELEMENT_SIZE(hctl))) + /* * Private function prototypes */ static void *DynaHashAlloc(Size size); static HASHSEGMENT seg_alloc(HTAB *hashp); -static bool element_alloc(HTAB *hashp, int nelem, int freelist_idx); +static HASHELEMENT *element_alloc(HTAB *hashp, int nelem); +static void element_add(HTAB *hashp, HASHELEMENT *firstElement, + int nelem, int freelist_idx); static bool dir_realloc(HTAB *hashp); static bool expand_table(HTAB *hashp); static HASHBUCKET get_hash_entry(HTAB *hashp, int freelist_idx); @@ -280,6 +307,9 @@ static int next_pow2_int(long num); static void register_seq_scan(HTAB *hashp); static void deregister_seq_scan(HTAB *hashp); static bool has_seq_scans(HTAB *hashp); +static void compute_buckets_and_segs(long nelem, long num_partitions, + long ssize, + int *nbuckets, int *nsegments); /* @@ -353,6 +383,8 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) { HTAB *hashp; HASHHDR *hctl; + int nelem_batch; + bool prealloc; /* * Hash tables now allocate space for key and data, but you have to say @@ -507,9 +539,31 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) hashp->isshared = false; } + /* Choose the number of entries to allocate at a time. */ + nelem_batch = choose_nelem_alloc(info->entrysize); + + /* + * Decide whether to pre-allocate elements. + * + * For a private hash table, preallocate the requested number of elements + * if it's less than our chosen nelem_alloc. This avoids wasting space if + * the caller correctly estimates a small table size. + * + * For a shared hash table, preallocate the requested number of elements. + * This reduces problems with run-time out-of-shared-memory conditions. 
+ */ + prealloc = (flags & HASH_SHARED_MEM) || (nelem < nelem_batch); + + /* + * Allocate the memory needed for hash header, directory, segments and + * elements together. Use pointer arithmetic to arrive at the start of + * each of these structures later. + */ if (!hashp->hctl) { - hashp->hctl = (HASHHDR *) hashp->alloc(sizeof(HASHHDR)); + Size size = hash_get_size(info, flags, nelem, prealloc); + + hashp->hctl = (HASHHDR *) hashp->alloc(size); if (!hashp->hctl) ereport(ERROR, (errcode(ERRCODE_OUT_OF_MEMORY), @@ -558,6 +612,9 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) hctl->keysize = info->keysize; hctl->entrysize = info->entrysize; + /* remember how many elements to allocate at once */ + hctl->nelem_alloc = nelem_batch; + /* make local copies of heavily-used constant fields */ hashp->keysize = hctl->keysize; hashp->ssize = hctl->ssize; @@ -567,14 +624,6 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) if (!init_htab(hashp, nelem)) elog(ERROR, "failed to initialize hash table \"%s\"", hashp->tabname); - /* - * For a shared hash table, preallocate the requested number of elements. - * This reduces problems with run-time out-of-shared-memory conditions. - * - * For a non-shared hash table, preallocate the requested number of - * elements if it's less than our chosen nelem_alloc. This avoids wasting - * space if the caller correctly estimates a small table size. - */ if ((flags & HASH_SHARED_MEM) || nelem < hctl->nelem_alloc) { @@ -582,6 +631,7 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) freelist_partitions, nelem_alloc, nelem_alloc_first; + void *ptr = NULL; /* * If hash table is partitioned, give each freelist an equal share of @@ -606,14 +656,28 @@ hash_create(const char *tabname, long nelem, const HASHCTL *info, int flags) else nelem_alloc_first = nelem_alloc; + /* + * Calculate the offset at which to find the first partition of + * elements. 
We have to skip space for the header, segments and + * buckets. + */ + ptr = HASH_ELEMENTS_PTR(hashp, hctl->nsegs); + + /* + * Assign the correct location of each partition within a pre-allocated + * buffer. + * + * Actual memory allocation happens in ShmemInitHash for shared hash + * tables or earlier in this function for non-shared hash tables. + * + * We just need to split that allocation into per-batch freelists. + */ for (i = 0; i < freelist_partitions; i++) { int temp = (i == 0) ? nelem_alloc_first : nelem_alloc; - if (!element_alloc(hashp, temp, i)) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); + element_add(hashp, (HASHELEMENT *) ptr, temp, i); + ptr = HASH_ELEMENT_NEXT(hctl, temp, ptr); } } @@ -702,30 +766,17 @@ init_htab(HTAB *hashp, long nelem) SpinLockInit(&(hctl->freeList[i].mutex)); /* - * Allocate space for the next greater power of two number of buckets, - * assuming a desired maximum load factor of 1. - */ - nbuckets = next_pow2_int(nelem); - - /* - * In a partitioned table, nbuckets must be at least equal to - * num_partitions; were it less, keys with apparently different partition - * numbers would map to the same bucket, breaking partition independence. - * (Normally nbuckets will be much bigger; this is just a safety check.) + * We've already calculated these parameters when we calculated how much + * space to allocate in hash_get_size(). Be careful to keep these two + * places in sync, so that we get the same parameters. */ - while (nbuckets < hctl->num_partitions) - nbuckets <<= 1; + compute_buckets_and_segs(nelem, hctl->num_partitions, hctl->ssize, + &nbuckets, &nsegs); hctl->max_bucket = hctl->low_mask = nbuckets - 1; hctl->high_mask = (nbuckets << 1) - 1; /* - * Figure number of directory segments needed, round up to a power of 2 - */ - nsegs = (nbuckets - 1) / hctl->ssize + 1; - nsegs = next_pow2_int(nsegs); - - /* - * Make sure directory is big enough.
If pre-allocated directory is too * small, choke (caller screwed up). */ @@ -737,26 +788,25 @@ init_htab(HTAB *hashp, long nelem) return false; } - /* Allocate a directory */ + /* + * Assign a directory by making it point to the correct location in the + * pre-allocated buffer. + */ if (!(hashp->dir)) { CurrentDynaHashCxt = hashp->hcxt; - hashp->dir = (HASHSEGMENT *) - hashp->alloc(hctl->dsize * sizeof(HASHSEGMENT)); - if (!hashp->dir) - return false; + hashp->dir = (HASHSEGMENT *) HASH_DIRECTORY_PTR(hashp); } - /* Allocate initial segments */ + /* Assign initial segments, which are also pre-allocated */ + i = 0; for (segp = hashp->dir; hctl->nsegs < nsegs; hctl->nsegs++, segp++) { - *segp = seg_alloc(hashp); - if (*segp == NULL) - return false; + *segp = (HASHSEGMENT) HASH_SEGMENT_PTR(hashp, i++); + MemSet(*segp, 0, HASH_SEGMENT_SIZE(hashp)); } - /* Choose number of entries to allocate at a time */ - hctl->nelem_alloc = choose_nelem_alloc(hctl->entrysize); + Assert(i == nsegs); #ifdef HASH_DEBUG fprintf(stderr, "init_htab:\n%s%p\n%s%ld\n%s%ld\n%s%d\n%s%ld\n%s%u\n%s%x\n%s%x\n%s%ld\n", @@ -846,16 +896,75 @@ hash_select_dirsize(long num_entries) } /* - * Compute the required initial memory allocation for a shared-memory - * hashtable with the given parameters. We need space for the HASHHDR - * and for the (non expansible) directory. + * hash_get_size -- determine memory needed for a new dynamic hash table + * + * info: hash table parameters + * flags: bitmask indicating which parameters to take from *info + * nelem: maximum number of elements expected + * prealloc: request preallocation of elements + * + * Compute the required initial memory allocation for a hashtable with the given + * parameters. We need space for the HASHHDR, for the directory, segments and + * preallocated elements. + * + * The hash table may be private or shared. For shared hash tables the directory + * size is non-expansive, and we preallocate all elements (nelem). 
+ */ For private + hash tables, we preallocate elements only if the expected number of elements + * is small (less than nelem_alloc). + */ Size -hash_get_shared_size(HASHCTL *info, int flags) +hash_get_size(const HASHCTL *info, int flags, long nelem, bool prealloc) { - Assert(flags & HASH_DIRSIZE); - Assert(info->dsize == info->max_dsize); - return sizeof(HASHHDR) + info->dsize * sizeof(HASHSEGMENT); + int nbuckets; + int nsegs; + int num_partitions; + long ssize; + long dsize; + Size elementSize = HASH_ELEMENT_SIZE(info); + long nelem_prealloc = 0; + +#ifdef USE_ASSERT_CHECKING + /* shared hash tables have non-expansible directory */ + if (flags & HASH_SHARED_MEM) + { + Assert(flags & HASH_DIRSIZE); + Assert(info->dsize == info->max_dsize); + Assert(prealloc); + } +#endif + + /* Non-shared hash tables may not specify dir size */ + if (flags & HASH_DIRSIZE) + dsize = info->dsize; + else + dsize = DEF_DIRSIZE; + + if (flags & HASH_SEGMENT) + ssize = info->ssize; + else + ssize = DEF_SEGSIZE; + + if (flags & HASH_PARTITION) + { + num_partitions = info->num_partitions; + + /* Number of entries should be at least equal to the freelists */ + if (nelem < NUM_FREELISTS) + nelem = NUM_FREELISTS; + } + else + num_partitions = 0; + + compute_buckets_and_segs(nelem, num_partitions, ssize, + &nbuckets, &nsegs); + + /* prealloc as false indicates no elements are to be pre-allocated */ + if (prealloc) + nelem_prealloc = nelem; + + return sizeof(HASHHDR) + dsize * sizeof(HASHSEGMENT) + + sizeof(HASHBUCKET) * ssize * nsegs + + nelem_prealloc * elementSize; } @@ -1285,7 +1394,7 @@ get_hash_entry(HTAB *hashp, int freelist_idx) * Failing because the needed element is in a different freelist is * not acceptable.
*/ - if (!element_alloc(hashp, hctl->nelem_alloc, freelist_idx)) + if ((newElement = element_alloc(hashp, hctl->nelem_alloc)) == NULL) { int borrow_from_idx; @@ -1322,6 +1431,7 @@ get_hash_entry(HTAB *hashp, int freelist_idx) /* no elements available to borrow either, so out of memory */ return NULL; } + element_add(hashp, newElement, hctl->nelem_alloc, freelist_idx); } /* remove entry from freelist, bump nentries */ @@ -1700,29 +1810,43 @@ seg_alloc(HTAB *hashp) } /* - * allocate some new elements and link them into the indicated free list + * allocate some new elements */ -static bool -element_alloc(HTAB *hashp, int nelem, int freelist_idx) +static HASHELEMENT * +element_alloc(HTAB *hashp, int nelem) { HASHHDR *hctl = hashp->hctl; Size elementSize; - HASHELEMENT *firstElement; - HASHELEMENT *tmpElement; - HASHELEMENT *prevElement; - int i; + HASHELEMENT *firstElement = NULL; if (hashp->isfixed) - return false; + return NULL; /* Each element has a HASHELEMENT header plus user data. */ - elementSize = MAXALIGN(sizeof(HASHELEMENT)) + MAXALIGN(hctl->entrysize); - + elementSize = HASH_ELEMENT_SIZE(hctl); CurrentDynaHashCxt = hashp->hcxt; firstElement = (HASHELEMENT *) hashp->alloc(nelem * elementSize); if (!firstElement) - return false; + return NULL; + + return firstElement; +} + +/* + * link the elements allocated by element_alloc into the indicated free list + */ +static void +element_add(HTAB *hashp, HASHELEMENT *firstElement, int nelem, int freelist_idx) +{ + HASHHDR *hctl = hashp->hctl; + Size elementSize; + HASHELEMENT *tmpElement; + HASHELEMENT *prevElement; + int i; + + /* Each element has a HASHELEMENT header plus user data. 
*/ + elementSize = HASH_ELEMENT_SIZE(hctl); /* prepare to link all the new entries into the freelist */ prevElement = NULL; @@ -1744,8 +1868,6 @@ element_alloc(HTAB *hashp, int nelem, int freelist_idx) if (IS_PARTITIONED(hctl)) SpinLockRelease(&hctl->freeList[freelist_idx].mutex); - - return true; } /* @@ -1957,3 +2079,34 @@ AtEOSubXact_HashTables(bool isCommit, int nestDepth) } } } + +/* + * Calculate the number of buckets and segments to store the given + * number of elements in a hash table. Segments contain buckets which + * in turn contain elements. + */ +static void +compute_buckets_and_segs(long nelem, long num_partitions, long ssize, + int *nbuckets, int *nsegments) +{ + /* + * Allocate space for the next greater power of two number of buckets, + * assuming a desired maximum load factor of 1. + */ + *nbuckets = next_pow2_int(nelem); + + /* + * In a partitioned table, nbuckets must be at least equal to + * num_partitions; were it less, keys with apparently different partition + * numbers would map to the same bucket, breaking partition independence. + * (Normally nbuckets will be much bigger; this is just a safety check.) 
+ */ + while ((*nbuckets) < num_partitions) + (*nbuckets) <<= 1; + + /* + * Figure number of directory segments needed, round up to a power of 2 + */ + *nsegments = ((*nbuckets) - 1) / ssize + 1; + *nsegments = next_pow2_int(*nsegments); +} diff --git a/src/include/utils/hsearch.h b/src/include/utils/hsearch.h index 932cc4f34d9..6c190ce02d4 100644 --- a/src/include/utils/hsearch.h +++ b/src/include/utils/hsearch.h @@ -151,7 +151,8 @@ extern void hash_seq_term(HASH_SEQ_STATUS *status); extern void hash_freeze(HTAB *hashp); extern Size hash_estimate_size(long num_entries, Size entrysize); extern long hash_select_dirsize(long num_entries); -extern Size hash_get_shared_size(HASHCTL *info, int flags); +extern Size hash_get_size(const HASHCTL *info, int flags, + long nelem, bool prealloc); extern void AtEOXact_HashTables(bool isCommit); extern void AtEOSubXact_HashTables(bool isCommit, int nestDepth); |