about | summary | refs | log | tree | commit | diff
path: root/src/backend/executor
diff options: context | space | mode
Diffstat (limited to 'src/backend/executor')
-rw-r--r--  src/backend/executor/execGrouping.c |   6
-rw-r--r--  src/backend/executor/nodeAgg.c      |  42
-rw-r--r--  src/backend/executor/nodeHash.c     | 126
-rw-r--r--  src/backend/executor/nodeMemoize.c  |   2
4 files changed, 92 insertions(+), 84 deletions(-)
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 5fd0b26cbc1..c11427a1f66 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -165,14 +165,16 @@ BuildTupleHashTableExt(PlanState *parent,
{
TupleHashTable hashtable;
Size entrysize = sizeof(TupleHashEntryData) + additionalsize;
- int hash_mem = get_hash_mem();
+ Size hash_mem_limit;
MemoryContext oldcontext;
bool allow_jit;
Assert(nbuckets > 0);
/* Limit initial table size request to not more than hash_mem */
- nbuckets = Min(nbuckets, (long) ((hash_mem * 1024L) / entrysize));
+ hash_mem_limit = get_hash_memory_limit() / entrysize;
+ if (nbuckets > hash_mem_limit)
+ nbuckets = hash_mem_limit;
oldcontext = MemoryContextSwitchTo(metacxt);
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 914b02ceee4..39bea204d16 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1802,15 +1802,15 @@ hash_agg_set_limits(double hashentrysize, double input_groups, int used_bits,
{
int npartitions;
Size partition_mem;
- int hash_mem = get_hash_mem();
+ Size hash_mem_limit = get_hash_memory_limit();
/* if not expected to spill, use all of hash_mem */
- if (input_groups * hashentrysize < hash_mem * 1024L)
+ if (input_groups * hashentrysize <= hash_mem_limit)
{
if (num_partitions != NULL)
*num_partitions = 0;
- *mem_limit = hash_mem * 1024L;
- *ngroups_limit = *mem_limit / hashentrysize;
+ *mem_limit = hash_mem_limit;
+ *ngroups_limit = hash_mem_limit / hashentrysize;
return;
}
@@ -1835,10 +1835,10 @@ hash_agg_set_limits(double hashentrysize, double input_groups, int used_bits,
* minimum number of partitions, so we aren't going to dramatically exceed
* work mem anyway.
*/
- if (hash_mem * 1024L > 4 * partition_mem)
- *mem_limit = hash_mem * 1024L - partition_mem;
+ if (hash_mem_limit > 4 * partition_mem)
+ *mem_limit = hash_mem_limit - partition_mem;
else
- *mem_limit = hash_mem * 1024L * 0.75;
+ *mem_limit = hash_mem_limit * 0.75;
if (*mem_limit > hashentrysize)
*ngroups_limit = *mem_limit / hashentrysize;
@@ -1992,32 +1992,36 @@ static int
hash_choose_num_partitions(double input_groups, double hashentrysize,
int used_bits, int *log2_npartitions)
{
- Size mem_wanted;
- int partition_limit;
+ Size hash_mem_limit = get_hash_memory_limit();
+ double partition_limit;
+ double mem_wanted;
+ double dpartitions;
int npartitions;
int partition_bits;
- int hash_mem = get_hash_mem();
/*
* Avoid creating so many partitions that the memory requirements of the
* open partition files are greater than 1/4 of hash_mem.
*/
partition_limit =
- (hash_mem * 1024L * 0.25 - HASHAGG_READ_BUFFER_SIZE) /
+ (hash_mem_limit * 0.25 - HASHAGG_READ_BUFFER_SIZE) /
HASHAGG_WRITE_BUFFER_SIZE;
mem_wanted = HASHAGG_PARTITION_FACTOR * input_groups * hashentrysize;
/* make enough partitions so that each one is likely to fit in memory */
- npartitions = 1 + (mem_wanted / (hash_mem * 1024L));
+ dpartitions = 1 + (mem_wanted / hash_mem_limit);
+
+ if (dpartitions > partition_limit)
+ dpartitions = partition_limit;
- if (npartitions > partition_limit)
- npartitions = partition_limit;
+ if (dpartitions < HASHAGG_MIN_PARTITIONS)
+ dpartitions = HASHAGG_MIN_PARTITIONS;
+ if (dpartitions > HASHAGG_MAX_PARTITIONS)
+ dpartitions = HASHAGG_MAX_PARTITIONS;
- if (npartitions < HASHAGG_MIN_PARTITIONS)
- npartitions = HASHAGG_MIN_PARTITIONS;
- if (npartitions > HASHAGG_MAX_PARTITIONS)
- npartitions = HASHAGG_MAX_PARTITIONS;
+ /* HASHAGG_MAX_PARTITIONS limit makes this safe */
+ npartitions = (int) dpartitions;
/* ceil(log2(npartitions)) */
partition_bits = my_log2(npartitions);
@@ -2030,7 +2034,7 @@ hash_choose_num_partitions(double input_groups, double hashentrysize,
*log2_npartitions = partition_bits;
/* number of partitions will be a power of two */
- npartitions = 1L << partition_bits;
+ npartitions = 1 << partition_bits;
return npartitions;
}
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index c5f2d1d22b1..73eb074cbf9 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -675,15 +675,12 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
{
int tupsize;
double inner_rel_bytes;
- long bucket_bytes;
- long hash_table_bytes;
- long skew_table_bytes;
- long max_pointers;
- long mppow2;
+ size_t hash_table_bytes;
+ size_t bucket_bytes;
+ size_t max_pointers;
int nbatch = 1;
int nbuckets;
double dbuckets;
- int hash_mem = get_hash_mem();
/* Force a plausible relation size if no info */
if (ntuples <= 0.0)
@@ -700,9 +697,9 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
inner_rel_bytes = ntuples * tupsize;
/*
- * Target in-memory hashtable size is hash_mem kilobytes.
+ * Compute in-memory hashtable size limit from GUCs.
*/
- hash_table_bytes = hash_mem * 1024L;
+ hash_table_bytes = get_hash_memory_limit();
/*
* Parallel Hash tries to use the combined hash_mem of all workers to
@@ -710,7 +707,14 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
* per worker and tries to process batches in parallel.
*/
if (try_combined_hash_mem)
- hash_table_bytes += hash_table_bytes * parallel_workers;
+ {
+ /* Careful, this could overflow size_t */
+ double newlimit;
+
+ newlimit = (double) hash_table_bytes * (double) (parallel_workers + 1);
+ newlimit = Min(newlimit, (double) SIZE_MAX);
+ hash_table_bytes = (size_t) newlimit;
+ }
*space_allowed = hash_table_bytes;
@@ -730,9 +734,12 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
*/
if (useskew)
{
- skew_table_bytes = hash_table_bytes * SKEW_HASH_MEM_PERCENT / 100;
+ size_t bytes_per_mcv;
+ size_t skew_mcvs;
/*----------
+ * Compute number of MCVs we could hold in hash_table_bytes
+ *
* Divisor is:
* size of a hash tuple +
* worst-case size of skewBucket[] per MCV +
@@ -740,12 +747,26 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
* size of skew bucket struct itself
*----------
*/
- *num_skew_mcvs = skew_table_bytes / (tupsize +
- (8 * sizeof(HashSkewBucket *)) +
- sizeof(int) +
- SKEW_BUCKET_OVERHEAD);
- if (*num_skew_mcvs > 0)
- hash_table_bytes -= skew_table_bytes;
+ bytes_per_mcv = tupsize +
+ (8 * sizeof(HashSkewBucket *)) +
+ sizeof(int) +
+ SKEW_BUCKET_OVERHEAD;
+ skew_mcvs = hash_table_bytes / bytes_per_mcv;
+
+ /*
+ * Now scale by SKEW_HASH_MEM_PERCENT (we do it in this order so as
+ * not to worry about size_t overflow in the multiplication)
+ */
+ skew_mcvs = (skew_mcvs * SKEW_HASH_MEM_PERCENT) / 100;
+
+ /* Now clamp to integer range */
+ skew_mcvs = Min(skew_mcvs, INT_MAX);
+
+ *num_skew_mcvs = (int) skew_mcvs;
+
+ /* Reduce hash_table_bytes by the amount needed for the skew table */
+ if (skew_mcvs > 0)
+ hash_table_bytes -= skew_mcvs * bytes_per_mcv;
}
else
*num_skew_mcvs = 0;
@@ -753,22 +774,20 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
/*
* Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when
* memory is filled, assuming a single batch; but limit the value so that
- * the pointer arrays we'll try to allocate do not exceed hash_mem nor
- * MaxAllocSize.
+ * the pointer arrays we'll try to allocate do not exceed hash_table_bytes
+ * nor MaxAllocSize.
*
* Note that both nbuckets and nbatch must be powers of 2 to make
* ExecHashGetBucketAndBatch fast.
*/
- max_pointers = *space_allowed / sizeof(HashJoinTuple);
+ max_pointers = hash_table_bytes / sizeof(HashJoinTuple);
max_pointers = Min(max_pointers, MaxAllocSize / sizeof(HashJoinTuple));
/* If max_pointers isn't a power of 2, must round it down to one */
- mppow2 = 1L << my_log2(max_pointers);
- if (max_pointers != mppow2)
- max_pointers = mppow2 / 2;
+ max_pointers = pg_prevpower2_size_t(max_pointers);
/* Also ensure we avoid integer overflow in nbatch and nbuckets */
/* (this step is redundant given the current value of MaxAllocSize) */
- max_pointers = Min(max_pointers, INT_MAX / 2);
+ max_pointers = Min(max_pointers, INT_MAX / 2 + 1);
dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
dbuckets = Min(dbuckets, max_pointers);
@@ -776,7 +795,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
/* don't let nbuckets be really small, though ... */
nbuckets = Max(nbuckets, 1024);
/* ... and force it to be a power of 2. */
- nbuckets = 1 << my_log2(nbuckets);
+ nbuckets = pg_nextpower2_32(nbuckets);
/*
* If there's not enough space to store the projected number of tuples and
@@ -786,10 +805,10 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
{
/* We'll need multiple batches */
- long lbuckets;
+ size_t sbuckets;
double dbatch;
int minbatch;
- long bucket_size;
+ size_t bucket_size;
/*
* If Parallel Hash with combined hash_mem would still need multiple
@@ -813,10 +832,10 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
* overhead for the hash code, pointer to the next tuple, etc.
*/
bucket_size = (tupsize * NTUP_PER_BUCKET + sizeof(HashJoinTuple));
- lbuckets = 1L << my_log2(hash_table_bytes / bucket_size);
- lbuckets = Min(lbuckets, max_pointers);
- nbuckets = (int) lbuckets;
- nbuckets = 1 << my_log2(nbuckets);
+ sbuckets = pg_nextpower2_size_t(hash_table_bytes / bucket_size);
+ sbuckets = Min(sbuckets, max_pointers);
+ nbuckets = (int) sbuckets;
+ nbuckets = pg_nextpower2_32(nbuckets);
bucket_bytes = nbuckets * sizeof(HashJoinTuple);
/*
@@ -1097,14 +1116,12 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
/* Figure out how many batches to use. */
if (hashtable->nbatch == 1)
{
- int hash_mem = get_hash_mem();
-
/*
* We are going from single-batch to multi-batch. We need
* to switch from one large combined memory budget to the
* regular hash_mem budget.
*/
- pstate->space_allowed = hash_mem * 1024L;
+ pstate->space_allowed = get_hash_memory_limit();
/*
* The combined hash_mem of all participants wasn't
@@ -1113,7 +1130,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
* insufficient. So try two batches per participant,
* rounded up to a power of two.
*/
- new_nbatch = 1 << my_log2(pstate->nparticipants * 2);
+ new_nbatch = pg_nextpower2_32(pstate->nparticipants * 2);
}
else
{
@@ -1152,7 +1169,7 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
MaxAllocSize / sizeof(dsa_pointer_atomic));
new_nbuckets = (int) dbuckets;
new_nbuckets = Max(new_nbuckets, 1024);
- new_nbuckets = 1 << my_log2(new_nbuckets);
+ new_nbuckets = pg_nextpower2_32(new_nbuckets);
dsa_free(hashtable->area, old_batch0->buckets);
hashtable->batches[0].shared->buckets =
dsa_allocate(hashtable->area,
@@ -3372,39 +3389,24 @@ ExecParallelHashTuplePrealloc(HashJoinTable hashtable, int batchno, size_t size)
}
/*
- * Get a hash_mem value by multiplying the work_mem GUC's value by the
- * hash_mem_multiplier GUC's value.
+ * Calculate the limit on how much memory can be used by Hash and similar
+ * plan types. This is work_mem times hash_mem_multiplier, and is
+ * expressed in bytes.
*
- * Returns a work_mem style KB value that hash-based nodes (including but not
- * limited to hash join) use in place of work_mem. This is subject to the
- * same restrictions as work_mem itself. (There is no such thing as the
- * hash_mem GUC, but it's convenient for our callers to pretend that there
- * is.)
- *
- * Exported for use by the planner, as well as other hash-based executor
+ * Exported for use by the planner, as well as other hash-like executor
* nodes. This is a rather random place for this, but there is no better
* place.
*/
-int
-get_hash_mem(void)
+size_t
+get_hash_memory_limit(void)
{
- double hash_mem;
+ double mem_limit;
- Assert(hash_mem_multiplier >= 1.0);
+ /* Do initial calculation in double arithmetic */
+ mem_limit = (double) work_mem * hash_mem_multiplier * 1024.0;
- hash_mem = (double) work_mem * hash_mem_multiplier;
-
- /*
- * guc.c enforces a MAX_KILOBYTES limitation on work_mem in order to
- * support the assumption that raw derived byte values can be stored in
- * 'long' variables. The returned hash_mem value must also meet this
- * assumption.
- *
- * We clamp the final value rather than throw an error because it should
- * be possible to set work_mem and hash_mem_multiplier independently.
- */
- if (hash_mem < MAX_KILOBYTES)
- return (int) hash_mem;
+ /* Clamp in case it doesn't fit in size_t */
+ mem_limit = Min(mem_limit, (double) SIZE_MAX);
- return MAX_KILOBYTES;
+ return (size_t) mem_limit;
}
diff --git a/src/backend/executor/nodeMemoize.c b/src/backend/executor/nodeMemoize.c
index 2fde4ebce69..bec588b3a04 100644
--- a/src/backend/executor/nodeMemoize.c
+++ b/src/backend/executor/nodeMemoize.c
@@ -905,7 +905,7 @@ ExecInitMemoize(Memoize *node, EState *estate, int eflags)
mstate->mem_used = 0;
/* Limit the total memory consumed by the cache to this */
- mstate->mem_limit = get_hash_mem() * 1024L;
+ mstate->mem_limit = get_hash_memory_limit();
/* A memory context dedicated for the cache */
mstate->tableContext = AllocSetContextCreate(CurrentMemoryContext,