Diffstat (limited to 'src/backend/executor/nodeHash.c')
-rw-r--r--   src/backend/executor/nodeHash.c   75
1 file changed, 41 insertions(+), 34 deletions(-)
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 3ef7cfbb5bf..b3203ba7fa4 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -386,7 +386,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
  */
 
 /* Target bucket loading (tuples per bucket) */
-#define NTUP_PER_BUCKET			10
+#define NTUP_PER_BUCKET			1
 
 void
 ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
@@ -396,12 +396,13 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 {
 	int			tupsize;
 	double		inner_rel_bytes;
+	long		bucket_bytes;
 	long		hash_table_bytes;
 	long		skew_table_bytes;
 	long		max_pointers;
-	int			nbatch;
+	int			nbatch = 1;
 	int			nbuckets;
-	int			i;
+	double		dbuckets;
 
 	/* Force a plausible relation size if no info */
 	if (ntuples <= 0.0)
@@ -460,56 +461,61 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 	/*
 	 * Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when
-	 * memory is filled.  Set nbatch to the smallest power of 2 that appears
-	 * sufficient.  The Min() steps limit the results so that the pointer
-	 * arrays we'll try to allocate do not exceed work_mem.
+	 * memory is filled, assuming a single batch.  The Min() step limits the
+	 * results so that the pointer arrays we'll try to allocate do not exceed
+	 * work_mem.
 	 */
 	max_pointers = (work_mem * 1024L) / sizeof(void *);
 	/* also ensure we avoid integer overflow in nbatch and nbuckets */
 	max_pointers = Min(max_pointers, INT_MAX / 2);
 
+	dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
+	dbuckets = Min(dbuckets, max_pointers);
+	nbuckets = Max((int) dbuckets, 1024);
+	nbuckets = 1 << my_log2(nbuckets);
+	bucket_bytes = sizeof(HashJoinTuple) * nbuckets;
 
-	if (inner_rel_bytes > hash_table_bytes)
+	/*
+	 * If there's not enough space to store the projected number of tuples
+	 * and the required bucket headers, we will need multiple batches.
+	 */
+	if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
 	{
 		/* We'll need multiple batches */
 		long		lbuckets;
 		double		dbatch;
 		int			minbatch;
+		long		bucket_size;
 
-		lbuckets = (hash_table_bytes / tupsize) / NTUP_PER_BUCKET;
+		/*
+		 * Estimate the number of buckets we'll want to have when work_mem
+		 * is entirely full.  Each bucket will contain a bucket pointer plus
+		 * NTUP_PER_BUCKET tuples, whose projected size already includes
+		 * overhead for the hash code, pointer to the next tuple, etc.
+		 */
+		bucket_size = (tupsize * NTUP_PER_BUCKET + sizeof(HashJoinTuple));
+		lbuckets = 1 << my_log2(hash_table_bytes / bucket_size);
 		lbuckets = Min(lbuckets, max_pointers);
 		nbuckets = (int) lbuckets;
+		bucket_bytes = nbuckets * sizeof(HashJoinTuple);
+
+		/*
+		 * Buckets are simple pointers to hashjoin tuples, while tupsize
+		 * includes the pointer, hash code, and MinimalTupleData.  So buckets
+		 * should never really exceed 25% of work_mem (even for
+		 * NTUP_PER_BUCKET=1); except maybe for work_mem values that are not
+		 * 2^N bytes, where we might get more because of doubling.  So let's
+		 * look for 50% here.
+		 */
+		Assert(bucket_bytes <= hash_table_bytes / 2);
 
-		dbatch = ceil(inner_rel_bytes / hash_table_bytes);
+		/* Calculate required number of batches. */
+		dbatch = ceil(inner_rel_bytes / (hash_table_bytes - bucket_bytes));
 		dbatch = Min(dbatch, max_pointers);
 		minbatch = (int) dbatch;
 		nbatch = 2;
 		while (nbatch < minbatch)
 			nbatch <<= 1;
 	}
-	else
-	{
-		/* We expect the hashtable to fit in memory */
-		double		dbuckets;
-
-		dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
-		dbuckets = Min(dbuckets, max_pointers);
-		nbuckets = (int) dbuckets;
-
-		nbatch = 1;
-	}
-
-	/*
-	 * Both nbuckets and nbatch must be powers of 2 to make
-	 * ExecHashGetBucketAndBatch fast.  We already fixed nbatch; now inflate
-	 * nbuckets to the next larger power of 2.  We also force nbuckets to not
-	 * be real small, by starting the search at 2^10.  (Note: above we made
-	 * sure that nbuckets is not more than INT_MAX / 2, so this loop cannot
-	 * overflow, nor can the final shift to recalculate nbuckets.)
-	 */
-	i = 10;
-	while ((1 << i) < nbuckets)
-		i++;
-	nbuckets = (1 << i);
 
 	*numbuckets = nbuckets;
 	*numbatches = nbatch;
@@ -754,7 +760,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
 		hashtable->spaceUsed += hashTupleSize;
 		if (hashtable->spaceUsed > hashtable->spacePeak)
 			hashtable->spacePeak = hashtable->spaceUsed;
-		if (hashtable->spaceUsed > hashtable->spaceAllowed)
+		if (hashtable->spaceUsed + hashtable->nbuckets * sizeof(HashJoinTuple)
+			> hashtable->spaceAllowed)
 			ExecHashIncreaseNumBatches(hashtable);
 	}
 	else
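For context, the sizing policy this patch installs can be exercised outside the server. The standalone C sketch below mirrors the patched ExecChooseHashTableSize arithmetic under simplifying assumptions, not the executor code itself: HashJoinTuple is modeled as a bare pointer, my_log2() is reimplemented locally, the skew-table carve-out is ignored, and ntuples, tupsize, and work_mem are made-up inputs rather than planner estimates and the real GUC.

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#define NTUP_PER_BUCKET 1
#define Min(a, b) ((a) < (b) ? (a) : (b))
#define Max(a, b) ((a) > (b) ? (a) : (b))

typedef void *HashJoinTuple;    /* a bucket header is just one pointer */

/* smallest i such that 2^i >= num; local stand-in for my_log2() */
static int
my_log2(long num)
{
    int     i;
    long    limit;

    for (i = 0, limit = 1; limit < num; i++, limit <<= 1)
        ;
    return i;
}

int
main(void)
{
    double  ntuples = 1e6;      /* assumed inner-relation row count */
    int     tupsize = 48;       /* assumed bytes/tuple incl. header overhead */
    long    work_mem = 4096;    /* assumed work_mem in KB, as the GUC is */

    long    hash_table_bytes = work_mem * 1024L;
    double  inner_rel_bytes = ntuples * tupsize;
    long    max_pointers = hash_table_bytes / sizeof(void *);
    long    bucket_bytes;
    double  dbuckets;
    int     nbuckets;
    int     nbatch = 1;

    max_pointers = Min(max_pointers, INT_MAX / 2);

    /* Single-batch guess: one bucket per tuple, power of 2, at least 2^10 */
    dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
    dbuckets = Min(dbuckets, (double) max_pointers);
    nbuckets = Max((int) dbuckets, 1024);
    nbuckets = 1 << my_log2(nbuckets);
    bucket_bytes = sizeof(HashJoinTuple) * nbuckets;

    if (inner_rel_bytes + bucket_bytes > hash_table_bytes)
    {
        /* Multiple batches: size buckets for a completely full work_mem */
        long    bucket_size = tupsize * NTUP_PER_BUCKET + sizeof(HashJoinTuple);
        long    lbuckets = 1L << my_log2(hash_table_bytes / bucket_size);
        double  dbatch;
        int     minbatch;

        lbuckets = Min(lbuckets, max_pointers);
        nbuckets = (int) lbuckets;
        bucket_bytes = nbuckets * sizeof(HashJoinTuple);
        assert(bucket_bytes <= hash_table_bytes / 2);

        /* Batches must cover the inner relation net of bucket headers */
        dbatch = ceil(inner_rel_bytes / (hash_table_bytes - bucket_bytes));
        dbatch = Min(dbatch, (double) max_pointers);
        minbatch = (int) dbatch;
        for (nbatch = 2; nbatch < minbatch; nbatch <<= 1)
            ;
    }

    /* With the assumed inputs: nbuckets = 131072, nbatch = 16 */
    printf("nbuckets = %d, nbatch = %d\n", nbuckets, nbatch);
    return 0;
}

The same accounting motivates the ExecHashTableInsert hunk: with one bucket per tuple the pointer array is no longer negligible, so its nbuckets * sizeof(HashJoinTuple) bytes are charged against spaceAllowed before deciding whether another batch is needed.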