author     Tom Lane <tgl@sss.pgh.pa.us>   2002-12-29 22:28:50 +0000
committer  Tom Lane <tgl@sss.pgh.pa.us>   2002-12-29 22:28:50 +0000
commit     b33265e9e6648e0d1302896cb944e500364497ea
tree       312d3803b3197359cf83db25ab4bda53c1d23d6a /src/backend/executor/nodeHash.c
parent     c9d87120430f7137e074a1acdb753161d56e88bb
Adjust hash table sizing algorithm to avoid integer overflow in
ExecHashJoinGetBatch(). Fixes core dump on large hash joins, as in example from Rae Stiening.
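
The core of the fix is the clamp-before-cast pattern visible in the diff below: an intermediate result is computed as a double, compared against INT_MAX, and only then narrowed to int. The stand-alone sketch here illustrates that pattern for the totalbuckets computation; the FUDGE_FAC and NTUP_PER_BUCKET values are placeholders for illustration, not the definitions from nodeHash.c.

/*
 * Sketch of the clamp-before-cast pattern the patch applies to
 * totalbuckets: the double result of ceil() is checked against INT_MAX
 * before being narrowed to int, so a huge inner relation can no longer
 * produce a wrapped-around (negative) bucket count.  Constants below are
 * illustrative stand-ins, not the values used in nodeHash.c.
 */
#include <limits.h>
#include <math.h>
#include <stdio.h>

#define FUDGE_FAC       2.0     /* placeholder slop factor */
#define NTUP_PER_BUCKET 10      /* placeholder target bucket load */

static int
clamped_bucket_count(double ntuples)
{
	double	dtmp = ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
	int		totalbuckets;

	if (dtmp < INT_MAX)
		totalbuckets = (int) dtmp;	/* fits in an int: safe to cast */
	else
		totalbuckets = INT_MAX;		/* clamp rather than overflow on the cast */
	if (totalbuckets <= 0)
		totalbuckets = 1;			/* guard against a zero/negative estimate */
	return totalbuckets;
}

int
main(void)
{
	/* Row count large enough that an unclamped cast would misbehave. */
	printf("%d\n", clamped_bucket_count(1e12));
	/* A small relation still gets a sane, positive bucket count. */
	printf("%d\n", clamped_bucket_count(5.0));
	return 0;
}
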
Diffstat (limited to 'src/backend/executor/nodeHash.c')
-rw-r--r--  src/backend/executor/nodeHash.c  36
1 file changed, 23 insertions(+), 13 deletions(-)
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index efdd3b3cabb..9e0e4f0b8cb 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.71 2002/12/15 16:17:46 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.72 2002/12/29 22:28:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,6 +20,7 @@
  */
 #include "postgres.h"
 
+#include <limits.h>
 #include <math.h>
 
 #include "access/hash.h"
@@ -344,7 +345,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 {
 	int			tupsize;
 	double		inner_rel_bytes;
-	double		hash_table_bytes;
+	long		hash_table_bytes;
+	double		dtmp;
 	int			nbatch;
 	int			nbuckets;
 	int			totalbuckets;
@@ -362,20 +364,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 	inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
 
 	/*
-	 * Target hashtable size is SortMem kilobytes, but not less than
-	 * sqrt(estimated inner rel size), so as to avoid horrible
-	 * performance.
+	 * Target in-memory hashtable size is SortMem kilobytes.
 	 */
-	hash_table_bytes = sqrt(inner_rel_bytes);
-	if (hash_table_bytes < (SortMem * 1024L))
-		hash_table_bytes = SortMem * 1024L;
+	hash_table_bytes = SortMem * 1024L;
 
 	/*
 	 * Count the number of hash buckets we want for the whole relation,
 	 * for an average bucket load of NTUP_PER_BUCKET (per virtual
-	 * bucket!).
+	 * bucket!).  It has to fit in an int, however.
 	 */
-	totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+	dtmp = ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+	if (dtmp < INT_MAX)
+		totalbuckets = (int) dtmp;
+	else
+		totalbuckets = INT_MAX;
+	if (totalbuckets <= 0)
+		totalbuckets = 1;
 
 	/*
 	 * Count the number of buckets we think will actually fit in the
@@ -409,10 +413,16 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 	 * that nbatch doesn't have to have anything to do with the ratio
 	 * totalbuckets/nbuckets; in fact, it is the number of groups we
 	 * will use for the part of the data that doesn't fall into the
-	 * first nbuckets hash buckets.
+	 * first nbuckets hash buckets.  We try to set it to make all the
+	 * batches the same size.  But we have to keep nbatch small
+	 * enough to avoid integer overflow in ExecHashJoinGetBatch().
 	 */
-	nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
-						hash_table_bytes);
+	dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
+				hash_table_bytes);
+	if (dtmp < INT_MAX / totalbuckets)
+		nbatch = (int) dtmp;
+	else
+		nbatch = INT_MAX / totalbuckets;
 	if (nbatch <= 0)
 		nbatch = 1;
 	}