author    | Tom Lane <tgl@sss.pgh.pa.us> | 2002-12-29 22:28:50 +0000
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2002-12-29 22:28:50 +0000
commit    | b33265e9e6648e0d1302896cb944e500364497ea (patch)
tree      | 312d3803b3197359cf83db25ab4bda53c1d23d6a /src/backend/executor/nodeHash.c
parent    | c9d87120430f7137e074a1acdb753161d56e88bb (diff)
download  | postgresql-b33265e9e6648e0d1302896cb944e500364497ea.tar.gz
          | postgresql-b33265e9e6648e0d1302896cb944e500364497ea.zip
Adjust hash table sizing algorithm to avoid integer overflow in
ExecHashJoinGetBatch(). Fixes core dump on large hash joins, as in
example from Rae Stiening.
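The fix rests on a clamp-before-cast pattern: a double-valued estimate is checked against INT_MAX before it is converted to int, rather than cast directly (casting a double whose value exceeds the int range is undefined behavior in C). Below is a minimal standalone sketch of that pattern, not the PostgreSQL code itself; estimate_buckets() is a hypothetical helper and 10.0 merely stands in for NTUP_PER_BUCKET.

```c
/* Sketch only: clamp a double estimate into int range before casting. */
#include <limits.h>
#include <math.h>
#include <stdio.h>

/* Hypothetical helper; 10.0 stands in for NTUP_PER_BUCKET. */
static int
estimate_buckets(double ntuples, double fudge_fac)
{
    double  dtmp = ceil(ntuples * fudge_fac / 10.0);
    int     totalbuckets;

    if (dtmp < INT_MAX)
        totalbuckets = (int) dtmp;  /* fits in an int, cast is safe */
    else
        totalbuckets = INT_MAX;     /* clamp instead of overflowing */
    if (totalbuckets <= 0)
        totalbuckets = 1;           /* guard against zero or rounding */
    return totalbuckets;
}

int
main(void)
{
    /* A huge row-count estimate now yields INT_MAX rather than garbage. */
    printf("%d\n", estimate_buckets(1e12, 2.0));
    return 0;
}
```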
Diffstat (limited to 'src/backend/executor/nodeHash.c')
-rw-r--r-- | src/backend/executor/nodeHash.c | 36
1 file changed, 23 insertions, 13 deletions
```diff
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index efdd3b3cabb..9e0e4f0b8cb 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.71 2002/12/15 16:17:46 tgl Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.72 2002/12/29 22:28:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,6 +20,7 @@
  */
 #include "postgres.h"
 
+#include <limits.h>
 #include <math.h>
 
 #include "access/hash.h"
@@ -344,7 +345,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 {
     int         tupsize;
     double      inner_rel_bytes;
-    double      hash_table_bytes;
+    long        hash_table_bytes;
+    double      dtmp;
     int         nbatch;
     int         nbuckets;
     int         totalbuckets;
@@ -362,20 +364,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
     inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
 
     /*
-     * Target hashtable size is SortMem kilobytes, but not less than
-     * sqrt(estimated inner rel size), so as to avoid horrible
-     * performance.
+     * Target in-memory hashtable size is SortMem kilobytes.
      */
-    hash_table_bytes = sqrt(inner_rel_bytes);
-    if (hash_table_bytes < (SortMem * 1024L))
-        hash_table_bytes = SortMem * 1024L;
+    hash_table_bytes = SortMem * 1024L;
 
     /*
      * Count the number of hash buckets we want for the whole relation,
      * for an average bucket load of NTUP_PER_BUCKET (per virtual
-     * bucket!).
+     * bucket!). It has to fit in an int, however.
      */
-    totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+    dtmp = ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+    if (dtmp < INT_MAX)
+        totalbuckets = (int) dtmp;
+    else
+        totalbuckets = INT_MAX;
+    if (totalbuckets <= 0)
+        totalbuckets = 1;
 
     /*
      * Count the number of buckets we think will actually fit in the
@@ -409,10 +413,16 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
          * that nbatch doesn't have to have anything to do with the ratio
          * totalbuckets/nbuckets; in fact, it is the number of groups we
          * will use for the part of the data that doesn't fall into the
-         * first nbuckets hash buckets.
+         * first nbuckets hash buckets. We try to set it to make all the
+         * batches the same size. But we have to keep nbatch small
+         * enough to avoid integer overflow in ExecHashJoinGetBatch().
          */
-        nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
-                            hash_table_bytes);
+        dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
+                    hash_table_bytes);
+        if (dtmp < INT_MAX / totalbuckets)
+            nbatch = (int) dtmp;
+        else
+            nbatch = INT_MAX / totalbuckets;
         if (nbatch <= 0)
             nbatch = 1;
     }
```
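The nbatch clamp in the last hunk does a little more than avoid a bad cast: by capping nbatch at INT_MAX / totalbuckets, the patch keeps the product nbatch * totalbuckets within int range, which is what the new comment says is needed to avoid overflow in ExecHashJoinGetBatch(). A hedged sketch of just that bound follows; choose_nbatch() is a hypothetical stand-in for the corresponding lines of ExecChooseHashTableSize(), not an actual PostgreSQL function.

```c
/* Sketch only: bound nbatch so nbatch * totalbuckets cannot exceed INT_MAX. */
#include <limits.h>
#include <math.h>
#include <stdio.h>

/* Hypothetical stand-in for the nbatch computation in ExecChooseHashTableSize(). */
int
choose_nbatch(double inner_rel_bytes, long hash_table_bytes, int totalbuckets)
{
    double  dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
                        (double) hash_table_bytes);
    int     nbatch;

    if (dtmp < INT_MAX / totalbuckets)
        nbatch = (int) dtmp;
    else
        nbatch = INT_MAX / totalbuckets;    /* nbatch * totalbuckets <= INT_MAX */
    if (nbatch <= 0)
        nbatch = 1;
    return nbatch;
}

int
main(void)
{
    /* ~1 PB inner relation, 1 MB of hash memory, 1000 virtual buckets:
     * the raw estimate (~1e9 batches) would overflow once multiplied by
     * totalbuckets, so it gets capped at INT_MAX / 1000. */
    printf("%d\n", choose_nbatch(1e15, 1024L * 1024L, 1000));
    return 0;
}
```

Because totalbuckets is itself clamped to at most INT_MAX, the divisor INT_MAX / totalbuckets is always at least 1, so nbatch stays at least 1 even for extreme estimates.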