author    | Tom Lane <tgl@sss.pgh.pa.us> | 2002-12-29 22:28:50 +0000
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2002-12-29 22:28:50 +0000
commit    | b33265e9e6648e0d1302896cb944e500364497ea (patch)
tree      | 312d3803b3197359cf83db25ab4bda53c1d23d6a /src/backend/executor/nodeHash.c
parent    | c9d87120430f7137e074a1acdb753161d56e88bb (diff)
download  | postgresql-b33265e9e6648e0d1302896cb944e500364497ea.tar.gz
          | postgresql-b33265e9e6648e0d1302896cb944e500364497ea.zip
Adjust hash table sizing algorithm to avoid integer overflow in
ExecHashJoinGetBatch(). Fixes core dump on large hash joins, as in
example from Rae Stiening.
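The fix rests on a clamp-before-cast pattern: a double-valued estimate is checked against INT_MAX before it is converted to int, rather than cast directly (casting a double whose value exceeds the int range is undefined behavior in C). Below is a minimal standalone sketch of that pattern, not the PostgreSQL code itself; estimate_buckets() is a hypothetical helper and 10.0 merely stands in for NTUP_PER_BUCKET.

```c
/* Sketch only: clamp a double estimate into int range before casting. */
#include <limits.h>
#include <math.h>
#include <stdio.h>

/* Hypothetical helper; 10.0 stands in for NTUP_PER_BUCKET. */
static int
estimate_buckets(double ntuples, double fudge_fac)
{
    double  dtmp = ceil(ntuples * fudge_fac / 10.0);
    int     totalbuckets;

    if (dtmp < INT_MAX)
        totalbuckets = (int) dtmp;  /* fits in an int, cast is safe */
    else
        totalbuckets = INT_MAX;     /* clamp instead of overflowing */
    if (totalbuckets <= 0)
        totalbuckets = 1;           /* guard against zero or rounding */
    return totalbuckets;
}

int
main(void)
{
    /* A huge row-count estimate now yields INT_MAX rather than garbage. */
    printf("%d\n", estimate_buckets(1e12, 2.0));
    return 0;
}
```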
Diffstat (limited to 'src/backend/executor/nodeHash.c')
-rw-r--r-- | src/backend/executor/nodeHash.c | 36
1 file changed, 23 insertions, 13 deletions
```diff
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index efdd3b3cabb..9e0e4f0b8cb 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.71 2002/12/15 16:17:46 tgl Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.72 2002/12/29 22:28:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,6 +20,7 @@
  */
 #include "postgres.h"
 
+#include <limits.h>
 #include <math.h>
 
 #include "access/hash.h"
@@ -344,7 +345,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 {
     int         tupsize;
     double      inner_rel_bytes;
-    double      hash_table_bytes;
+    long        hash_table_bytes;
+    double      dtmp;
     int         nbatch;
     int         nbuckets;
     int         totalbuckets;
@@ -362,20 +364,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
     inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
 
     /*
-     * Target hashtable size is SortMem kilobytes, but not less than
-     * sqrt(estimated inner rel size), so as to avoid horrible
-     * performance.
+     * Target in-memory hashtable size is SortMem kilobytes.
      */
-    hash_table_bytes = sqrt(inner_rel_bytes);
-    if (hash_table_bytes < (SortMem * 1024L))
-        hash_table_bytes = SortMem * 1024L;
+    hash_table_bytes = SortMem * 1024L;
 
     /*
      * Count the number of hash buckets we want for the whole relation,
      * for an average bucket load of NTUP_PER_BUCKET (per virtual
-     * bucket!).
+     * bucket!). It has to fit in an int, however.
      */
-    totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+    dtmp = ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+    if (dtmp < INT_MAX)
+        totalbuckets = (int) dtmp;
+    else
+        totalbuckets = INT_MAX;
+    if (totalbuckets <= 0)
+        totalbuckets = 1;
 
     /*
      * Count the number of buckets we think will actually fit in the
@@ -409,10 +413,16 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
          * that nbatch doesn't have to have anything to do with the ratio
          * totalbuckets/nbuckets; in fact, it is the number of groups we
          * will use for the part of the data that doesn't fall into the
-         * first nbuckets hash buckets.
+         * first nbuckets hash buckets. We try to set it to make all the
+         * batches the same size. But we have to keep nbatch small
+         * enough to avoid integer overflow in ExecHashJoinGetBatch().
          */
-        nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
-                            hash_table_bytes);
+        dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
+                    hash_table_bytes);
+        if (dtmp < INT_MAX / totalbuckets)
+            nbatch = (int) dtmp;
+        else
+            nbatch = INT_MAX / totalbuckets;
         if (nbatch <= 0)
             nbatch = 1;
     }
```
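The nbatch clamp in the last hunk does a little more than avoid a bad cast: by capping nbatch at INT_MAX / totalbuckets, the patch keeps the product nbatch * totalbuckets within int range, which is what the new comment says is needed to avoid overflow in ExecHashJoinGetBatch(). A hedged sketch of just that bound follows; choose_nbatch() is a hypothetical stand-in for the corresponding lines of ExecChooseHashTableSize(), not an actual PostgreSQL function.

```c
/* Sketch only: bound nbatch so nbatch * totalbuckets cannot exceed INT_MAX. */
#include <limits.h>
#include <math.h>
#include <stdio.h>

/* Hypothetical stand-in for the nbatch computation in ExecChooseHashTableSize(). */
int
choose_nbatch(double inner_rel_bytes, long hash_table_bytes, int totalbuckets)
{
    double  dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
                        (double) hash_table_bytes);
    int     nbatch;

    if (dtmp < INT_MAX / totalbuckets)
        nbatch = (int) dtmp;
    else
        nbatch = INT_MAX / totalbuckets;    /* nbatch * totalbuckets <= INT_MAX */
    if (nbatch <= 0)
        nbatch = 1;
    return nbatch;
}

int
main(void)
{
    /* ~1 PB inner relation, 1 MB of hash memory, 1000 virtual buckets:
     * the raw estimate (~1e9 batches) would overflow once multiplied by
     * totalbuckets, so it gets capped at INT_MAX / 1000. */
    printf("%d\n", choose_nbatch(1e15, 1024L * 1024L, 1000));
    return 0;
}
```

Because totalbuckets is itself clamped to at most INT_MAX, the divisor INT_MAX / totalbuckets is always at least 1, so nbatch stays at least 1 even for extreme estimates.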