path: root/src/backend/executor/nodeHash.c
author    Tom Lane <tgl@sss.pgh.pa.us>  2007-06-01 17:38:44 +0000
committer Tom Lane <tgl@sss.pgh.pa.us>  2007-06-01 17:38:44 +0000
commit    bd2c980b229eb5cf411b1d55bf9b26cca34875d7 (patch)
tree      0677ee408573e19f5c1a2acc6766695aea08143a /src/backend/executor/nodeHash.c
parent    1f559b7d3aa411e08d2ad46a4bd7d9213945b103 (diff)
Buy back some of the cycles spent in more-expensive hash functions by
selecting power-of-2, rather than prime, numbers of buckets in hash joins. If the hash functions are doing their jobs properly by making all hash bits equally random, this is good enough, and it saves expensive integer division and modulus operations.
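
As a quick illustration of the tradeoff the commit message describes (a minimal sketch, not the PostgreSQL code itself): with a prime bucket count, computing the bucket number needs an integer modulus, while with a power-of-2 bucket count it collapses to a single bitwise AND, provided the hash function spreads entropy evenly across all of its output bits. A second sketch after the diff walks through the new bucket/batch arithmetic end to end.

    #include <stdint.h>

    /* prime nbuckets: needs a hardware divide/modulus per tuple */
    static inline uint32_t bucket_prime(uint32_t hashvalue, uint32_t nbuckets)
    {
        return hashvalue % nbuckets;        /* e.g. nbuckets = 1033 */
    }

    /* power-of-2 nbuckets: a single AND, assuming all hash bits are equally random */
    static inline uint32_t bucket_pow2(uint32_t hashvalue, uint32_t nbuckets)
    {
        return hashvalue & (nbuckets - 1);  /* e.g. nbuckets = 1024 */
    }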
Diffstat (limited to 'src/backend/executor/nodeHash.c')
-rw-r--r--   src/backend/executor/nodeHash.c | 56
1 file changed, 27 insertions(+), 29 deletions(-)
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index a6da272d47c..3f13b199c9e 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.111 2007/02/22 22:49:27 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.112 2007/06/01 17:38:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -31,6 +31,7 @@
#include "executor/nodeHashjoin.h"
#include "miscadmin.h"
#include "parser/parse_expr.h"
+#include "utils/dynahash.h"
#include "utils/memutils.h"
#include "utils/lsyscache.h"
@@ -223,6 +224,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
Plan *outerNode;
int nbuckets;
int nbatch;
+ int log2_nbuckets;
int nkeys;
int i;
ListCell *ho;
@@ -242,6 +244,10 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
printf("nbatch = %d, nbuckets = %d\n", nbatch, nbuckets);
#endif
+ /* nbuckets must be a power of 2 */
+ log2_nbuckets = my_log2(nbuckets);
+ Assert(nbuckets == (1 << log2_nbuckets));
+
/*
* Initialize the hash table control block.
*
@@ -250,6 +256,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
*/
hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData));
hashtable->nbuckets = nbuckets;
+ hashtable->log2_nbuckets = log2_nbuckets;
hashtable->buckets = NULL;
hashtable->nbatch = nbatch;
hashtable->curbatch = 0;
@@ -345,13 +352,6 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
/* Target bucket loading (tuples per bucket) */
#define NTUP_PER_BUCKET 10
-/* Prime numbers that we like to use as nbuckets values */
-static const int hprimes[] = {
- 1033, 2063, 4111, 8219, 16417, 32779, 65539, 131111,
- 262151, 524341, 1048589, 2097211, 4194329, 8388619, 16777289, 33554473,
- 67108913, 134217773, 268435463, 536870951, 1073741831
-};
-
void
ExecChooseHashTableSize(double ntuples, int tupwidth,
int *numbuckets,
@@ -396,7 +396,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
int minbatch;
lbuckets = (hash_table_bytes / tupsize) / NTUP_PER_BUCKET;
- lbuckets = Min(lbuckets, INT_MAX);
+ lbuckets = Min(lbuckets, INT_MAX / 2);
nbuckets = (int) lbuckets;
dbatch = ceil(inner_rel_bytes / hash_table_bytes);
@@ -412,27 +412,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
double dbuckets;
dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
- dbuckets = Min(dbuckets, INT_MAX);
+ dbuckets = Min(dbuckets, INT_MAX / 2);
nbuckets = (int) dbuckets;
nbatch = 1;
}
/*
- * We want nbuckets to be prime so as to avoid having bucket and batch
- * numbers depend on only some bits of the hash code. Choose the next
- * larger prime from the list in hprimes[]. (This also enforces that
- * nbuckets is not very small, by the simple expedient of not putting any
- * very small entries in hprimes[].)
+ * Both nbuckets and nbatch must be powers of 2 to make
+ * ExecHashGetBucketAndBatch fast. We already fixed nbatch; now inflate
+ * nbuckets to the next larger power of 2. We also force nbuckets to not
+ * be real small, by starting the search at 2^10.
*/
- for (i = 0; i < (int) lengthof(hprimes); i++)
- {
- if (hprimes[i] >= nbuckets)
- {
- nbuckets = hprimes[i];
- break;
- }
- }
+ i = 10;
+ while ((1 << i) < nbuckets)
+ i++;
+ nbuckets = (1 << i);
*numbuckets = nbuckets;
*numbatches = nbatch;
@@ -765,8 +760,11 @@ ExecHashGetHashValue(HashJoinTable hashtable,
* increase. Our algorithm is
* bucketno = hashvalue MOD nbuckets
* batchno = (hashvalue DIV nbuckets) MOD nbatch
- * where nbuckets should preferably be prime so that all bits of the
- * hash value can affect both bucketno and batchno.
+ * where nbuckets and nbatch are both expected to be powers of 2, so we can
+ * do the computations by shifting and masking. (This assumes that all hash
+ * functions are good about randomizing all their output bits, else we are
+ * likely to have very skewed bucket or batch occupancy.)
+ *
* nbuckets doesn't change over the course of the join.
*
* nbatch is always a power of 2; we increase it only by doubling it. This
@@ -783,13 +781,13 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable,
if (nbatch > 1)
{
- *bucketno = hashvalue % nbuckets;
- /* since nbatch is a power of 2, can do MOD by masking */
- *batchno = (hashvalue / nbuckets) & (nbatch - 1);
+ /* we can do MOD by masking, DIV by shifting */
+ *bucketno = hashvalue & (nbuckets - 1);
+ *batchno = (hashvalue >> hashtable->log2_nbuckets) & (nbatch - 1);
}
else
{
- *bucketno = hashvalue % nbuckets;
+ *bucketno = hashvalue & (nbuckets - 1);
*batchno = 0;
}
}
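
To make the new arithmetic concrete, here is a small self-contained sketch using illustrative names (round_up_buckets is hypothetical; my_log2 is the real helper declared in utils/dynahash.h, which is why the patch adds that include). It mirrors the patched ExecChooseHashTableSize loop, which rounds nbuckets up to a power of 2 no smaller than 2^10, and the patched ExecHashGetBucketAndBatch, which takes the bucket number from the low log2_nbuckets bits of the hash value and the batch number from the bits just above them.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Round nbuckets up to the next power of 2, but never below 2^10. */
    static int round_up_buckets(int nbuckets)
    {
        int i = 10;

        while ((1 << i) < nbuckets)
            i++;
        return 1 << i;
    }

    int main(void)
    {
        int      nbuckets = round_up_buckets(50000);    /* -> 65536 = 2^16 */
        int      log2_nbuckets = 16;                    /* what my_log2(65536) returns */
        int      nbatch = 4;                            /* already forced to a power of 2 */
        uint32_t hashvalue = 0xDEADBEEF;

        /* low log2_nbuckets bits pick the bucket, the next bits pick the batch */
        int bucketno = hashvalue & (nbuckets - 1);
        int batchno  = (hashvalue >> log2_nbuckets) & (nbatch - 1);

        assert(nbuckets == 65536);
        printf("bucket = %d, batch = %d\n", bucketno, batchno);
        return 0;
    }

With a power-of-2 nbuckets only the low-order hash bits select the bucket, which is why the commit relies on the hash functions randomizing all of their output bits; the old prime modulus mixed every bit into the bucket number, at the cost of an integer division per tuple.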