aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/hash/hashfunc.c52
-rw-r--r--src/backend/nodes/bitmapset.c38
-rw-r--r--src/backend/utils/hash/hashfn.c11
-rw-r--r--src/include/access/hash.h3
-rw-r--r--src/include/catalog/catversion.h4
5 files changed, 67 insertions, 41 deletions
diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c
index d9b5524b0ab..39a58c0899a 100644
--- a/src/backend/access/hash/hashfunc.c
+++ b/src/backend/access/hash/hashfunc.c
@@ -1,19 +1,26 @@
/*-------------------------------------------------------------------------
*
* hashfunc.c
- * Comparison functions for hash access method.
+ * Support functions for hash access method.
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.51 2007/04/02 03:49:37 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.52 2007/06/01 15:33:18 tgl Exp $
*
* NOTES
* These functions are stored in pg_amproc. For each operator class
- * defined on hash tables, they compute the hash value of the argument.
+ * defined for hash indexes, they compute the hash value of the argument.
*
+ * Additional hash functions appear in /utils/adt/ files for various
+ * specialized datatypes.
+ *
+ * It is expected that every bit of a hash function's 32-bit result is
+ * as random as every other; failure to ensure this is likely to lead
+ * to poor performance of hash joins, for example. In most cases a hash
+ * function should use hash_any() or its variant hash_uint32().
*-------------------------------------------------------------------------
*/
@@ -26,19 +33,19 @@
Datum
hashchar(PG_FUNCTION_ARGS)
{
- PG_RETURN_UINT32(~((uint32) PG_GETARG_CHAR(0)));
+ return hash_uint32((int32) PG_GETARG_CHAR(0));
}
Datum
hashint2(PG_FUNCTION_ARGS)
{
- PG_RETURN_UINT32(~((uint32) PG_GETARG_INT16(0)));
+ return hash_uint32((int32) PG_GETARG_INT16(0));
}
Datum
hashint4(PG_FUNCTION_ARGS)
{
- PG_RETURN_UINT32(~PG_GETARG_UINT32(0));
+ return hash_uint32(PG_GETARG_INT32(0));
}
Datum
@@ -59,23 +66,23 @@ hashint8(PG_FUNCTION_ARGS)
lohalf ^= (val >= 0) ? hihalf : ~hihalf;
- PG_RETURN_UINT32(~lohalf);
+ return hash_uint32(lohalf);
#else
/* here if we can't count on "x >> 32" to work sanely */
- PG_RETURN_UINT32(~((uint32) PG_GETARG_INT64(0)));
+ return hash_uint32((int32) PG_GETARG_INT64(0));
#endif
}
Datum
hashoid(PG_FUNCTION_ARGS)
{
- PG_RETURN_UINT32(~((uint32) PG_GETARG_OID(0)));
+ return hash_uint32((uint32) PG_GETARG_OID(0));
}
Datum
hashenum(PG_FUNCTION_ARGS)
{
- PG_RETURN_UINT32(~((uint32) PG_GETARG_OID(0)));
+ return hash_uint32((uint32) PG_GETARG_OID(0));
}
Datum
@@ -283,6 +290,31 @@ hash_any(register const unsigned char *k, register int keylen)
/* case 0: nothing left to add */
}
mix(a, b, c);
+
+ /* report the result */
+ return UInt32GetDatum(c);
+}
+
+/*
+ * hash_uint32() -- hash a 32-bit value
+ *
+ * This has the same result (at least on little-endian machines) as
+ * hash_any(&k, sizeof(uint32))
+ * but is faster and doesn't force the caller to store k into memory.
+ */
+Datum
+hash_uint32(uint32 k)
+{
+ register uint32 a,
+ b,
+ c;
+
+ a = 0x9e3779b9 + k;
+ b = 0x9e3779b9;
+ c = 3923095 + (uint32) sizeof(uint32);
+
+ mix(a, b, c);
+
/* report the result */
return UInt32GetDatum(c);
}
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 86a9fb332c3..bc77363fbf4 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -14,13 +14,14 @@
* Copyright (c) 2003-2007, PostgreSQL Global Development Group
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.12 2007/01/05 22:19:29 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/bitmapset.c,v 1.13 2007/06/01 15:33:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "nodes/bitmapset.h"
+#include "access/hash.h"
#define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD)
@@ -769,36 +770,23 @@ bms_first_member(Bitmapset *a)
*
* Note: we must ensure that any two bitmapsets that are bms_equal() will
* hash to the same value; in practice this means that trailing all-zero
- * words cannot affect the result. The circular-shift-and-XOR hash method
- * used here has this property, so long as we work from back to front.
- *
- * Note: you might wonder why we bother with the circular shift; at first
- * glance a straight longitudinal XOR seems as good and much simpler. The
- * reason is empirical: this gives a better distribution of hash values on
- * the bitmapsets actually generated by the planner. A common way to have
- * multiword bitmapsets is "a JOIN b JOIN c JOIN d ...", which gives rise
- * to rangetables in which base tables and JOIN nodes alternate; so
- * bitmapsets of base table RT indexes tend to use only odd-numbered or only
- * even-numbered bits. A straight longitudinal XOR would preserve this
- * property, leading to a much smaller set of possible outputs than if
- * we include a shift.
+ * words must not affect the result. Hence we strip those before applying
+ * hash_any().
*/
uint32
bms_hash_value(const Bitmapset *a)
{
- bitmapword result = 0;
- int wordnum;
+ int lastword;
- if (a == NULL || a->nwords <= 0)
+ if (a == NULL)
return 0; /* All empty sets hash to 0 */
- for (wordnum = a->nwords; --wordnum > 0;)
+ for (lastword = a->nwords; --lastword >= 0;)
{
- result ^= a->words[wordnum];
- if (result & ((bitmapword) 1 << (BITS_PER_BITMAPWORD - 1)))
- result = (result << 1) | 1;
- else
- result = (result << 1);
+ if (a->words[lastword] != 0)
+ break;
}
- result ^= a->words[0];
- return (uint32) result;
+ if (lastword < 0)
+ return 0; /* All empty sets hash to 0 */
+ return DatumGetUInt32(hash_any((const unsigned char *) a->words,
+ (lastword + 1) * sizeof(bitmapword)));
}
diff --git a/src/backend/utils/hash/hashfn.c b/src/backend/utils/hash/hashfn.c
index 6eeedc9383c..e1e65c03507 100644
--- a/src/backend/utils/hash/hashfn.c
+++ b/src/backend/utils/hash/hashfn.c
@@ -9,7 +9,13 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/hash/hashfn.c,v 1.30 2007/01/05 22:19:43 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/hash/hashfn.c,v 1.31 2007/06/01 15:33:18 tgl Exp $
+ *
+ * NOTES
+ * It is expected that every bit of a hash function's 32-bit result is
+ * as random as every other; failure to ensure this is likely to lead
+ * to poor performance of hash tables. In most cases a hash
+ * function should use hash_any() or its variant hash_uint32().
*
*-------------------------------------------------------------------------
*/
@@ -58,8 +64,7 @@ uint32
oid_hash(const void *key, Size keysize)
{
Assert(keysize == sizeof(Oid));
- /* We don't actually bother to do anything to the OID value ... */
- return (uint32) *((const Oid *) key);
+ return DatumGetUInt32(hash_uint32((uint32) *((const Oid *) key)));
}
/*
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index 2bd314a8aa3..e7f59e2da82 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.81 2007/05/30 20:12:02 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.82 2007/06/01 15:33:18 tgl Exp $
*
* NOTES
* modeled after Margo Seltzer's hash implementation for unix.
@@ -265,6 +265,7 @@ extern Datum hashname(PG_FUNCTION_ARGS);
extern Datum hashtext(PG_FUNCTION_ARGS);
extern Datum hashvarlena(PG_FUNCTION_ARGS);
extern Datum hash_any(register const unsigned char *k, register int keylen);
+extern Datum hash_uint32(uint32 k);
/* private routines */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 9ae142387ca..b71f718776b 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.407 2007/05/21 17:10:29 petere Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.408 2007/06/01 15:33:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 200705211
+#define CATALOG_VERSION_NO 200706011
#endif