Make abbreviated key comparisons for text a bit cheaper.

If we do some byte-swapping while abbreviating, we can do comparisons using integer arithmetic rather than memcmp.

Peter Geoghegan, reviewed and slightly revised by me.
author: Robert Haas <rhaas@postgresql.org> 2015-10-09 15:06:06 -0400
committer: Robert Haas <rhaas@postgresql.org> 2015-10-09 15:06:06 -0400
commit: bfb54ff15a447fb22e9deae096e0d45b3e4bd56f (patch)
tree: be464bb5a57e39b94e9a17f2baf7bf28237a01e8 /src
parent: db0f6cad4884bd4c835156d3a720d9a79dbd63a9 (diff)
download: postgresql-bfb54ff15a447fb22e9deae096e0d45b3e4bd56f.tar.gz
postgresql-bfb54ff15a447fb22e9deae096e0d45b3e4bd56f.zip
2 files changed, 44 insertions, 11 deletions
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 2fbbf5475ec..49a4898987e 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -26,6 +26,7 @@
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
 #include "parser/scansup.h"
+#include "port/pg_bswap.h"
 #include "regex/regex.h"
 #include "utils/builtins.h"
 #include "utils/bytea.h"
@@ -1967,25 +1968,25 @@ done:
 static int
 bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup)
 {
-	char	   *a = (char *) &x;
-	char	   *b = (char *) &y;
-	int			result;
-
-	result = memcmp(a, b, sizeof(Datum));
-
 	/*
-	 * When result = 0, the core system will call bttextfastcmp_c() or
+	 * When 0 is returned, the core system will call bttextfastcmp_c() or
 	 * bttextfastcmp_locale().  Even a strcmp() on two non-truncated strxfrm()
 	 * blobs cannot indicate *equality* authoritatively, for the same reason
 	 * that there is a strcoll() tie-breaker call to strcmp() in varstr_cmp().
 	 */
-	return result;
+	if (x > y)
+		return 1;
+	else if (x == y)
+		return 0;
+	else
+		return -1;
 }
 
 /*
  * Conversion routine for sortsupport.  Converts original text to abbreviated
  * key representation.  Our encoding strategy is simple -- pack the first 8
- * bytes of a strxfrm() blob into a Datum.
+ * bytes of a strxfrm() blob into a Datum (on little-endian machines, the 8
+ * bytes are stored in reverse order), and treat it as an unsigned integer.
  */
 static Datum
 bttext_abbrev_convert(Datum original, SortSupport ssup)
@@ -2104,6 +2105,16 @@ bttext_abbrev_convert(Datum original, SortSupport ssup)
 
 	addHyperLogLog(&tss->abbr_card, hash);
 
+	/*
+	 * Byteswap on little-endian machines.
+	 *
+	 * This is needed so that bttextcmp_abbrev() (an unsigned integer 3-way
+	 * comparator) works correctly on all platforms.  If we didn't do this,
+	 * the comparator would have to call memcmp() with a pair of pointers to
+	 * the first byte of each abbreviated key, which is slower.
+	 */
+	res = DatumBigEndianToNative(res);
+
 	/* Don't leak memory here */
 	if (PointerGetDatum(authoritative) != original)
 		pfree(authoritative);
diff --git a/src/include/port/pg_bswap.h b/src/include/port/pg_bswap.h
index 6555942c921..e9cf93233fa 100644
--- a/src/include/port/pg_bswap.h
+++ b/src/include/port/pg_bswap.h
@@ -28,7 +28,7 @@
 					((x << 8) & 0x00ff0000) | \
 					((x >> 8) & 0x0000ff00) | \
 					((x >> 24) & 0x000000ff))
-#endif	/* HAVE__BUILTIN_BSWAP32 */
+#endif   /* HAVE__BUILTIN_BSWAP32 */
 
 #ifdef HAVE__BUILTIN_BSWAP64
 #define BSWAP64(x) __builtin_bswap64(x)
@@ -41,6 +41,28 @@
 					((x >> 24) & 0x0000000000ff0000UL) | \
 					((x >> 40) & 0x000000000000ff00UL) | \
 					((x >> 56) & 0x00000000000000ffUL))
-#endif	/* HAVE__BUILTIN_BSWAP64 */
+#endif   /* HAVE__BUILTIN_BSWAP64 */
+
+/*
+ * Rearrange the bytes of a Datum from big-endian order into the native byte
+ * order.  On big-endian machines, this does nothing at all.  Note that the C
+ * type Datum is an unsigned integer type on all platforms.
+ *
+ * One possible application of the DatumBigEndianToNative() macro is to make
+ * bitwise comparisons cheaper.  A simple 3-way comparison of Datums
+ * transformed by the macro (based on native, unsigned comparisons) will return
+ * the same result as a memcmp() of the corresponding original Datums, but can
+ * be much cheaper.  It's generally safe to do this on big-endian systems
+ * without any special transformation occurring first.
+ */
+#ifdef WORDS_BIGENDIAN
+#define		DatumBigEndianToNative(x)	(x)
+#else							/* !WORDS_BIGENDIAN */
+#if SIZEOF_DATUM == 8
+#define		DatumBigEndianToNative(x)	BSWAP64(x)
+#else							/* SIZEOF_DATUM != 8 */
+#define		DatumBigEndianToNative(x)	BSWAP32(x)
+#endif   /* SIZEOF_DATUM == 8 */
+#endif   /* WORDS_BIGENDIAN */
 
 #endif   /* PG_BSWAP_H */
author	Robert Haas <rhaas@postgresql.org>	2015-10-09 15:06:06 -0400
committer	Robert Haas <rhaas@postgresql.org>	2015-10-09 15:06:06 -0400
commit	bfb54ff15a447fb22e9deae096e0d45b3e4bd56f (patch)
tree	be464bb5a57e39b94e9a17f2baf7bf28237a01e8 /src
parent	db0f6cad4884bd4c835156d3a720d9a79dbd63a9 (diff)
download	postgresql-bfb54ff15a447fb22e9deae096e0d45b3e4bd56f.tar.gz postgresql-bfb54ff15a447fb22e9deae096e0d45b3e4bd56f.zip