diff options
author | Robert Haas <rhaas@postgresql.org> | 2015-10-09 15:06:06 -0400 |
---|---|---|
committer | Robert Haas <rhaas@postgresql.org> | 2015-10-09 15:06:06 -0400 |
commit | bfb54ff15a447fb22e9deae096e0d45b3e4bd56f (patch) | |
tree | be464bb5a57e39b94e9a17f2baf7bf28237a01e8 /src/backend/utils/adt/varlena.c | |
parent | db0f6cad4884bd4c835156d3a720d9a79dbd63a9 (diff) | |
download | postgresql-bfb54ff15a447fb22e9deae096e0d45b3e4bd56f.tar.gz postgresql-bfb54ff15a447fb22e9deae096e0d45b3e4bd56f.zip |
Make abbreviated key comparisons for text a bit cheaper.
If we do some byte-swapping while abbreviating, we can do comparisons
using integer arithmetic rather than memcmp.
Peter Geoghegan, reviewed and slightly revised by me.
Diffstat (limited to 'src/backend/utils/adt/varlena.c')
-rw-r--r-- | src/backend/utils/adt/varlena.c | 29 |
1 file changed, 20 insertions, 9 deletions
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 2fbbf5475ec..49a4898987e 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -26,6 +26,7 @@ #include "libpq/pqformat.h" #include "miscadmin.h" #include "parser/scansup.h" +#include "port/pg_bswap.h" #include "regex/regex.h" #include "utils/builtins.h" #include "utils/bytea.h" @@ -1967,25 +1968,25 @@ done: static int bttextcmp_abbrev(Datum x, Datum y, SortSupport ssup) { - char *a = (char *) &x; - char *b = (char *) &y; - int result; - - result = memcmp(a, b, sizeof(Datum)); - /* - * When result = 0, the core system will call bttextfastcmp_c() or + * When 0 is returned, the core system will call bttextfastcmp_c() or * bttextfastcmp_locale(). Even a strcmp() on two non-truncated strxfrm() * blobs cannot indicate *equality* authoritatively, for the same reason * that there is a strcoll() tie-breaker call to strcmp() in varstr_cmp(). */ - return result; + if (x > y) + return 1; + else if (x == y) + return 0; + else + return -1; } /* * Conversion routine for sortsupport. Converts original text to abbreviated * key representation. Our encoding strategy is simple -- pack the first 8 - * bytes of a strxfrm() blob into a Datum. + * bytes of a strxfrm() blob into a Datum (on little-endian machines, the 8 + * bytes are stored in reverse order), and treat it as an unsigned integer. */ static Datum bttext_abbrev_convert(Datum original, SortSupport ssup) @@ -2104,6 +2105,16 @@ bttext_abbrev_convert(Datum original, SortSupport ssup) addHyperLogLog(&tss->abbr_card, hash); + /* + * Byteswap on little-endian machines. + * + * This is needed so that bttextcmp_abbrev() (an unsigned integer 3-way + * comparator) works correctly on all platforms. If we didn't do this, + * the comparator would have to call memcmp() with a pair of pointers to + * the first byte of each abbreviated key, which is slower. 
+ */ + res = DatumBigEndianToNative(res); + /* Don't leak memory here */ if (PointerGetDatum(authoritative) != original) pfree(authoritative); |