aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils')
-rw-r--r--src/backend/utils/adt/varlena.c220
1 files changed, 210 insertions, 10 deletions
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index f8d9fec34e4..1bd8abbe586 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -28,7 +28,9 @@
#include "utils/builtins.h"
#include "utils/bytea.h"
#include "utils/lsyscache.h"
+#include "utils/memutils.h"
#include "utils/pg_locale.h"
+#include "utils/sortsupport.h"
/* GUC variable */
@@ -50,12 +52,32 @@ typedef struct
int skiptable[256]; /* skip distance for given mismatched char */
} TextPositionState;
+typedef struct
+{
+ char *buf1; /* 1st string */
+ char *buf2; /* 2nd string */
+ int buflen1;
+ int buflen2;
+#ifdef HAVE_LOCALE_T
+ pg_locale_t locale;
+#endif
+} TextSortSupport;
+
+/*
+ * This should be large enough that most strings will fit, but small enough
+ * that we feel comfortable putting it on the stack
+ */
+#define TEXTBUFLEN 1024
+
#define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
#define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
#define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
#define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
+static void btsortsupport_worker(SortSupport ssup, Oid collid);
+static int bttextfastcmp_c(Datum x, Datum y, SortSupport ssup);
+static int bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup);
static int32 text_length(Datum str);
static text *text_catenate(text *t1, text *t2);
static text *text_substring(Datum str,
@@ -1356,10 +1378,8 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
}
else
{
-#define STACKBUFLEN 1024
-
- char a1buf[STACKBUFLEN];
- char a2buf[STACKBUFLEN];
+ char a1buf[TEXTBUFLEN];
+ char a2buf[TEXTBUFLEN];
char *a1p,
*a2p;
@@ -1393,24 +1413,24 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
int a2len;
int r;
- if (len1 >= STACKBUFLEN / 2)
+ if (len1 >= TEXTBUFLEN / 2)
{
a1len = len1 * 2 + 2;
a1p = palloc(a1len);
}
else
{
- a1len = STACKBUFLEN;
+ a1len = TEXTBUFLEN;
a1p = a1buf;
}
- if (len2 >= STACKBUFLEN / 2)
+ if (len2 >= TEXTBUFLEN / 2)
{
a2len = len2 * 2 + 2;
a2p = palloc(a2len);
}
else
{
- a2len = STACKBUFLEN;
+ a2len = TEXTBUFLEN;
a2p = a2buf;
}
@@ -1475,11 +1495,11 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
}
#endif /* WIN32 */
- if (len1 >= STACKBUFLEN)
+ if (len1 >= TEXTBUFLEN)
a1p = (char *) palloc(len1 + 1);
else
a1p = a1buf;
- if (len2 >= STACKBUFLEN)
+ if (len2 >= TEXTBUFLEN)
a2p = (char *) palloc(len2 + 1);
else
a2p = a2buf;
@@ -1683,6 +1703,186 @@ bttextcmp(PG_FUNCTION_ARGS)
PG_RETURN_INT32(result);
}
+Datum
+bttextsortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ Oid collid = ssup->ssup_collation;
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ btsortsupport_worker(ssup, collid);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_VOID();
+}
+
+static void
+btsortsupport_worker(SortSupport ssup, Oid collid)
+{
+ TextSortSupport *tss;
+
+ /*
+ * If LC_COLLATE = C, we can make things quite a bit faster by using
+ * memcmp() rather than strcoll(). To minimize the per-comparison
+ * overhead, we make this decision just once for the whole sort.
+ */
+ if (lc_collate_is_c(collid))
+ {
+ ssup->comparator = bttextfastcmp_c;
+ return;
+ }
+
+ /*
+ * WIN32 requires complex hacks when the database encoding is UTF-8 (except
+ * when using the "C" collation). For now, we don't optimize that case.
+ */
+#ifdef WIN32
+ if (GetDatabaseEncoding() == PG_UTF8)
+ return;
+#endif
+
+ /*
+ * We may need a collation-sensitive comparison. To make things faster,
+ * we'll figure out the collation based on the locale id and cache the
+ * result. Also, since strxfrm()/strcoll() require NUL-terminated inputs,
+ * prepare one or two palloc'd buffers to use as temporary workspace. In
+ * the ad-hoc comparison case we only use palloc'd buffers when we need
+ * more space than we're comfortable allocating on the stack, but here we
+ * can keep the buffers around for the whole sort, so it makes sense to
+ * allocate them once and use them unconditionally.
+ */
+ tss = palloc(sizeof(TextSortSupport));
+#ifdef HAVE_LOCALE_T
+ tss->locale = 0;
+#endif
+
+ if (collid != DEFAULT_COLLATION_OID)
+ {
+ if (!OidIsValid(collid))
+ {
+ /*
+ * This typically means that the parser could not resolve a
+ * conflict of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for string comparison"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+#ifdef HAVE_LOCALE_T
+ tss->locale = pg_newlocale_from_collation(collid);
+#endif
+ }
+
+ tss->buf1 = palloc(TEXTBUFLEN);
+ tss->buflen1 = TEXTBUFLEN;
+ tss->buf2 = palloc(TEXTBUFLEN);
+ tss->buflen2 = TEXTBUFLEN;
+
+ ssup->ssup_extra = tss;
+ ssup->comparator = bttextfastcmp_locale;
+}
+
+/*
+ * sortsupport comparison func (for C locale case)
+ */
+static int
+bttextfastcmp_c(Datum x, Datum y, SortSupport ssup)
+{
+ text *arg1 = DatumGetTextPP(x);
+ text *arg2 = DatumGetTextPP(y);
+ char *a1p,
+ *a2p;
+ int len1,
+ len2,
+ result;
+
+ a1p = VARDATA_ANY(arg1);
+ a2p = VARDATA_ANY(arg2);
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ result = memcmp(a1p, a2p, Min(len1, len2));
+ if ((result == 0) && (len1 != len2))
+ result = (len1 < len2) ? -1 : 1;
+
+ /* We can't afford to leak memory here. */
+ if (PointerGetDatum(arg1) != x)
+ pfree(arg1);
+ if (PointerGetDatum(arg2) != y)
+ pfree(arg2);
+
+ return result;
+}
+
+/*
+ * sortsupport comparison func (for locale case)
+ */
+static int
+bttextfastcmp_locale(Datum x, Datum y, SortSupport ssup)
+{
+ text *arg1 = DatumGetTextPP(x);
+ text *arg2 = DatumGetTextPP(y);
+ TextSortSupport *tss = (TextSortSupport *) ssup->ssup_extra;
+
+ /* working state */
+ char *a1p,
+ *a2p;
+ int len1,
+ len2,
+ result;
+
+ a1p = VARDATA_ANY(arg1);
+ a2p = VARDATA_ANY(arg2);
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ if (len1 >= tss->buflen1)
+ {
+ pfree(tss->buf1);
+ tss->buflen1 = Max(len1 + 1, Min(tss->buflen1 * 2, MaxAllocSize));
+ tss->buf1 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen1);
+ }
+ if (len2 >= tss->buflen2)
+ {
+ pfree(tss->buf2);
+ tss->buflen1 = Max(len2 + 1, Min(tss->buflen2 * 2, MaxAllocSize));
+ tss->buf2 = MemoryContextAlloc(ssup->ssup_cxt, tss->buflen2);
+ }
+
+ memcpy(tss->buf1, a1p, len1);
+ tss->buf1[len1] = '\0';
+ memcpy(tss->buf2, a2p, len2);
+ tss->buf2[len2] = '\0';
+
+#ifdef HAVE_LOCALE_T
+ if (tss->locale)
+ result = strcoll_l(tss->buf1, tss->buf2, tss->locale);
+ else
+#endif
+ result = strcoll(tss->buf1, tss->buf2);
+
+ /*
+ * In some locales strcoll() can claim that nonidentical strings are equal.
+ * Believing that would be bad news for a number of reasons, so we follow
+ * Perl's lead and sort "equal" strings according to strcmp().
+ */
+ if (result == 0)
+ result = strcmp(tss->buf1, tss->buf2);
+
+ /* We can't afford to leak memory here. */
+ if (PointerGetDatum(arg1) != x)
+ pfree(arg1);
+ if (PointerGetDatum(arg2) != y)
+ pfree(arg2);
+
+ return result;
+}
Datum
text_larger(PG_FUNCTION_ARGS)