aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/varlena.c
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2018-12-19 17:35:12 -0500
committer Tom Lane <tgl@sss.pgh.pa.us> 2018-12-19 17:46:25 -0500
commit 586b98fdf1aaef4a27744f8b988479aad4bd9a01 (patch)
tree a74687bfd94d9abc9a68c0f076aad66dbe619865 /src/backend/utils/adt/varlena.c
parent 68f6f2b7395fe3e403034bcd97a1fcfbcc68ae10 (diff)
downloadpostgresql-586b98fdf1aaef4a27744f8b988479aad4bd9a01.tar.gz
postgresql-586b98fdf1aaef4a27744f8b988479aad4bd9a01.zip
Make type "name" collation-aware.
The "name" comparison operators now all support collations, making them functionally equivalent to "text" comparisons, except for the different physical representation of the datatype. They do, in fact, mostly share the varstr_cmp and varstr_sortsupport infrastructure, which has been slightly enlarged to handle the case. To avoid changes in the default behavior of the datatype, set name's typcollation to C_COLLATION_OID not DEFAULT_COLLATION_OID, so that by default comparisons to a name value will continue to use strcmp semantics. (This would have been the case for system catalog columns anyway, because of commit 6b0faf723, but doing this makes it true for user-created name columns as well. In particular, this avoids locale-dependent changes in our regression test results.) In consequence, tweak a couple of places that made assumptions about collatable base types always having typcollation DEFAULT_COLLATION_OID. I have not, however, attempted to relax the restriction that user-defined collatable types must have that. Hence, "name" doesn't behave quite like a user-defined type; it acts more like a domain with COLLATE "C". (Conceivably, if we ever get rid of the need for catalog name columns to be fixed-length, "name" could actually become such a domain over text. But that'd be a pretty massive undertaking, and I'm not volunteering.) Discussion: https://postgr.es/m/15938.1544377821@sss.pgh.pa.us
Diffstat (limited to 'src/backend/utils/adt/varlena.c')
-rw-r--r-- src/backend/utils/adt/varlena.c 126
1 files changed, 90 insertions, 36 deletions
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 0fd3b157482..a4fb5885c7e 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -69,7 +69,7 @@ typedef struct
int last_returned; /* Last comparison result (cache) */
bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
bool collate_c;
- bool bpchar; /* Sorting bpchar, not varchar/text/bytea? */
+ Oid typeid; /* Actual datatype (text/bpchar/bytea/name) */
hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
hyperLogLogState full_card; /* Full key cardinality state */
double prop_card; /* Required cardinality proportion */
@@ -93,7 +93,10 @@ typedef struct
static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
-static int varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup);
+static int namefastcmp_c(Datum x, Datum y, SortSupport ssup);
+static int varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
+static int namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
+static int varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
static int varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup);
static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
@@ -1814,7 +1817,7 @@ bttextsortsupport(PG_FUNCTION_ARGS)
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
/* Use generic string SortSupport */
- varstr_sortsupport(ssup, collid, false);
+ varstr_sortsupport(ssup, TEXTOID, collid);
MemoryContextSwitchTo(oldcontext);
@@ -1832,7 +1835,7 @@ bttextsortsupport(PG_FUNCTION_ARGS)
* this will not work with any other collation, though.
*/
void
-varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
+varstr_sortsupport(SortSupport ssup, Oid typeid, Oid collid)
{
bool abbreviate = ssup->abbreviate;
bool collate_c = false;
@@ -1845,18 +1848,25 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
* overhead of a trip through the fmgr layer for every comparison, which
* can be substantial.
*
- * Most typically, we'll set the comparator to varstrfastcmp_locale, which
- * uses strcoll() to perform comparisons and knows about the special
- * requirements of BpChar callers. However, if LC_COLLATE = C, we can
- * make things quite a bit faster with varstrfastcmp_c or bpcharfastcmp_c,
- * both of which use memcmp() rather than strcoll().
+ * Most typically, we'll set the comparator to varlenafastcmp_locale,
+ * which uses strcoll() to perform comparisons. We use that for the
+ * BpChar case too, but type NAME uses namefastcmp_locale. However, if
+ * LC_COLLATE = C, we can make things quite a bit faster with
+ * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
+ * memcmp() rather than strcoll().
*/
if (lc_collate_is_c(collid))
{
- if (!bpchar)
- ssup->comparator = varstrfastcmp_c;
- else
+ if (typeid == BPCHAROID)
ssup->comparator = bpcharfastcmp_c;
+ else if (typeid == NAMEOID)
+ {
+ ssup->comparator = namefastcmp_c;
+ /* Not supporting abbreviation with type NAME, for now */
+ abbreviate = false;
+ }
+ else
+ ssup->comparator = varstrfastcmp_c;
collate_c = true;
}
@@ -1897,7 +1907,17 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
return;
#endif
- ssup->comparator = varstrfastcmp_locale;
+ /*
+ * We use varlenafastcmp_locale except for type NAME.
+ */
+ if (typeid == NAMEOID)
+ {
+ ssup->comparator = namefastcmp_locale;
+ /* Not supporting abbreviation with type NAME, for now */
+ abbreviate = false;
+ }
+ else
+ ssup->comparator = varlenafastcmp_locale;
}
/*
@@ -1963,7 +1983,7 @@ varstr_sortsupport(SortSupport ssup, Oid collid, bool bpchar)
*/
sss->cache_blob = true;
sss->collate_c = collate_c;
- sss->bpchar = bpchar;
+ sss->typeid = typeid;
ssup->ssup_extra = sss;
/*
@@ -2055,17 +2075,25 @@ bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
}
/*
- * sortsupport comparison func (for locale case)
+ * sortsupport comparison func (for NAME C locale case)
+ */
+static int
+namefastcmp_c(Datum x, Datum y, SortSupport ssup)
+{
+ Name arg1 = DatumGetName(x);
+ Name arg2 = DatumGetName(y);
+
+ return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
+}
+
+/*
+ * sortsupport comparison func (for locale case with all varlena types)
*/
static int
-varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
+varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
{
VarString *arg1 = DatumGetVarStringPP(x);
VarString *arg2 = DatumGetVarStringPP(y);
- bool arg1_match;
- VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
-
- /* working state */
char *a1p,
*a2p;
int len1,
@@ -2078,6 +2106,41 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
len1 = VARSIZE_ANY_EXHDR(arg1);
len2 = VARSIZE_ANY_EXHDR(arg2);
+ result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
+
+ /* We can't afford to leak memory here. */
+ if (PointerGetDatum(arg1) != x)
+ pfree(arg1);
+ if (PointerGetDatum(arg2) != y)
+ pfree(arg2);
+
+ return result;
+}
+
+/*
+ * sortsupport comparison func (for locale case with NAME type)
+ */
+static int
+namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
+{
+ Name arg1 = DatumGetName(x);
+ Name arg2 = DatumGetName(y);
+
+ return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
+ NameStr(*arg2), strlen(NameStr(*arg2)),
+ ssup);
+}
+
+/*
+ * sortsupport comparison func for locale cases
+ */
+static int
+varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
+{
+ VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
+ int result;
+ bool arg1_match;
+
/* Fast pre-check for equality, as discussed in varstr_cmp() */
if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
{
@@ -2094,11 +2157,10 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
* (not limited to padding), so we need make no distinction between
* padding space characters and "real" space characters.
*/
- result = 0;
- goto done;
+ return 0;
}
- if (sss->bpchar)
+ if (sss->typeid == BPCHAROID)
{
/* Get true number of bytes, ignoring trailing spaces */
len1 = bpchartruelen(a1p, len1);
@@ -2152,8 +2214,7 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
else if (arg1_match && !sss->cache_blob)
{
/* Use result cached following last actual strcoll() call */
- result = sss->last_returned;
- goto done;
+ return sss->last_returned;
}
if (sss->locale)
@@ -2222,13 +2283,6 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
/* Cache result, perhaps saving an expensive strcoll() call next time */
sss->cache_blob = false;
sss->last_returned = result;
-done:
- /* We can't afford to leak memory here. */
- if (PointerGetDatum(arg1) != x)
- pfree(arg1);
- if (PointerGetDatum(arg2) != y)
- pfree(arg2);
-
return result;
}
@@ -2240,7 +2294,7 @@ varstrcmp_abbrev(Datum x, Datum y, SortSupport ssup)
{
/*
* When 0 is returned, the core system will call varstrfastcmp_c()
- * (bpcharfastcmp_c() in BpChar case) or varstrfastcmp_locale(). Even a
+ * (bpcharfastcmp_c() in BpChar case) or varlenafastcmp_locale(). Even a
* strcmp() on two non-truncated strxfrm() blobs cannot indicate *equality*
* authoritatively, for the same reason that there is a strcoll()
* tie-breaker call to strcmp() in varstr_cmp().
@@ -2279,7 +2333,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
len = VARSIZE_ANY_EXHDR(authoritative);
/* Get number of bytes, ignoring trailing spaces */
- if (sss->bpchar)
+ if (sss->typeid == BPCHAROID)
len = bpchartruelen(authoritative_data, len);
/*
@@ -2758,7 +2812,7 @@ bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
/* Use generic string SortSupport, forcing "C" collation */
- varstr_sortsupport(ssup, C_COLLATION_OID, false);
+ varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
MemoryContextSwitchTo(oldcontext);
@@ -3798,7 +3852,7 @@ bytea_sortsupport(PG_FUNCTION_ARGS)
oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
/* Use generic string SortSupport, forcing "C" collation */
- varstr_sortsupport(ssup, C_COLLATION_OID, false);
+ varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
MemoryContextSwitchTo(oldcontext);