about | summary | refs | log | tree | commit | diff
path: root/src/include
diff options
context:
space:
mode:
Diffstat (limited to 'src/include')
-rw-r--r-- src/include/catalog/catversion.h 2
-rw-r--r-- src/include/catalog/pg_statistic.h 43
-rw-r--r-- src/include/commands/vacuum.h 11
-rw-r--r-- src/include/statistics/extended_stats_internal.h 2
-rw-r--r-- src/include/utils/lsyscache.h 1
-rw-r--r-- src/include/utils/typcache.h 1
6 files changed, 40 insertions, 20 deletions
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index e16ec9dd778..838e927547f 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201812091
+#define CATALOG_VERSION_NO 201812141
#endif
diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h
index 49223aab4fc..2155f51a5b1 100644
--- a/src/include/catalog/pg_statistic.h
+++ b/src/include/catalog/pg_statistic.h
@@ -74,12 +74,13 @@ CATALOG(pg_statistic,2619,StatisticRelationId)
* statistical data can be placed. Each slot includes:
* kind integer code identifying kind of data (see below)
* op OID of associated operator, if needed
+ * coll OID of relevant collation, or 0 if none
* numbers float4 array (for statistical values)
* values anyarray (for representations of data values)
- * The ID and operator fields are never NULL; they are zeroes in an
- * unused slot. The numbers and values fields are NULL in an unused
- * slot, and might also be NULL in a used slot if the slot kind has
- * no need for one or the other.
+ * The ID, operator, and collation fields are never NULL; they are zeroes
+ * in an unused slot. The numbers and values fields are NULL in an
+ * unused slot, and might also be NULL in a used slot if the slot kind
+ * has no need for one or the other.
* ----------------
*/
@@ -95,6 +96,12 @@ CATALOG(pg_statistic,2619,StatisticRelationId)
Oid staop4;
Oid staop5;
+ Oid stacoll1;
+ Oid stacoll2;
+ Oid stacoll3;
+ Oid stacoll4;
+ Oid stacoll5;
+
#ifdef CATALOG_VARLEN /* variable-length fields start here */
float4 stanumbers1[1];
float4 stanumbers2[1];
@@ -159,7 +166,8 @@ typedef FormData_pg_statistic *Form_pg_statistic;
/*
* In a "most common values" slot, staop is the OID of the "=" operator
- * used to decide whether values are the same or not. stavalues contains
+ * used to decide whether values are the same or not, and stacoll is the
+ * collation used (same as column's collation). stavalues contains
* the K most common non-null values appearing in the column, and stanumbers
* contains their frequencies (fractions of total row count). The values
* shall be ordered in decreasing frequency. Note that since the arrays are
@@ -171,9 +179,11 @@ typedef FormData_pg_statistic *Form_pg_statistic;
/*
* A "histogram" slot describes the distribution of scalar data. staop is
- * the OID of the "<" operator that describes the sort ordering. (In theory,
- * more than one histogram could appear, if a datatype has more than one
- * useful sort operator.) stavalues contains M (>=2) non-null values that
+ * the OID of the "<" operator that describes the sort ordering, and stacoll
+ * is the relevant collation. (In theory more than one histogram could appear,
+ * if a datatype has more than one useful sort operator or we care about more
+ * than one collation. Currently the collation will always be that of the
+ * underlying column.) stavalues contains M (>=2) non-null values that
* divide the non-null column data values into M-1 bins of approximately equal
* population. The first stavalues item is the MIN and the last is the MAX.
* stanumbers is not used and should be NULL. IMPORTANT POINT: if an MCV
@@ -190,11 +200,12 @@ typedef FormData_pg_statistic *Form_pg_statistic;
/*
* A "correlation" slot describes the correlation between the physical order
* of table tuples and the ordering of data values of this column, as seen
- * by the "<" operator identified by staop. (As with the histogram, more
- * than one entry could theoretically appear.) stavalues is not used and
- * should be NULL. stanumbers contains a single entry, the correlation
- * coefficient between the sequence of data values and the sequence of
- * their actual tuple positions. The coefficient ranges from +1 to -1.
+ * by the "<" operator identified by staop with the collation identified by
+ * stacoll. (As with the histogram, more than one entry could theoretically
+ * appear.) stavalues is not used and should be NULL. stanumbers contains
+ * a single entry, the correlation coefficient between the sequence of data
+ * values and the sequence of their actual tuple positions. The coefficient
+ * ranges from +1 to -1.
*/
#define STATISTIC_KIND_CORRELATION 3
@@ -203,7 +214,8 @@ typedef FormData_pg_statistic *Form_pg_statistic;
* except that it stores the most common non-null *elements* of the column
* values. This is useful when the column datatype is an array or some other
* type with identifiable elements (for instance, tsvector). staop contains
- * the equality operator appropriate to the element type. stavalues contains
+ * the equality operator appropriate to the element type, and stacoll
+ * contains the collation to use with it. stavalues contains
* the most common element values, and stanumbers their frequencies. Unlike
* MCV slots, frequencies are measured as the fraction of non-null rows the
* element value appears in, not the frequency of all rows. Also unlike
@@ -226,7 +238,8 @@ typedef FormData_pg_statistic *Form_pg_statistic;
* A "distinct elements count histogram" slot describes the distribution of
* the number of distinct element values present in each row of an array-type
* column. Only non-null rows are considered, and only non-null elements.
- * staop contains the equality operator appropriate to the element type.
+ * staop contains the equality operator appropriate to the element type,
+ * and stacoll contains the collation to use with it.
* stavalues is not used and should be NULL. The last member of stanumbers is
* the average count of distinct element values over all non-null rows. The
* preceding M (>=2) members form a histogram that divides the population of
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 2f4303e40d8..dfff23ac55b 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -52,9 +52,11 @@
* careful to allocate any pointed-to data in anl_context, which will NOT
* be CurrentMemoryContext when compute_stats is called.
*
- * Note: for the moment, all comparisons done for statistical purposes
- * should use the database's default collation (DEFAULT_COLLATION_OID).
- * This might change in some future release.
+ * Note: all comparisons done for statistical purposes should use the
+ * underlying column's collation (attcollation), except in situations
+ * where a noncollatable container type contains a collatable type;
+ * in that case use the type's default collation. Be sure to record
+ * the appropriate collation in stacoll.
*----------
*/
typedef struct VacAttrStats *VacAttrStatsP;
@@ -78,11 +80,13 @@ typedef struct VacAttrStats
* because some index opclasses store a different type than the underlying
* column/expression. Instead use attrtypid, attrtypmod, and attrtype for
* information about the datatype being fed to the typanalyze function.
+ * Likewise, use attrcollid not attr->attcollation.
*/
Form_pg_attribute attr; /* copy of pg_attribute row for column */
Oid attrtypid; /* type of data being analyzed */
int32 attrtypmod; /* typmod of data being analyzed */
Form_pg_type attrtype; /* copy of pg_type row for attrtypid */
+ Oid attrcollid; /* collation of data being analyzed */
MemoryContext anl_context; /* where to save long-lived data */
/*
@@ -103,6 +107,7 @@ typedef struct VacAttrStats
float4 stadistinct; /* # distinct values */
int16 stakind[STATISTIC_NUM_SLOTS];
Oid staop[STATISTIC_NUM_SLOTS];
+ Oid stacoll[STATISTIC_NUM_SLOTS];
int numnumbers[STATISTIC_NUM_SLOTS];
float4 *stanumbers[STATISTIC_NUM_SLOTS];
int numvalues[STATISTIC_NUM_SLOTS];
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index b3ca0c1229f..fff6bc67991 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -59,7 +59,7 @@ extern MVDependencies *statext_dependencies_deserialize(bytea *data);
extern MultiSortSupport multi_sort_init(int ndims);
extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim,
- Oid oper);
+ Oid oper, Oid collation);
extern int multi_sort_compare(const void *a, const void *b, void *arg);
extern int multi_sort_compare_dim(int dim, const SortItem *a,
const SortItem *b, MultiSortSupport mss);
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index ff1705ad2b8..64089930019 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -44,6 +44,7 @@ typedef struct AttStatsSlot
{
/* Always filled: */
Oid staop; /* Actual staop for the found slot */
+ Oid stacoll; /* Actual collation for the found slot */
/* Filled if ATTSTATSSLOT_VALUES is specified: */
Oid valuetype; /* Actual datatype of the values */
Datum *values; /* slot's "values" array, or NULL if none */
diff --git a/src/include/utils/typcache.h b/src/include/utils/typcache.h
index 217d064da52..2b299608cfc 100644
--- a/src/include/utils/typcache.h
+++ b/src/include/utils/typcache.h
@@ -41,6 +41,7 @@ typedef struct TypeCacheEntry
char typtype;
Oid typrelid;
Oid typelem;
+ Oid typcollation;
/*
* Information obtained from opfamily entries