diff options
Diffstat (limited to 'src/include')
-rw-r--r-- | src/include/catalog/catversion.h | 2 | ||||
-rw-r--r-- | src/include/catalog/pg_statistic.h | 43 | ||||
-rw-r--r-- | src/include/commands/vacuum.h | 11 | ||||
-rw-r--r-- | src/include/statistics/extended_stats_internal.h | 2 | ||||
-rw-r--r-- | src/include/utils/lsyscache.h | 1 | ||||
-rw-r--r-- | src/include/utils/typcache.h | 1 |
6 files changed, 40 insertions, 20 deletions
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index e16ec9dd778..838e927547f 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201812091 +#define CATALOG_VERSION_NO 201812141 #endif diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h index 49223aab4fc..2155f51a5b1 100644 --- a/src/include/catalog/pg_statistic.h +++ b/src/include/catalog/pg_statistic.h @@ -74,12 +74,13 @@ CATALOG(pg_statistic,2619,StatisticRelationId) * statistical data can be placed. Each slot includes: * kind integer code identifying kind of data (see below) * op OID of associated operator, if needed + * coll OID of relevant collation, or 0 if none * numbers float4 array (for statistical values) * values anyarray (for representations of data values) - * The ID and operator fields are never NULL; they are zeroes in an - * unused slot. The numbers and values fields are NULL in an unused - * slot, and might also be NULL in a used slot if the slot kind has - * no need for one or the other. + * The ID, operator, and collation fields are never NULL; they are zeroes + * in an unused slot. The numbers and values fields are NULL in an + * unused slot, and might also be NULL in a used slot if the slot kind + * has no need for one or the other. * ---------------- */ @@ -95,6 +96,12 @@ CATALOG(pg_statistic,2619,StatisticRelationId) Oid staop4; Oid staop5; + Oid stacoll1; + Oid stacoll2; + Oid stacoll3; + Oid stacoll4; + Oid stacoll5; + #ifdef CATALOG_VARLEN /* variable-length fields start here */ float4 stanumbers1[1]; float4 stanumbers2[1]; @@ -159,7 +166,8 @@ typedef FormData_pg_statistic *Form_pg_statistic; /* * In a "most common values" slot, staop is the OID of the "=" operator - * used to decide whether values are the same or not. stavalues contains + * used to decide whether values are the same or not, and stacoll is the + * collation used (same as column's collation). stavalues contains * the K most common non-null values appearing in the column, and stanumbers * contains their frequencies (fractions of total row count). The values * shall be ordered in decreasing frequency. Note that since the arrays are @@ -171,9 +179,11 @@ typedef FormData_pg_statistic *Form_pg_statistic; /* * A "histogram" slot describes the distribution of scalar data. staop is - * the OID of the "<" operator that describes the sort ordering. (In theory, - * more than one histogram could appear, if a datatype has more than one - * useful sort operator.) stavalues contains M (>=2) non-null values that + * the OID of the "<" operator that describes the sort ordering, and stacoll + * is the relevant collation. (In theory more than one histogram could appear, + * if a datatype has more than one useful sort operator or we care about more + * than one collation. Currently the collation will always be that of the + * underlying column.) stavalues contains M (>=2) non-null values that * divide the non-null column data values into M-1 bins of approximately equal * population. The first stavalues item is the MIN and the last is the MAX. * stanumbers is not used and should be NULL. IMPORTANT POINT: if an MCV @@ -190,11 +200,12 @@ typedef FormData_pg_statistic *Form_pg_statistic; /* * A "correlation" slot describes the correlation between the physical order * of table tuples and the ordering of data values of this column, as seen - * by the "<" operator identified by staop. (As with the histogram, more - * than one entry could theoretically appear.) stavalues is not used and - * should be NULL. stanumbers contains a single entry, the correlation - * coefficient between the sequence of data values and the sequence of - * their actual tuple positions. The coefficient ranges from +1 to -1. + * by the "<" operator identified by staop with the collation identified by + * stacoll. (As with the histogram, more than one entry could theoretically + * appear.) stavalues is not used and should be NULL. stanumbers contains + * a single entry, the correlation coefficient between the sequence of data + * values and the sequence of their actual tuple positions. The coefficient + * ranges from +1 to -1. */ #define STATISTIC_KIND_CORRELATION 3 @@ -203,7 +214,8 @@ typedef FormData_pg_statistic *Form_pg_statistic; * except that it stores the most common non-null *elements* of the column * values. This is useful when the column datatype is an array or some other * type with identifiable elements (for instance, tsvector). staop contains - * the equality operator appropriate to the element type. stavalues contains + * the equality operator appropriate to the element type, and stacoll + * contains the collation to use with it. stavalues contains * the most common element values, and stanumbers their frequencies. Unlike * MCV slots, frequencies are measured as the fraction of non-null rows the * element value appears in, not the frequency of all rows. Also unlike @@ -226,7 +238,8 @@ typedef FormData_pg_statistic *Form_pg_statistic; * A "distinct elements count histogram" slot describes the distribution of * the number of distinct element values present in each row of an array-type * column. Only non-null rows are considered, and only non-null elements. - * staop contains the equality operator appropriate to the element type. + * staop contains the equality operator appropriate to the element type, + * and stacoll contains the collation to use with it. * stavalues is not used and should be NULL. The last member of stanumbers is * the average count of distinct element values over all non-null rows. The * preceding M (>=2) members form a histogram that divides the population of diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index 2f4303e40d8..dfff23ac55b 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -52,9 +52,11 @@ * careful to allocate any pointed-to data in anl_context, which will NOT * be CurrentMemoryContext when compute_stats is called. * - * Note: for the moment, all comparisons done for statistical purposes - * should use the database's default collation (DEFAULT_COLLATION_OID). - * This might change in some future release. + * Note: all comparisons done for statistical purposes should use the + * underlying column's collation (attcollation), except in situations + * where a noncollatable container type contains a collatable type; + * in that case use the type's default collation. Be sure to record + * the appropriate collation in stacoll. *---------- */ typedef struct VacAttrStats *VacAttrStatsP; @@ -78,11 +80,13 @@ typedef struct VacAttrStats * because some index opclasses store a different type than the underlying * column/expression. Instead use attrtypid, attrtypmod, and attrtype for * information about the datatype being fed to the typanalyze function. + * Likewise, use attrcollid not attr->attcollation. */ Form_pg_attribute attr; /* copy of pg_attribute row for column */ Oid attrtypid; /* type of data being analyzed */ int32 attrtypmod; /* typmod of data being analyzed */ Form_pg_type attrtype; /* copy of pg_type row for attrtypid */ + Oid attrcollid; /* collation of data being analyzed */ MemoryContext anl_context; /* where to save long-lived data */ /* @@ -103,6 +107,7 @@ typedef struct VacAttrStats float4 stadistinct; /* # distinct values */ int16 stakind[STATISTIC_NUM_SLOTS]; Oid staop[STATISTIC_NUM_SLOTS]; + Oid stacoll[STATISTIC_NUM_SLOTS]; int numnumbers[STATISTIC_NUM_SLOTS]; float4 *stanumbers[STATISTIC_NUM_SLOTS]; int numvalues[STATISTIC_NUM_SLOTS]; diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h index b3ca0c1229f..fff6bc67991 100644 --- a/src/include/statistics/extended_stats_internal.h +++ b/src/include/statistics/extended_stats_internal.h @@ -59,7 +59,7 @@ extern MVDependencies *statext_dependencies_deserialize(bytea *data); extern MultiSortSupport multi_sort_init(int ndims); extern void multi_sort_add_dimension(MultiSortSupport mss, int sortdim, - Oid oper); + Oid oper, Oid collation); extern int multi_sort_compare(const void *a, const void *b, void *arg); extern int multi_sort_compare_dim(int dim, const SortItem *a, const SortItem *b, MultiSortSupport mss); diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index ff1705ad2b8..64089930019 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -44,6 +44,7 @@ typedef struct AttStatsSlot { /* Always filled: */ Oid staop; /* Actual staop for the found slot */ + Oid stacoll; /* Actual collation for the found slot */ /* Filled if ATTSTATSSLOT_VALUES is specified: */ Oid valuetype; /* Actual datatype of the values */ Datum *values; /* slot's "values" array, or NULL if none */ diff --git a/src/include/utils/typcache.h b/src/include/utils/typcache.h index 217d064da52..2b299608cfc 100644 --- a/src/include/utils/typcache.h +++ b/src/include/utils/typcache.h @@ -41,6 +41,7 @@ typedef struct TypeCacheEntry char typtype; Oid typrelid; Oid typelem; + Oid typcollation; /* * Information obtained from opfamily entries |