aboutsummaryrefslogtreecommitdiff
path: root/src/backend/commands/tablecmds.c
diff options
context:
space:
mode:
author Tomas Vondra <tomas.vondra@postgresql.org> 2021-03-26 23:22:01 +0100
committer Tomas Vondra <tomas.vondra@postgresql.org> 2021-03-27 00:01:11 +0100
commit a4d75c86bf15220df22de0a92c819ecef9db3849 (patch)
tree a736a68b1c3f022590a886b7bac45276f1f490a6 /src/backend/commands/tablecmds.c
parent 98376c18f12e562421b5c77e619248e8b7aae3c6 (diff)
downloadpostgresql-a4d75c86bf15220df22de0a92c819ecef9db3849.tar.gz
postgresql-a4d75c86bf15220df22de0a92c819ecef9db3849.zip
Extended statistics on expressions
Allow defining extended statistics on expressions, not just on simple column references. With this commit, expressions are supported by all existing extended statistics kinds, improving the same types of estimates. A simple example may look like this: CREATE TABLE t (a int); CREATE STATISTICS s ON mod(a,10), mod(a,20) FROM t; ANALYZE t; The collected statistics are useful e.g. to estimate queries with those expressions in WHERE or GROUP BY clauses: SELECT * FROM t WHERE mod(a,10) = 0 AND mod(a,20) = 0; SELECT 1 FROM t GROUP BY mod(a,10), mod(a,20); This introduces a new internal statistics kind 'e' (expressions) which is built automatically when the statistics object definition includes any expressions. This represents single-expression statistics, as if there was an expression index (but without the index maintenance overhead). The statistics are stored in pg_statistic_ext_data as an array of composite types, which is possible thanks to 79f6a942bd. CREATE STATISTICS allows building statistics on a single expression, in which case it's not possible to specify statistics kinds. A new system view pg_stats_ext_exprs can be used to display expression statistics, similarly to pg_stats and pg_stats_ext views. ALTER TABLE ... ALTER COLUMN ... TYPE now treats extended statistics the same way it treats indexes, i.e. it drops and recreates the statistics. This means all statistics are reset, and we no longer try to preserve at least the functional dependencies. This should not be a major issue in practice, as the functional dependencies actually rely on per-column statistics, which were always reset anyway. Author: Tomas Vondra Reviewed-by: Justin Pryzby, Dean Rasheed, Zhihong Yu Discussion: https://postgr.es/m/ad7891d2-e90c-b446-9fe2-7419143847d7%40enterprisedb.com
Diffstat (limited to 'src/backend/commands/tablecmds.c')
-rw-r--r-- src/backend/commands/tablecmds.c 104
1 files changed, 98 insertions, 6 deletions
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index efac06f72c7..88a68a4697a 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -41,6 +41,7 @@
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_tablespace.h"
+#include "catalog/pg_statistic_ext.h"
#include "catalog/pg_trigger.h"
#include "catalog/pg_type.h"
#include "catalog/storage.h"
@@ -188,6 +189,8 @@ typedef struct AlteredTableInfo
List *changedIndexDefs; /* string definitions of same */
char *replicaIdentityIndex; /* index to reset as REPLICA IDENTITY */
char *clusterOnIndex; /* index to use for CLUSTER */
+ List *changedStatisticsOids; /* OIDs of statistics to rebuild */
+ List *changedStatisticsDefs; /* string definitions of same */
} AlteredTableInfo;
/* Struct describing one new constraint to check in Phase 3 scan */
@@ -440,6 +443,8 @@ static ObjectAddress ATExecDropColumn(List **wqueue, Relation rel, const char *c
ObjectAddresses *addrs);
static ObjectAddress ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
IndexStmt *stmt, bool is_rebuild, LOCKMODE lockmode);
+static ObjectAddress ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
+ CreateStatsStmt *stmt, bool is_rebuild, LOCKMODE lockmode);
static ObjectAddress ATExecAddConstraint(List **wqueue,
AlteredTableInfo *tab, Relation rel,
Constraint *newConstraint, bool recurse, bool is_readd,
@@ -496,6 +501,7 @@ static ObjectAddress ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel,
AlterTableCmd *cmd, LOCKMODE lockmode);
static void RememberConstraintForRebuilding(Oid conoid, AlteredTableInfo *tab);
static void RememberIndexForRebuilding(Oid indoid, AlteredTableInfo *tab);
+static void RememberStatisticsForRebuilding(Oid stxoid, AlteredTableInfo *tab);
static void ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab,
LOCKMODE lockmode);
static void ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId,
@@ -4756,6 +4762,10 @@ ATExecCmd(List **wqueue, AlteredTableInfo *tab,
address = ATExecAddIndex(tab, rel, (IndexStmt *) cmd->def, true,
lockmode);
break;
+ case AT_ReAddStatistics: /* ADD STATISTICS */
+ address = ATExecAddStatistics(tab, rel, (CreateStatsStmt *) cmd->def,
+ true, lockmode);
+ break;
case AT_AddConstraint: /* ADD CONSTRAINT */
/* Transform the command only during initial examination */
if (cur_pass == AT_PASS_ADD_CONSTR)
@@ -8284,6 +8294,29 @@ ATExecAddIndex(AlteredTableInfo *tab, Relation rel,
}
/*
+ * ALTER TABLE ADD STATISTICS
+ *
+ * There is no such command in the grammar, but we use this internally to add
+ * AT_ReAddStatistics subcommands to rebuild extended statistics after a table
+ * column type change.
+ */
+static ObjectAddress
+ATExecAddStatistics(AlteredTableInfo *tab, Relation rel,
+ CreateStatsStmt *stmt, bool is_rebuild, LOCKMODE lockmode)
+{
+ ObjectAddress address;
+
+ Assert(IsA(stmt, CreateStatsStmt));
+
+ /* The CreateStatsStmt has already been through transformStatsStmt */
+ Assert(stmt->transformed);
+
+ address = CreateStatistics(stmt);
+
+ return address;
+}
+
+/*
* ALTER TABLE ADD CONSTRAINT USING INDEX
*
* Returns the address of the new constraint.
@@ -11830,9 +11863,7 @@ ATExecAlterColumnType(AlteredTableInfo *tab, Relation rel,
* Give the extended-stats machinery a chance to fix anything
* that this column type change would break.
*/
- UpdateStatisticsForTypeChange(foundObject.objectId,
- RelationGetRelid(rel), attnum,
- attTup->atttypid, targettype);
+ RememberStatisticsForRebuilding(foundObject.objectId, tab);
break;
case OCLASS_PROC:
@@ -12203,6 +12234,32 @@ RememberIndexForRebuilding(Oid indoid, AlteredTableInfo *tab)
}
/*
+ * Subroutine for ATExecAlterColumnType: remember that a statistics object
+ * needs to be rebuilt (which we might already know).
+ */
+static void
+RememberStatisticsForRebuilding(Oid stxoid, AlteredTableInfo *tab)
+{
+ /*
+ * This de-duplication check is critical for two independent reasons: we
+ * mustn't try to recreate the same statistics object twice, and if the
+ * statistics depends on more than one column whose type is to be altered,
+ * we must capture its definition string before applying any of the type
+ * changes. ruleutils.c will get confused if we ask again later.
+ */
+ if (!list_member_oid(tab->changedStatisticsOids, stxoid))
+ {
+ /* OK, capture the statistics object's existing definition string */
+ char *defstring = pg_get_statisticsobjdef_string(stxoid);
+
+ tab->changedStatisticsOids = lappend_oid(tab->changedStatisticsOids,
+ stxoid);
+ tab->changedStatisticsDefs = lappend(tab->changedStatisticsDefs,
+ defstring);
+ }
+}
+
+/*
* Cleanup after we've finished all the ALTER TYPE operations for a
* particular relation. We have to drop and recreate all the indexes
* and constraints that depend on the altered columns. We do the
@@ -12306,6 +12363,22 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
add_exact_object_address(&obj, objects);
}
+ /* add dependencies for new statistics */
+ forboth(oid_item, tab->changedStatisticsOids,
+ def_item, tab->changedStatisticsDefs)
+ {
+ Oid oldId = lfirst_oid(oid_item);
+ Oid relid;
+
+ relid = StatisticsGetRelation(oldId, false);
+ ATPostAlterTypeParse(oldId, relid, InvalidOid,
+ (char *) lfirst(def_item),
+ wqueue, lockmode, tab->rewrite);
+
+ ObjectAddressSet(obj, StatisticExtRelationId, oldId);
+ add_exact_object_address(&obj, objects);
+ }
+
/*
* Queue up command to restore replica identity index marking
*/
@@ -12354,9 +12427,9 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
}
/*
- * Parse the previously-saved definition string for a constraint or index
- * against the newly-established column data type(s), and queue up the
- * resulting command parsetrees for execution.
+ * Parse the previously-saved definition string for a constraint, index or
+ * statistics object against the newly-established column data type(s), and
+ * queue up the resulting command parsetrees for execution.
*
* This might fail if, for example, you have a WHERE clause that uses an
* operator that's not available for the new column type.
@@ -12402,6 +12475,11 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd,
querytree_list = lappend(querytree_list, stmt);
querytree_list = list_concat(querytree_list, afterStmts);
}
+ else if (IsA(stmt, CreateStatsStmt))
+ querytree_list = lappend(querytree_list,
+ transformStatsStmt(oldRelId,
+ (CreateStatsStmt *) stmt,
+ cmd));
else
querytree_list = lappend(querytree_list, stmt);
}
@@ -12540,6 +12618,20 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd,
elog(ERROR, "unexpected statement subtype: %d",
(int) stmt->subtype);
}
+ else if (IsA(stm, CreateStatsStmt))
+ {
+ CreateStatsStmt *stmt = (CreateStatsStmt *) stm;
+ AlterTableCmd *newcmd;
+
+ /* keep the statistics object's comment */
+ stmt->stxcomment = GetComment(oldId, StatisticExtRelationId, 0);
+
+ newcmd = makeNode(AlterTableCmd);
+ newcmd->subtype = AT_ReAddStatistics;
+ newcmd->def = (Node *) stmt;
+ tab->subcmds[AT_PASS_MISC] =
+ lappend(tab->subcmds[AT_PASS_MISC], newcmd);
+ }
else
elog(ERROR, "unexpected statement type: %d",
(int) nodeTag(stm));