aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/plan/planner.c
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2024-05-21 12:42:27 -0400
committerRobert Haas <rhaas@postgresql.org>2024-05-21 12:44:51 -0400
commit12933dc6048902ba891f9572cab96981f50ef669 (patch)
tree3e14dabda4e36866611d8e2627b3925b011c00e3 /src/backend/optimizer/plan/planner.c
parent3bd7b2f465deb48c0d37ed6a41bd864771f44390 (diff)
downloadpostgresql-12933dc6048902ba891f9572cab96981f50ef669.tar.gz
postgresql-12933dc6048902ba891f9572cab96981f50ef669.zip
Re-allow planner to use Merge Append to efficiently implement UNION.
This reverts commit 7204f35919b7e021e8d1bc9f2d76fd6bfcdd2070, thus restoring 66c0185a3 (Allow planner to use Merge Append to efficiently implement UNION) as well as the follow-on commits d5d2205c8, 3b1a7eb28, 7487044d6. Per further discussion on pgsql-release, we wish to ship beta1 with this feature, and patch the bug that was found just before wrap, rather than shipping beta1 with the feature reverted.
Diffstat (limited to 'src/backend/optimizer/plan/planner.c')
-rw-r--r--src/backend/optimizer/plan/planner.c116
1 files changed, 95 insertions, 21 deletions
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index ddd23878409..032818423f6 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -54,6 +54,7 @@
#include "optimizer/tlist.h"
#include "parser/analyze.h"
#include "parser/parse_agg.h"
+#include "parser/parse_clause.h"
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "partitioning/partdesc.h"
@@ -119,12 +120,15 @@ typedef struct
{
List *activeWindows; /* active windows, if any */
grouping_sets_data *gset_data; /* grouping sets data, if any */
+ SetOperationStmt *setop; /* parent set operation or NULL if not a
+ * subquery belonging to a set operation */
} standard_qp_extra;
/* Local functions */
static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
-static void grouping_planner(PlannerInfo *root, double tuple_fraction);
+static void grouping_planner(PlannerInfo *root, double tuple_fraction,
+ SetOperationStmt *setops);
static grouping_sets_data *preprocess_grouping_sets(PlannerInfo *root);
static List *remap_to_groupclause_idx(List *groupClause, List *gsets,
int *tleref_to_colnum_map);
@@ -249,6 +253,8 @@ static bool group_by_has_partkey(RelOptInfo *input_rel,
List *targetList,
List *groupClause);
static int common_prefix_cmp(const void *a, const void *b);
+static List *generate_setop_child_grouplist(SetOperationStmt *op,
+ List *targetlist);
/*****************************************************************************
@@ -406,8 +412,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
}
/* primary planning entry point (may recurse for subqueries) */
- root = subquery_planner(glob, parse, NULL,
- false, tuple_fraction);
+ root = subquery_planner(glob, parse, NULL, false, tuple_fraction, NULL);
/* Select best Path and turn it into a Plan */
final_rel = fetch_upper_rel(root, UPPERREL_FINAL, NULL);
@@ -598,6 +603,10 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
* hasRecursion is true if this is a recursive WITH query.
* tuple_fraction is the fraction of tuples we expect will be retrieved.
* tuple_fraction is interpreted as explained for grouping_planner, below.
+ * setops is used for set operation subqueries to provide the subquery with
+ * the context in which it's being used so that Paths correctly sorted for the
+ * set operation can be generated. NULL when not planning a set operation
+ * child.
*
* Basically, this routine does the stuff that should only be done once
* per Query object. It then calls grouping_planner. At one time,
@@ -616,9 +625,9 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
*--------------------
*/
PlannerInfo *
-subquery_planner(PlannerGlobal *glob, Query *parse,
- PlannerInfo *parent_root,
- bool hasRecursion, double tuple_fraction)
+subquery_planner(PlannerGlobal *glob, Query *parse, PlannerInfo *parent_root,
+ bool hasRecursion, double tuple_fraction,
+ SetOperationStmt *setops)
{
PlannerInfo *root;
List *newWithCheckOptions;
@@ -1077,7 +1086,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
/*
* Do the main planning.
*/
- grouping_planner(root, tuple_fraction);
+ grouping_planner(root, tuple_fraction, setops);
/*
* Capture the set of outer-level param IDs we have access to, for use in
@@ -1275,7 +1284,11 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr)
* 0 < tuple_fraction < 1: expect the given fraction of tuples available
* from the plan to be retrieved
* tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
- * expected to be retrieved (ie, a LIMIT specification)
+ * expected to be retrieved (ie, a LIMIT specification).
+ * setops is used for set operation subqueries to provide the subquery with
+ * the context in which it's being used so that Paths correctly sorted for the
+ * set operation can be generated. NULL when not planning a set operation
+ * child.
*
* Returns nothing; the useful output is in the Paths we attach to the
* (UPPERREL_FINAL, NULL) upperrel in *root. In addition,
@@ -1286,7 +1299,8 @@ preprocess_phv_expression(PlannerInfo *root, Expr *expr)
*--------------------
*/
static void
-grouping_planner(PlannerInfo *root, double tuple_fraction)
+grouping_planner(PlannerInfo *root, double tuple_fraction,
+ SetOperationStmt *setops)
{
Query *parse = root->parse;
int64 offset_est = 0;
@@ -1322,17 +1336,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
if (parse->setOperations)
{
/*
- * If there's a top-level ORDER BY, assume we have to fetch all the
- * tuples. This might be too simplistic given all the hackery below
- * to possibly avoid the sort; but the odds of accurate estimates here
- * are pretty low anyway. XXX try to get rid of this in favor of
- * letting plan_set_operations generate both fast-start and
- * cheapest-total paths.
- */
- if (parse->sortClause)
- root->tuple_fraction = 0.0;
-
- /*
* Construct Paths for set operations. The results will not need any
* work except perhaps a top-level sort and/or LIMIT. Note that any
* special work for recursive unions is the responsibility of
@@ -1502,6 +1505,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
qp_extra.gset_data = gset_data;
/*
+ * If we're a subquery for a set operation, store the SetOperationStmt
+ * in qp_extra.
+ */
+ qp_extra.setop = setops;
+
+ /*
* Generate the best unsorted and presorted paths for the scan/join
* portion of this Query, ie the processing represented by the
* FROM/WHERE clauses. (Note there may not be any presorted paths.)
@@ -3453,6 +3462,27 @@ standard_qp_callback(PlannerInfo *root, void *extra)
parse->sortClause,
tlist);
+ /* setting setop_pathkeys might be useful to the union planner */
+ if (qp_extra->setop != NULL &&
+ set_operation_ordered_results_useful(qp_extra->setop))
+ {
+ List *groupClauses;
+ bool sortable;
+
+ groupClauses = generate_setop_child_grouplist(qp_extra->setop, tlist);
+
+ root->setop_pathkeys =
+ make_pathkeys_for_sortclauses_extended(root,
+ &groupClauses,
+ tlist,
+ false,
+ &sortable);
+ if (!sortable)
+ root->setop_pathkeys = NIL;
+ }
+ else
+ root->setop_pathkeys = NIL;
+
/*
* Figure out whether we want a sorted result from query_planner.
*
@@ -3462,7 +3492,9 @@ standard_qp_callback(PlannerInfo *root, void *extra)
* sortable DISTINCT clause that's more rigorous than the ORDER BY clause,
* we try to produce output that's sufficiently well sorted for the
* DISTINCT. Otherwise, if there is an ORDER BY clause, we want to sort
- * by the ORDER BY clause.
+ * by the ORDER BY clause. Otherwise, if we're a subquery being planned
+ * for a set operation which can benefit from presorted results and have a
+ * sortable targetlist, we want to sort by the target list.
*
* Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a superset
* of GROUP BY, it would be tempting to request sort by ORDER BY --- but
@@ -3480,6 +3512,8 @@ standard_qp_callback(PlannerInfo *root, void *extra)
root->query_pathkeys = root->distinct_pathkeys;
else if (root->sort_pathkeys)
root->query_pathkeys = root->sort_pathkeys;
+ else if (root->setop_pathkeys != NIL)
+ root->query_pathkeys = root->setop_pathkeys;
else
root->query_pathkeys = NIL;
}
@@ -7923,3 +7957,43 @@ group_by_has_partkey(RelOptInfo *input_rel,
return true;
}
+
+/*
+ * generate_setop_child_grouplist
+ * Build a SortGroupClause list defining the sort/grouping properties
+ * of the child of a set operation.
+ *
+ * This is similar to generate_setop_grouplist() but differs as the setop
+ * child query's targetlist entries may already have a tleSortGroupRef
+ * assigned for other purposes, such as GROUP BYs. Here we keep the
+ * SortGroupClause list in the same order as 'op' groupClauses and just adjust
+ * the tleSortGroupRef to reference the TargetEntry's 'ressortgroupref'.
+ */
+static List *
+generate_setop_child_grouplist(SetOperationStmt *op, List *targetlist)
+{
+ List *grouplist = copyObject(op->groupClauses);
+ ListCell *lg;
+ ListCell *lt;
+
+ lg = list_head(grouplist);
+ foreach(lt, targetlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lt);
+ SortGroupClause *sgc;
+
+ /* resjunk columns could have sortgrouprefs. Leave these alone */
+ if (tle->resjunk)
+ continue;
+
+ /* we expect every non-resjunk target to have a SortGroupClause */
+ Assert(lg != NULL);
+ sgc = (SortGroupClause *) lfirst(lg);
+ lg = lnext(grouplist, lg);
+
+ /* assign a tleSortGroupRef, or reuse the existing one */
+ sgc->tleSortGroupRef = assignSortGroupRef(tle, targetlist);
+ }
+ Assert(lg == NULL);
+ return grouplist;
+}