aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2012-08-07 19:02:54 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2012-08-07 19:02:54 -0400
commit5ebaaa49445eb1ba7b299bbea3a477d4e4c0430b (patch)
tree1a72c939a655e9acbba1c71a1831dd38ee41db95 /src/backend/optimizer/path
parent5078be480412790e4f1b2aeda04f8c65fc7a3b93 (diff)
downloadpostgresql-5ebaaa49445eb1ba7b299bbea3a477d4e4c0430b.tar.gz
postgresql-5ebaaa49445eb1ba7b299bbea3a477d4e4c0430b.zip
Implement SQL-standard LATERAL subqueries.
This patch implements the standard syntax of LATERAL attached to a sub-SELECT in FROM, and also allows LATERAL attached to a function in FROM, since set-returning function calls are expected to be one of the principal use-cases. The main change here is a rewrite of the mechanism for keeping track of which relations are visible for column references while the FROM clause is being scanned. The parser "namespace" lists are no longer lists of bare RTEs, but are lists of ParseNamespaceItem structs, which carry an RTE pointer as well as some visibility-controlling flags. Aside from supporting LATERAL correctly, this lets us get rid of the ancient hacks that required rechecking subqueries and JOIN/ON and function-in-FROM expressions for invalid references after they were initially parsed. Invalid column references are now always correctly detected on sight. In passing, remove assorted parser error checks that are now dead code by virtue of our having gotten rid of add_missing_from, as well as some comments that are obsolete for the same reason. (It was mainly add_missing_from that caused so much fudging here in the first place.) The planner support for this feature is very minimal, and will be improved in future patches. It works well enough for testing purposes, though. catversion bump forced due to new field in RangeTblEntry.
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r--src/backend/optimizer/path/allpaths.c84
-rw-r--r--src/backend/optimizer/path/costsize.c20
-rw-r--r--src/backend/optimizer/path/joinpath.c30
3 files changed, 112 insertions, 22 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index f02954982a7..bfda05394d6 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -253,8 +253,9 @@ set_rel_size(PlannerInfo *root, RelOptInfo *rel,
case RTE_SUBQUERY:
/*
- * Subqueries don't support parameterized paths, so just go
- * ahead and build their paths immediately.
+ * Subqueries don't support making a choice between
+ * parameterized and unparameterized paths, so just go ahead
+ * and build their paths immediately.
*/
set_subquery_pathlist(root, rel, rti, rte);
break;
@@ -698,6 +699,10 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
if (IS_DUMMY_REL(childrel))
continue;
+ /* XXX need to figure out what to do for LATERAL */
+ if (childrel->cheapest_total_path == NULL)
+ elog(ERROR, "LATERAL within an append relation is not supported yet");
+
/*
* Child is live, so add its cheapest access path to the Append path
* we are constructing for the parent.
@@ -906,6 +911,10 @@ generate_mergeappend_paths(PlannerInfo *root, RelOptInfo *rel,
*/
if (cheapest_startup == NULL || cheapest_total == NULL)
{
+ /* XXX need to figure out what to do for LATERAL */
+ if (childrel->cheapest_total_path == NULL)
+ elog(ERROR, "LATERAL within an append relation is not supported yet");
+
cheapest_startup = cheapest_total =
childrel->cheapest_total_path;
Assert(cheapest_total != NULL);
@@ -1012,8 +1021,13 @@ has_multiple_baserels(PlannerInfo *root)
* set_subquery_pathlist
* Build the (single) access path for a subquery RTE
*
- * There's no need for a separate set_subquery_size phase, since we don't
- * support parameterized paths for subqueries.
+ * We don't currently support generating parameterized paths for subqueries
+ * by pushing join clauses down into them; it seems too expensive to re-plan
+ * the subquery multiple times to consider different alternatives. So the
+ * subquery will have exactly one path. (The path will be parameterized
+ * if the subquery contains LATERAL references, otherwise not.) Since there's
+ * no freedom of action here, there's no need for a separate set_subquery_size
+ * phase: we just make the path right away.
*/
static void
set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
@@ -1021,6 +1035,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
{
Query *parse = root->parse;
Query *subquery = rte->subquery;
+ Relids required_outer;
bool *differentTypes;
double tuple_fraction;
PlannerInfo *subroot;
@@ -1033,6 +1048,20 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
*/
subquery = copyObject(subquery);
+ /*
+ * If it's a LATERAL subquery, it might contain some Vars of the current
+ * query level, requiring it to be treated as parameterized.
+ */
+ if (rte->lateral)
+ {
+ required_outer = pull_varnos_of_level((Node *) subquery, 1);
+ /* Enforce convention that empty required_outer is exactly NULL */
+ if (bms_is_empty(required_outer))
+ required_outer = NULL;
+ }
+ else
+ required_outer = NULL;
+
/* We need a workspace for keeping track of set-op type coercions */
differentTypes = (bool *)
palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
@@ -1051,10 +1080,9 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
* pseudoconstant clauses; better to have the gating node above the
* subquery.
*
- * Also, if the sub-query has "security_barrier" flag, it means the
+ * Also, if the sub-query has the "security_barrier" flag, it means the
* sub-query originated from a view that must enforce row-level security.
- * We must not push down quals in order to avoid information leaks, either
- * via side-effects or error output.
+ * Then we must not push down quals that contain leaky functions.
*
* Non-pushed-down clauses will get evaluated as qpquals of the
* SubqueryScan node.
@@ -1134,7 +1162,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
pathkeys = convert_subquery_pathkeys(root, rel, subroot->query_pathkeys);
/* Generate appropriate path */
- add_path(rel, create_subqueryscan_path(root, rel, pathkeys, NULL));
+ add_path(rel, create_subqueryscan_path(root, rel, pathkeys, required_outer));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);
@@ -1143,12 +1171,32 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
/*
* set_function_pathlist
* Build the (single) access path for a function RTE
+ *
+ * As with subqueries, a function RTE's path might be parameterized due to
+ * LATERAL references, but that's inherent in the function expression and
+ * not a result of pushing down join quals.
*/
static void
set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
+ Relids required_outer;
+
+ /*
+ * If it's a LATERAL function, it might contain some Vars of the current
+ * query level, requiring it to be treated as parameterized.
+ */
+ if (rte->lateral)
+ {
+ required_outer = pull_varnos_of_level(rte->funcexpr, 0);
+ /* Enforce convention that empty required_outer is exactly NULL */
+ if (bms_is_empty(required_outer))
+ required_outer = NULL;
+ }
+ else
+ required_outer = NULL;
+
/* Generate appropriate path */
- add_path(rel, create_functionscan_path(root, rel));
+ add_path(rel, create_functionscan_path(root, rel, required_outer));
/* Select cheapest path (pretty easy in this case...) */
set_cheapest(rel);
@@ -1157,6 +1205,10 @@ set_function_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
/*
* set_values_pathlist
* Build the (single) access path for a VALUES RTE
+ *
+ * There can be no need for a parameterized path here. (Although the SQL
+ * spec does allow LATERAL (VALUES (x)), the parser will transform that
+ * into a subquery, so it doesn't end up here.)
*/
static void
set_values_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
@@ -1988,10 +2040,16 @@ debug_print_rel(PlannerInfo *root, RelOptInfo *rel)
printf("\tpath list:\n");
foreach(l, rel->pathlist)
print_path(root, lfirst(l), 1);
- printf("\n\tcheapest startup path:\n");
- print_path(root, rel->cheapest_startup_path, 1);
- printf("\n\tcheapest total path:\n");
- print_path(root, rel->cheapest_total_path, 1);
+ if (rel->cheapest_startup_path)
+ {
+ printf("\n\tcheapest startup path:\n");
+ print_path(root, rel->cheapest_startup_path, 1);
+ }
+ if (rel->cheapest_total_path)
+ {
+ printf("\n\tcheapest total path:\n");
+ print_path(root, rel->cheapest_total_path, 1);
+ }
printf("\n");
fflush(stdout);
}
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 875c611ab50..d3f04eea4b1 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -989,12 +989,17 @@ cost_subqueryscan(Path *path, PlannerInfo *root,
/*
* cost_functionscan
* Determines and returns the cost of scanning a function RTE.
+ *
+ * 'baserel' is the relation to be scanned
+ * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
*/
void
-cost_functionscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
+cost_functionscan(Path *path, PlannerInfo *root,
+ RelOptInfo *baserel, ParamPathInfo *param_info)
{
Cost startup_cost = 0;
Cost run_cost = 0;
+ QualCost qpqual_cost;
Cost cpu_per_tuple;
RangeTblEntry *rte;
QualCost exprcost;
@@ -1004,8 +1009,11 @@ cost_functionscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
rte = planner_rt_fetch(baserel->relid, root);
Assert(rte->rtekind == RTE_FUNCTION);
- /* functionscans are never parameterized */
- path->rows = baserel->rows;
+ /* Mark the path with the correct row estimate */
+ if (param_info)
+ path->rows = param_info->ppi_rows;
+ else
+ path->rows = baserel->rows;
/*
* Estimate costs of executing the function expression.
@@ -1025,8 +1033,10 @@ cost_functionscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
startup_cost += exprcost.startup + exprcost.per_tuple;
/* Add scanning CPU costs */
- startup_cost += baserel->baserestrictcost.startup;
- cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
+ get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
+
+ startup_cost += qpqual_cost.startup;
+ cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
run_cost += cpu_per_tuple * baserel->tuples;
path->startup_cost = startup_cost;
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 65f86194e15..fe0e4d7c201 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -491,12 +491,18 @@ sort_inner_and_outer(PlannerInfo *root,
* explosion of mergejoin paths of dubious value. This interacts with
* decisions elsewhere that also discriminate against mergejoins with
* parameterized inputs; see comments in src/backend/optimizer/README.
- *
- * If unique-ification is requested, do it and then handle as a plain
- * inner join.
*/
outer_path = outerrel->cheapest_total_path;
inner_path = innerrel->cheapest_total_path;
+
+ /* Punt if either rel has only parameterized paths */
+ if (!outer_path || !inner_path)
+ return;
+
+ /*
+ * If unique-ification is requested, do it and then handle as a plain
+ * inner join.
+ */
if (jointype == JOIN_UNIQUE_OUTER)
{
outer_path = (Path *) create_unique_path(root, outerrel,
@@ -696,6 +702,10 @@ match_unsorted_outer(PlannerInfo *root,
*/
if (save_jointype == JOIN_UNIQUE_INNER)
{
+ /* XXX for the moment, don't crash on LATERAL --- rethink this */
+ if (inner_cheapest_total == NULL)
+ return;
+
inner_cheapest_total = (Path *)
create_unique_path(root, innerrel, inner_cheapest_total, sjinfo);
Assert(inner_cheapest_total);
@@ -707,7 +717,7 @@ match_unsorted_outer(PlannerInfo *root,
* enable_material is off or the path in question materializes its
* output anyway.
*/
- if (enable_material &&
+ if (enable_material && inner_cheapest_total != NULL &&
!ExecMaterializesOutput(inner_cheapest_total->pathtype))
matpath = (Path *)
create_material_path(innerrel, inner_cheapest_total);
@@ -735,6 +745,8 @@ match_unsorted_outer(PlannerInfo *root,
* If we need to unique-ify the outer path, it's pointless to consider
* any but the cheapest outer. (XXX we don't consider parameterized
* outers, nor inners, for unique-ified cases. Should we?)
+ *
+ * XXX does nothing for LATERAL, rethink
*/
if (save_jointype == JOIN_UNIQUE_OUTER)
{
@@ -814,6 +826,10 @@ match_unsorted_outer(PlannerInfo *root,
if (save_jointype == JOIN_UNIQUE_OUTER)
continue;
+ /* Can't do anything else if inner has no unparameterized paths */
+ if (!inner_cheapest_total)
+ continue;
+
/* Look for useful mergeclauses (if any) */
mergeclauses = find_mergeclauses_for_pathkeys(root,
outerpath->pathkeys,
@@ -1092,6 +1108,12 @@ hash_inner_and_outer(PlannerInfo *root,
Path *cheapest_total_outer = outerrel->cheapest_total_path;
Path *cheapest_total_inner = innerrel->cheapest_total_path;
+ /* Punt if either rel has only parameterized paths */
+ if (!cheapest_startup_outer ||
+ !cheapest_total_outer ||
+ !cheapest_total_inner)
+ return;
+
/* Unique-ify if need be; we ignore parameterized possibilities */
if (jointype == JOIN_UNIQUE_OUTER)
{