aboutsummaryrefslogtreecommitdiff
path: root/contrib/postgres_fdw/postgres_fdw.c
diff options
context:
space:
mode:
authorEtsuro Fujita <efujita@postgresql.org>2019-04-02 20:30:45 +0900
committerEtsuro Fujita <efujita@postgresql.org>2019-04-02 20:30:45 +0900
commitd50d172e517c1d2aabff3ceb3ad3113b909c5017 (patch)
tree7a65adb1310d1fa0f08effc93ee75e7558cc8d9f /contrib/postgres_fdw/postgres_fdw.c
parentaef65db6769e3f2c855dd89edcf95a536a9ab74f (diff)
downloadpostgresql-d50d172e517c1d2aabff3ceb3ad3113b909c5017.tar.gz
postgresql-d50d172e517c1d2aabff3ceb3ad3113b909c5017.zip
postgres_fdw: Perform the (FINAL, NULL) upperrel operations remotely.
The upper-planner pathification allows FDWs to arrange to push down different types of upper-stage operations to the remote side. This commit teaches postgres_fdw to do it for the (FINAL, NULL) upperrel, which is responsible for doing LockRows, LIMIT, and/or ModifyTable. This provides the ability for postgres_fdw to handle SELECT commands so that it 1) skips the LockRows step (if any) (note that this is safe since it performs early locking) and 2) pushes down the LIMIT and/or OFFSET restrictions (if any) to the remote side. This doesn't handle the INSERT/UPDATE/DELETE cases. Author: Etsuro Fujita Reviewed-By: Antonin Houska and Jeff Janes Discussion: https://postgr.es/m/87pnz1aby9.fsf@news-spur.riddles.org.uk
Diffstat (limited to 'contrib/postgres_fdw/postgres_fdw.c')
-rw-r--r--contrib/postgres_fdw/postgres_fdw.c342
1 files changed, 327 insertions, 15 deletions
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 1b37332cda3..db62caf6d9f 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -251,11 +251,14 @@ typedef struct PgFdwAnalyzeState
* We store:
*
* 1) Boolean flag showing if the remote query has the final sort
+ * 2) Boolean flag showing if the remote query has the LIMIT clause
*/
enum FdwPathPrivateIndex
{
/* has-final-sort flag (as an integer Value node) */
- FdwPathPrivateHasFinalSort
+ FdwPathPrivateHasFinalSort,
+ /* has-limit flag (as an integer Value node) */
+ FdwPathPrivateHasLimit
};
/* Struct for extra information passed to estimate_path_cost_size() */
@@ -263,6 +266,10 @@ typedef struct
{
PathTarget *target;
bool has_final_sort;
+ bool has_limit;
+ double limit_tuples;
+ int64 count_est;
+ int64 offset_est;
} PgFdwPathExtraData;
/*
@@ -400,6 +407,7 @@ static void adjust_foreign_grouping_path_cost(PlannerInfo *root,
List *pathkeys,
double retrieved_rows,
double width,
+ double limit_tuples,
Cost *p_startup_cost,
Cost *p_run_cost);
static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel,
@@ -481,6 +489,10 @@ static void add_foreign_grouping_paths(PlannerInfo *root,
static void add_foreign_ordered_paths(PlannerInfo *root,
RelOptInfo *input_rel,
RelOptInfo *ordered_rel);
+static void add_foreign_final_paths(PlannerInfo *root,
+ RelOptInfo *input_rel,
+ RelOptInfo *final_rel,
+ FinalPathExtraData *extra);
static void apply_server_options(PgFdwRelationInfo *fpinfo);
static void apply_table_options(PgFdwRelationInfo *fpinfo);
static void merge_fdw_options(PgFdwRelationInfo *fpinfo,
@@ -1183,14 +1195,19 @@ postgresGetForeignPlan(PlannerInfo *root,
List *retrieved_attrs;
StringInfoData sql;
bool has_final_sort = false;
+ bool has_limit = false;
ListCell *lc;
/*
* Get FDW private data created by postgresGetForeignUpperPaths(), if any.
*/
if (best_path->fdw_private)
+ {
has_final_sort = intVal(list_nth(best_path->fdw_private,
FdwPathPrivateHasFinalSort));
+ has_limit = intVal(list_nth(best_path->fdw_private,
+ FdwPathPrivateHasLimit));
+ }
if (IS_SIMPLE_REL(foreignrel))
{
@@ -1340,7 +1357,7 @@ postgresGetForeignPlan(PlannerInfo *root,
initStringInfo(&sql);
deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist,
remote_exprs, best_path->path.pathkeys,
- has_final_sort, false,
+ has_final_sort, has_limit, false,
&retrieved_attrs, &params_list);
/* Remember remote_exprs for possible use by postgresPlanDirectModify */
@@ -2526,7 +2543,7 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es)
* param_join_conds are the parameterization clauses with outer relations.
* pathkeys specify the expected sort order if any for given path being costed.
* fpextra specifies additional post-scan/join-processing steps such as the
- * final sort.
+ * final sort and the LIMIT restriction.
*
* The function returns the cost and size estimates in p_row, p_width,
* p_startup_cost and p_total_cost variables.
@@ -2603,6 +2620,7 @@ estimate_path_cost_size(PlannerInfo *root,
deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist,
remote_conds, pathkeys,
fpextra ? fpextra->has_final_sort : false,
+ fpextra ? fpextra->has_limit : false,
false, &retrieved_attrs, NULL);
/* Get the remote estimate */
@@ -2670,15 +2688,34 @@ estimate_path_cost_size(PlannerInfo *root,
retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
/*
- * We will come here again and again with different set of pathkeys
- * that caller wants to cost. We don't need to calculate the costs of
- * the underlying scan, join, or grouping each time. Instead, use the
- * costs if we have cached them already.
+ * We will come here again and again with different set of pathkeys or
+ * additional post-scan/join-processing steps that caller wants to
+ * cost. We don't need to calculate the costs of the underlying scan,
+ * join, or grouping each time. Instead, use the costs if we have
+ * cached them already.
*/
if (fpinfo->rel_startup_cost >= 0 && fpinfo->rel_total_cost >= 0)
{
startup_cost = fpinfo->rel_startup_cost;
run_cost = fpinfo->rel_total_cost - fpinfo->rel_startup_cost;
+
+ /*
+ * If we estimate the costs of a foreign scan or a foreign join
+ * with additional post-scan/join-processing steps, the scan or
+ * join costs obtained from the cache wouldn't yet contain the
+ * eval costs for the final scan/join target, which would've been
+ * updated by apply_scanjoin_target_to_paths(); add the eval costs
+ * now.
+ */
+ if (fpextra && !IS_UPPER_REL(foreignrel))
+ {
+ /* Shouldn't get here unless we have LIMIT */
+ Assert(fpextra->has_limit);
+ Assert(foreignrel->reloptkind == RELOPT_BASEREL ||
+ foreignrel->reloptkind == RELOPT_JOINREL);
+ startup_cost += foreignrel->reltarget->cost.startup;
+ run_cost += foreignrel->reltarget->cost.per_tuple * rows;
+ }
}
else if (IS_JOIN_REL(foreignrel))
{
@@ -2897,6 +2934,7 @@ estimate_path_cost_size(PlannerInfo *root,
fpinfo->stage == UPPERREL_GROUP_AGG);
adjust_foreign_grouping_path_cost(root, pathkeys,
retrieved_rows, width,
+ fpextra->limit_tuples,
&startup_cost, &run_cost);
}
else
@@ -2907,6 +2945,14 @@ estimate_path_cost_size(PlannerInfo *root,
}
total_cost = startup_cost + run_cost;
+
+ /* Adjust the cost estimates if we have LIMIT */
+ if (fpextra && fpextra->has_limit)
+ {
+ adjust_limit_rows_costs(&rows, &startup_cost, &total_cost,
+ fpextra->offset_est, fpextra->count_est);
+ retrieved_rows = rows;
+ }
}
/*
@@ -2915,7 +2961,8 @@ estimate_path_cost_size(PlannerInfo *root,
* the foreignrel's reltarget (see make_sort_input_target()); adjust tlist
* eval costs.
*/
- if (fpextra && fpextra->target != foreignrel->reltarget)
+ if (fpextra && fpextra->has_final_sort &&
+ fpextra->target != foreignrel->reltarget)
{
QualCost oldcost = foreignrel->reltarget->cost;
QualCost newcost = fpextra->target->cost;
@@ -2931,10 +2978,10 @@ estimate_path_cost_size(PlannerInfo *root,
* steps, before adding the costs for transferring data from the foreign
* server. These costs are useful for costing remote joins involving this
* relation or costing other remote operations for this relation such as
- * remote sorts, when the costs can not be obtained from the foreign
- * server. This function will be called at least once for every foreign
- * relation without any parameterization, pathkeys, or additional
- * post-scan/join-processing steps.
+ * remote sorts and remote LIMIT restrictions, when the costs can not be
+ * obtained from the foreign server. This function will be called at
+ * least once for every foreign relation without any parameterization,
+ * pathkeys, or additional post-scan/join-processing steps.
*/
if (pathkeys == NIL && param_join_conds == NIL && fpextra == NULL)
{
@@ -2953,6 +3000,30 @@ estimate_path_cost_size(PlannerInfo *root,
total_cost += fpinfo->fdw_tuple_cost * retrieved_rows;
total_cost += cpu_tuple_cost * retrieved_rows;
+ /*
+ * If we have LIMIT, we should perfer performing the restriction remotely
+ * rather than locally, as the former avoids extra row fetches from the
+ * remote that the latter might cause. But since the core code doesn't
+ * account for such fetches when estimating the costs of the local
+ * restriction (see create_limit_path()), there would be no difference
+ * between the costs of the local restriction and the costs of the remote
+ * restriction estimated above if we don't use remote estimates (except
+ * for the case where the foreignrel is a grouping relation, the given
+ * pathkeys is not NIL, and the effects of a bounded sort for that rel is
+ * accounted for in costing the remote restriction). Tweak the costs of
+ * the remote restriction to ensure we'll prefer it if LIMIT is a useful
+ * one.
+ */
+ if (!fpinfo->use_remote_estimate &&
+ fpextra && fpextra->has_limit &&
+ fpextra->limit_tuples > 0 &&
+ fpextra->limit_tuples < fpinfo->rows)
+ {
+ Assert(fpinfo->rows > 0);
+ total_cost -= (total_cost - startup_cost) * 0.05 *
+ (fpinfo->rows - fpextra->limit_tuples) / fpinfo->rows;
+ }
+
/* Return results. */
*p_rows = rows;
*p_width = width;
@@ -3020,6 +3091,7 @@ adjust_foreign_grouping_path_cost(PlannerInfo *root,
List *pathkeys,
double retrieved_rows,
double width,
+ double limit_tuples,
Cost *p_startup_cost,
Cost *p_run_cost)
{
@@ -3044,7 +3116,7 @@ adjust_foreign_grouping_path_cost(PlannerInfo *root,
width,
0.0,
work_mem,
- -1.0);
+ limit_tuples);
*p_startup_cost = sort_path.startup_cost;
*p_run_cost = sort_path.total_cost - sort_path.startup_cost;
@@ -5582,7 +5654,8 @@ postgresGetForeignUpperPaths(PlannerInfo *root, UpperRelationKind stage,
/* Ignore stages we don't support; and skip any duplicate calls. */
if ((stage != UPPERREL_GROUP_AGG &&
- stage != UPPERREL_ORDERED) ||
+ stage != UPPERREL_ORDERED &&
+ stage != UPPERREL_FINAL) ||
output_rel->fdw_private)
return;
@@ -5600,6 +5673,10 @@ postgresGetForeignUpperPaths(PlannerInfo *root, UpperRelationKind stage,
case UPPERREL_ORDERED:
add_foreign_ordered_paths(root, input_rel, output_rel);
break;
+ case UPPERREL_FINAL:
+ add_foreign_final_paths(root, input_rel, output_rel,
+ (FinalPathExtraData *) extra);
+ break;
default:
elog(ERROR, "unexpected upper relation: %d", (int) stage);
break;
@@ -5809,7 +5886,7 @@ add_foreign_ordered_paths(PlannerInfo *root, RelOptInfo *input_rel,
* Build the fdw_private list that will be used by postgresGetForeignPlan.
* Items in the list must match order in enum FdwPathPrivateIndex.
*/
- fdw_private = list_make1(makeInteger(true));
+ fdw_private = list_make2(makeInteger(true), makeInteger(false));
/* Create foreign ordering path */
ordered_path = create_foreign_upper_path(root,
@@ -5827,6 +5904,241 @@ add_foreign_ordered_paths(PlannerInfo *root, RelOptInfo *input_rel,
}
/*
+ * add_foreign_final_paths
+ * Add foreign paths for performing the final processing remotely.
+ *
+ * Given input_rel contains the source-data Paths. The paths are added to the
+ * given final_rel.
+ */
+static void
+add_foreign_final_paths(PlannerInfo *root, RelOptInfo *input_rel,
+ RelOptInfo *final_rel,
+ FinalPathExtraData *extra)
+{
+ Query *parse = root->parse;
+ PgFdwRelationInfo *ifpinfo = (PgFdwRelationInfo *) input_rel->fdw_private;
+ PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) final_rel->fdw_private;
+ bool has_final_sort = false;
+ List *pathkeys = NIL;
+ PgFdwPathExtraData *fpextra;
+ bool save_use_remote_estimate = false;
+ double rows;
+ int width;
+ Cost startup_cost;
+ Cost total_cost;
+ List *fdw_private;
+ ForeignPath *final_path;
+
+ /*
+ * Currently, we only support this for SELECT commands
+ */
+ if (parse->commandType != CMD_SELECT)
+ return;
+
+ /*
+ * No work if there is no FOR UPDATE/SHARE clause and if there is no need
+ * to add a LIMIT node
+ */
+ if (!parse->rowMarks && !extra->limit_needed)
+ return;
+
+ /* We don't support cases where there are any SRFs in the targetlist */
+ if (parse->hasTargetSRFs)
+ return;
+
+ /* Save the input_rel as outerrel in fpinfo */
+ fpinfo->outerrel = input_rel;
+
+ /*
+ * Copy foreign table, foreign server, user mapping, FDW options etc.
+ * details from the input relation's fpinfo.
+ */
+ fpinfo->table = ifpinfo->table;
+ fpinfo->server = ifpinfo->server;
+ fpinfo->user = ifpinfo->user;
+ merge_fdw_options(fpinfo, ifpinfo, NULL);
+
+ /*
+ * If there is no need to add a LIMIT node, there might be a ForeignPath
+ * in the input_rel's pathlist that implements all behavior of the query.
+ * Note: we would already have accounted for the query's FOR UPDATE/SHARE
+ * (if any) before we get here.
+ */
+ if (!extra->limit_needed)
+ {
+ ListCell *lc;
+
+ Assert(parse->rowMarks);
+
+ /*
+ * Grouping and aggregation are not supported with FOR UPDATE/SHARE,
+ * so the input_rel should be a base, join, or ordered relation; and
+ * if it's an ordered relation, its input relation should be a base
+ * or join relation.
+ */
+ Assert(input_rel->reloptkind == RELOPT_BASEREL ||
+ input_rel->reloptkind == RELOPT_JOINREL ||
+ (input_rel->reloptkind == RELOPT_UPPER_REL &&
+ ifpinfo->stage == UPPERREL_ORDERED &&
+ (ifpinfo->outerrel->reloptkind == RELOPT_BASEREL ||
+ ifpinfo->outerrel->reloptkind == RELOPT_JOINREL)));
+
+ foreach(lc, input_rel->pathlist)
+ {
+ Path *path = (Path *) lfirst(lc);
+
+ /*
+ * apply_scanjoin_target_to_paths() uses create_projection_path()
+ * to adjust each of its input paths if needed, whereas
+ * create_ordered_paths() uses apply_projection_to_path() to do
+ * that. So the former might have put a ProjectionPath on top of
+ * the ForeignPath; look through ProjectionPath and see if the
+ * path underneath it is ForeignPath.
+ */
+ if (IsA(path, ForeignPath) ||
+ (IsA(path, ProjectionPath) &&
+ IsA(((ProjectionPath *) path)->subpath, ForeignPath)))
+ {
+ /*
+ * Create foreign final path; this gets rid of a
+ * no-longer-needed outer plan (if any), which makes the
+ * EXPLAIN output look cleaner
+ */
+ final_path = create_foreign_upper_path(root,
+ path->parent,
+ path->pathtarget,
+ path->rows,
+ path->startup_cost,
+ path->total_cost,
+ path->pathkeys,
+ NULL, /* no extra plan */
+ NULL); /* no fdw_private */
+
+ /* and add it to the final_rel */
+ add_path(final_rel, (Path *) final_path);
+
+ /* Safe to push down */
+ fpinfo->pushdown_safe = true;
+
+ return;
+ }
+ }
+
+ /*
+ * If we get here it means no ForeignPaths; since we would already
+ * have considered pushing down all operations for the query to the
+ * remote server, give up on it.
+ */
+ return;
+ }
+
+ Assert(extra->limit_needed);
+
+ /*
+ * If the input_rel is an ordered relation, replace the input_rel with its
+ * input relation
+ */
+ if (input_rel->reloptkind == RELOPT_UPPER_REL &&
+ ifpinfo->stage == UPPERREL_ORDERED)
+ {
+ input_rel = ifpinfo->outerrel;
+ ifpinfo = (PgFdwRelationInfo *) input_rel->fdw_private;
+ has_final_sort = true;
+ pathkeys = root->sort_pathkeys;
+ }
+
+ /* The input_rel should be a base, join, or grouping relation */
+ Assert(input_rel->reloptkind == RELOPT_BASEREL ||
+ input_rel->reloptkind == RELOPT_JOINREL ||
+ (input_rel->reloptkind == RELOPT_UPPER_REL &&
+ ifpinfo->stage == UPPERREL_GROUP_AGG));
+
+ /*
+ * We try to create a path below by extending a simple foreign path for
+ * the underlying base, join, or grouping relation to perform the final
+ * sort (if has_final_sort) and the LIMIT restriction remotely, which is
+ * stored into the fdw_private list of the resulting path. (We
+ * re-estimate the costs of sorting the underlying relation, if
+ * has_final_sort.)
+ */
+
+ /*
+ * Assess if it is safe to push down the LIMIT and OFFSET to the remote
+ * server
+ */
+
+ /*
+ * If the underlying relation has any local conditions, the LIMIT/OFFSET
+ * cannot be pushed down.
+ */
+ if (ifpinfo->local_conds)
+ return;
+
+ /*
+ * Also, the LIMIT/OFFSET cannot be pushed down, if their expressions are
+ * not safe to remote.
+ */
+ if (!is_foreign_expr(root, input_rel, (Expr *) parse->limitOffset) ||
+ !is_foreign_expr(root, input_rel, (Expr *) parse->limitCount))
+ return;
+
+ /* Safe to push down */
+ fpinfo->pushdown_safe = true;
+
+ /* Construct PgFdwPathExtraData */
+ fpextra = (PgFdwPathExtraData *) palloc0(sizeof(PgFdwPathExtraData));
+ fpextra->target = root->upper_targets[UPPERREL_FINAL];
+ fpextra->has_final_sort = has_final_sort;
+ fpextra->has_limit = extra->limit_needed;
+ fpextra->limit_tuples = extra->limit_tuples;
+ fpextra->count_est = extra->count_est;
+ fpextra->offset_est = extra->offset_est;
+
+ /*
+ * Estimate the costs of performing the final sort and the LIMIT
+ * restriction remotely. If has_final_sort is false, we wouldn't need to
+ * execute EXPLAIN anymore if use_remote_estimate, since the costs can be
+ * roughly estimated using the costs we already have for the underlying
+ * relation, in the same way as when use_remote_estimate is false. Since
+ * it's pretty expensive to execute EXPLAIN, force use_remote_estimate to
+ * false in that case.
+ */
+ if (!fpextra->has_final_sort)
+ {
+ save_use_remote_estimate = ifpinfo->use_remote_estimate;
+ ifpinfo->use_remote_estimate = false;
+ }
+ estimate_path_cost_size(root, input_rel, NIL, pathkeys, fpextra,
+ &rows, &width, &startup_cost, &total_cost);
+ if (!fpextra->has_final_sort)
+ ifpinfo->use_remote_estimate = save_use_remote_estimate;
+
+ /*
+ * Build the fdw_private list that will be used by postgresGetForeignPlan.
+ * Items in the list must match order in enum FdwPathPrivateIndex.
+ */
+ fdw_private = list_make2(makeInteger(has_final_sort),
+ makeInteger(extra->limit_needed));
+
+ /*
+ * Create foreign final path; this gets rid of a no-longer-needed outer
+ * plan (if any), which makes the EXPLAIN output look cleaner
+ */
+ final_path = create_foreign_upper_path(root,
+ input_rel,
+ root->upper_targets[UPPERREL_FINAL],
+ rows,
+ startup_cost,
+ total_cost,
+ pathkeys,
+ NULL, /* no extra plan */
+ fdw_private);
+
+ /* and add it to the final_rel */
+ add_path(final_rel, (Path *) final_path);
+}
+
+/*
* Create a tuple from the specified row of the PGresult.
*
* rel is the local representation of the foreign table, attinmeta is