diff options
Diffstat (limited to 'contrib/postgres_fdw/postgres_fdw.c')
-rw-r--r-- | contrib/postgres_fdw/postgres_fdw.c | 342 |
1 files changed, 327 insertions, 15 deletions
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 1b37332cda3..db62caf6d9f 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -251,11 +251,14 @@ typedef struct PgFdwAnalyzeState * We store: * * 1) Boolean flag showing if the remote query has the final sort + * 2) Boolean flag showing if the remote query has the LIMIT clause */ enum FdwPathPrivateIndex { /* has-final-sort flag (as an integer Value node) */ - FdwPathPrivateHasFinalSort + FdwPathPrivateHasFinalSort, + /* has-limit flag (as an integer Value node) */ + FdwPathPrivateHasLimit }; /* Struct for extra information passed to estimate_path_cost_size() */ @@ -263,6 +266,10 @@ typedef struct { PathTarget *target; bool has_final_sort; + bool has_limit; + double limit_tuples; + int64 count_est; + int64 offset_est; } PgFdwPathExtraData; /* @@ -400,6 +407,7 @@ static void adjust_foreign_grouping_path_cost(PlannerInfo *root, List *pathkeys, double retrieved_rows, double width, + double limit_tuples, Cost *p_startup_cost, Cost *p_run_cost); static bool ec_member_matches_foreign(PlannerInfo *root, RelOptInfo *rel, @@ -481,6 +489,10 @@ static void add_foreign_grouping_paths(PlannerInfo *root, static void add_foreign_ordered_paths(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *ordered_rel); +static void add_foreign_final_paths(PlannerInfo *root, + RelOptInfo *input_rel, + RelOptInfo *final_rel, + FinalPathExtraData *extra); static void apply_server_options(PgFdwRelationInfo *fpinfo); static void apply_table_options(PgFdwRelationInfo *fpinfo); static void merge_fdw_options(PgFdwRelationInfo *fpinfo, @@ -1183,14 +1195,19 @@ postgresGetForeignPlan(PlannerInfo *root, List *retrieved_attrs; StringInfoData sql; bool has_final_sort = false; + bool has_limit = false; ListCell *lc; /* * Get FDW private data created by postgresGetForeignUpperPaths(), if any. 
*/ if (best_path->fdw_private) + { has_final_sort = intVal(list_nth(best_path->fdw_private, FdwPathPrivateHasFinalSort)); + has_limit = intVal(list_nth(best_path->fdw_private, + FdwPathPrivateHasLimit)); + } if (IS_SIMPLE_REL(foreignrel)) { @@ -1340,7 +1357,7 @@ postgresGetForeignPlan(PlannerInfo *root, initStringInfo(&sql); deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist, remote_exprs, best_path->path.pathkeys, - has_final_sort, false, + has_final_sort, has_limit, false, &retrieved_attrs, &params_list); /* Remember remote_exprs for possible use by postgresPlanDirectModify */ @@ -2526,7 +2543,7 @@ postgresExplainDirectModify(ForeignScanState *node, ExplainState *es) * param_join_conds are the parameterization clauses with outer relations. * pathkeys specify the expected sort order if any for given path being costed. * fpextra specifies additional post-scan/join-processing steps such as the - * final sort. + * final sort and the LIMIT restriction. * * The function returns the cost and size estimates in p_row, p_width, * p_startup_cost and p_total_cost variables. @@ -2603,6 +2620,7 @@ estimate_path_cost_size(PlannerInfo *root, deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist, remote_conds, pathkeys, fpextra ? fpextra->has_final_sort : false, + fpextra ? fpextra->has_limit : false, false, &retrieved_attrs, NULL); /* Get the remote estimate */ @@ -2670,15 +2688,34 @@ estimate_path_cost_size(PlannerInfo *root, retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel); /* - * We will come here again and again with different set of pathkeys - * that caller wants to cost. We don't need to calculate the costs of - * the underlying scan, join, or grouping each time. Instead, use the - * costs if we have cached them already. + * We will come here again and again with different set of pathkeys or + * additional post-scan/join-processing steps that caller wants to + * cost. 
We don't need to calculate the costs of the underlying scan, + * join, or grouping each time. Instead, use the costs if we have + * cached them already. */ if (fpinfo->rel_startup_cost >= 0 && fpinfo->rel_total_cost >= 0) { startup_cost = fpinfo->rel_startup_cost; run_cost = fpinfo->rel_total_cost - fpinfo->rel_startup_cost; + + /* + * If we estimate the costs of a foreign scan or a foreign join + * with additional post-scan/join-processing steps, the scan or + * join costs obtained from the cache wouldn't yet contain the + * eval costs for the final scan/join target, which would've been + * updated by apply_scanjoin_target_to_paths(); add the eval costs + * now. + */ + if (fpextra && !IS_UPPER_REL(foreignrel)) + { + /* Shouldn't get here unless we have LIMIT */ + Assert(fpextra->has_limit); + Assert(foreignrel->reloptkind == RELOPT_BASEREL || + foreignrel->reloptkind == RELOPT_JOINREL); + startup_cost += foreignrel->reltarget->cost.startup; + run_cost += foreignrel->reltarget->cost.per_tuple * rows; + } } else if (IS_JOIN_REL(foreignrel)) { @@ -2897,6 +2934,7 @@ estimate_path_cost_size(PlannerInfo *root, fpinfo->stage == UPPERREL_GROUP_AGG); adjust_foreign_grouping_path_cost(root, pathkeys, retrieved_rows, width, + fpextra->limit_tuples, &startup_cost, &run_cost); } else @@ -2907,6 +2945,14 @@ estimate_path_cost_size(PlannerInfo *root, } total_cost = startup_cost + run_cost; + + /* Adjust the cost estimates if we have LIMIT */ + if (fpextra && fpextra->has_limit) + { + adjust_limit_rows_costs(&rows, &startup_cost, &total_cost, + fpextra->offset_est, fpextra->count_est); + retrieved_rows = rows; + } } /* @@ -2915,7 +2961,8 @@ estimate_path_cost_size(PlannerInfo *root, * the foreignrel's reltarget (see make_sort_input_target()); adjust tlist * eval costs. 
*/ - if (fpextra && fpextra->target != foreignrel->reltarget) + if (fpextra && fpextra->has_final_sort && + fpextra->target != foreignrel->reltarget) { QualCost oldcost = foreignrel->reltarget->cost; QualCost newcost = fpextra->target->cost; @@ -2931,10 +2978,10 @@ estimate_path_cost_size(PlannerInfo *root, * steps, before adding the costs for transferring data from the foreign * server. These costs are useful for costing remote joins involving this * relation or costing other remote operations for this relation such as - * remote sorts, when the costs can not be obtained from the foreign - * server. This function will be called at least once for every foreign - * relation without any parameterization, pathkeys, or additional - * post-scan/join-processing steps. + * remote sorts and remote LIMIT restrictions, when the costs can not be + * obtained from the foreign server. This function will be called at + * least once for every foreign relation without any parameterization, + * pathkeys, or additional post-scan/join-processing steps. */ if (pathkeys == NIL && param_join_conds == NIL && fpextra == NULL) { @@ -2953,6 +3000,30 @@ estimate_path_cost_size(PlannerInfo *root, total_cost += fpinfo->fdw_tuple_cost * retrieved_rows; total_cost += cpu_tuple_cost * retrieved_rows; + /* + * If we have LIMIT, we should prefer performing the restriction remotely + * rather than locally, as the former avoids extra row fetches from the + * remote that the latter might cause. 
But since the core code doesn't + * account for such fetches when estimating the costs of the local + * restriction (see create_limit_path()), there would be no difference + * between the costs of the local restriction and the costs of the remote + * restriction estimated above if we don't use remote estimates (except + * for the case where the foreignrel is a grouping relation, the given + * pathkeys is not NIL, and the effects of a bounded sort for that rel are + * accounted for in costing the remote restriction). Tweak the costs of + * the remote restriction to ensure we'll prefer it if LIMIT is a useful + * one. + */ + if (!fpinfo->use_remote_estimate && + fpextra && fpextra->has_limit && + fpextra->limit_tuples > 0 && + fpextra->limit_tuples < fpinfo->rows) + { + Assert(fpinfo->rows > 0); + total_cost -= (total_cost - startup_cost) * 0.05 * + (fpinfo->rows - fpextra->limit_tuples) / fpinfo->rows; + } + /* Return results. */ *p_rows = rows; *p_width = width; @@ -3020,6 +3091,7 @@ adjust_foreign_grouping_path_cost(PlannerInfo *root, List *pathkeys, double retrieved_rows, double width, + double limit_tuples, Cost *p_startup_cost, Cost *p_run_cost) { @@ -3044,7 +3116,7 @@ adjust_foreign_grouping_path_cost(PlannerInfo *root, width, 0.0, work_mem, - -1.0); + limit_tuples); *p_startup_cost = sort_path.startup_cost; *p_run_cost = sort_path.total_cost - sort_path.startup_cost; @@ -5582,7 +5654,8 @@ postgresGetForeignUpperPaths(PlannerInfo *root, UpperRelationKind stage, /* Ignore stages we don't support; and skip any duplicate calls. 
*/ if ((stage != UPPERREL_GROUP_AGG && - stage != UPPERREL_ORDERED) || + stage != UPPERREL_ORDERED && + stage != UPPERREL_FINAL) || output_rel->fdw_private) return; @@ -5600,6 +5673,10 @@ postgresGetForeignUpperPaths(PlannerInfo *root, UpperRelationKind stage, case UPPERREL_ORDERED: add_foreign_ordered_paths(root, input_rel, output_rel); break; + case UPPERREL_FINAL: + add_foreign_final_paths(root, input_rel, output_rel, + (FinalPathExtraData *) extra); + break; default: elog(ERROR, "unexpected upper relation: %d", (int) stage); break; @@ -5809,7 +5886,7 @@ add_foreign_ordered_paths(PlannerInfo *root, RelOptInfo *input_rel, * Build the fdw_private list that will be used by postgresGetForeignPlan. * Items in the list must match order in enum FdwPathPrivateIndex. */ - fdw_private = list_make1(makeInteger(true)); + fdw_private = list_make2(makeInteger(true), makeInteger(false)); /* Create foreign ordering path */ ordered_path = create_foreign_upper_path(root, @@ -5827,6 +5904,241 @@ add_foreign_ordered_paths(PlannerInfo *root, RelOptInfo *input_rel, } /* + * add_foreign_final_paths + * Add foreign paths for performing the final processing remotely. + * + * Given input_rel contains the source-data Paths. The paths are added to the + * given final_rel. 
+ */ +static void +add_foreign_final_paths(PlannerInfo *root, RelOptInfo *input_rel, + RelOptInfo *final_rel, + FinalPathExtraData *extra) +{ + Query *parse = root->parse; + PgFdwRelationInfo *ifpinfo = (PgFdwRelationInfo *) input_rel->fdw_private; + PgFdwRelationInfo *fpinfo = (PgFdwRelationInfo *) final_rel->fdw_private; + bool has_final_sort = false; + List *pathkeys = NIL; + PgFdwPathExtraData *fpextra; + bool save_use_remote_estimate = false; + double rows; + int width; + Cost startup_cost; + Cost total_cost; + List *fdw_private; + ForeignPath *final_path; + + /* + * Currently, we only support this for SELECT commands + */ + if (parse->commandType != CMD_SELECT) + return; + + /* + * No work if there is no FOR UPDATE/SHARE clause and if there is no need + * to add a LIMIT node + */ + if (!parse->rowMarks && !extra->limit_needed) + return; + + /* We don't support cases where there are any SRFs in the targetlist */ + if (parse->hasTargetSRFs) + return; + + /* Save the input_rel as outerrel in fpinfo */ + fpinfo->outerrel = input_rel; + + /* + * Copy foreign table, foreign server, user mapping, FDW options etc. + * details from the input relation's fpinfo. + */ + fpinfo->table = ifpinfo->table; + fpinfo->server = ifpinfo->server; + fpinfo->user = ifpinfo->user; + merge_fdw_options(fpinfo, ifpinfo, NULL); + + /* + * If there is no need to add a LIMIT node, there might be a ForeignPath + * in the input_rel's pathlist that implements all behavior of the query. + * Note: we would already have accounted for the query's FOR UPDATE/SHARE + * (if any) before we get here. + */ + if (!extra->limit_needed) + { + ListCell *lc; + + Assert(parse->rowMarks); + + /* + * Grouping and aggregation are not supported with FOR UPDATE/SHARE, + * so the input_rel should be a base, join, or ordered relation; and + * if it's an ordered relation, its input relation should be a base + * or join relation. 
+ */ + Assert(input_rel->reloptkind == RELOPT_BASEREL || + input_rel->reloptkind == RELOPT_JOINREL || + (input_rel->reloptkind == RELOPT_UPPER_REL && + ifpinfo->stage == UPPERREL_ORDERED && + (ifpinfo->outerrel->reloptkind == RELOPT_BASEREL || + ifpinfo->outerrel->reloptkind == RELOPT_JOINREL))); + + foreach(lc, input_rel->pathlist) + { + Path *path = (Path *) lfirst(lc); + + /* + * apply_scanjoin_target_to_paths() uses create_projection_path() + * to adjust each of its input paths if needed, whereas + * create_ordered_paths() uses apply_projection_to_path() to do + * that. So the former might have put a ProjectionPath on top of + * the ForeignPath; look through ProjectionPath and see if the + * path underneath it is ForeignPath. + */ + if (IsA(path, ForeignPath) || + (IsA(path, ProjectionPath) && + IsA(((ProjectionPath *) path)->subpath, ForeignPath))) + { + /* + * Create foreign final path; this gets rid of a + * no-longer-needed outer plan (if any), which makes the + * EXPLAIN output look cleaner + */ + final_path = create_foreign_upper_path(root, + path->parent, + path->pathtarget, + path->rows, + path->startup_cost, + path->total_cost, + path->pathkeys, + NULL, /* no extra plan */ + NULL); /* no fdw_private */ + + /* and add it to the final_rel */ + add_path(final_rel, (Path *) final_path); + + /* Safe to push down */ + fpinfo->pushdown_safe = true; + + return; + } + } + + /* + * If we get here it means no ForeignPaths; since we would already + * have considered pushing down all operations for the query to the + * remote server, give up on it. 
+ */ + return; + } + + Assert(extra->limit_needed); + + /* + * If the input_rel is an ordered relation, replace the input_rel with its + * input relation + */ + if (input_rel->reloptkind == RELOPT_UPPER_REL && + ifpinfo->stage == UPPERREL_ORDERED) + { + input_rel = ifpinfo->outerrel; + ifpinfo = (PgFdwRelationInfo *) input_rel->fdw_private; + has_final_sort = true; + pathkeys = root->sort_pathkeys; + } + + /* The input_rel should be a base, join, or grouping relation */ + Assert(input_rel->reloptkind == RELOPT_BASEREL || + input_rel->reloptkind == RELOPT_JOINREL || + (input_rel->reloptkind == RELOPT_UPPER_REL && + ifpinfo->stage == UPPERREL_GROUP_AGG)); + + /* + * We try to create a path below by extending a simple foreign path for + * the underlying base, join, or grouping relation to perform the final + * sort (if has_final_sort) and the LIMIT restriction remotely, which is + * stored into the fdw_private list of the resulting path. (We + * re-estimate the costs of sorting the underlying relation, if + * has_final_sort.) + */ + + /* + * Assess if it is safe to push down the LIMIT and OFFSET to the remote + * server + */ + + /* + * If the underlying relation has any local conditions, the LIMIT/OFFSET + * cannot be pushed down. + */ + if (ifpinfo->local_conds) + return; + + /* + * Also, the LIMIT/OFFSET cannot be pushed down, if their expressions are + * not safe to remote. 
+ */ + if (!is_foreign_expr(root, input_rel, (Expr *) parse->limitOffset) || + !is_foreign_expr(root, input_rel, (Expr *) parse->limitCount)) + return; + + /* Safe to push down */ + fpinfo->pushdown_safe = true; + + /* Construct PgFdwPathExtraData */ + fpextra = (PgFdwPathExtraData *) palloc0(sizeof(PgFdwPathExtraData)); + fpextra->target = root->upper_targets[UPPERREL_FINAL]; + fpextra->has_final_sort = has_final_sort; + fpextra->has_limit = extra->limit_needed; + fpextra->limit_tuples = extra->limit_tuples; + fpextra->count_est = extra->count_est; + fpextra->offset_est = extra->offset_est; + + /* + * Estimate the costs of performing the final sort and the LIMIT + * restriction remotely. If has_final_sort is false, we wouldn't need to + * execute EXPLAIN anymore if use_remote_estimate, since the costs can be + * roughly estimated using the costs we already have for the underlying + * relation, in the same way as when use_remote_estimate is false. Since + * it's pretty expensive to execute EXPLAIN, force use_remote_estimate to + * false in that case. + */ + if (!fpextra->has_final_sort) + { + save_use_remote_estimate = ifpinfo->use_remote_estimate; + ifpinfo->use_remote_estimate = false; + } + estimate_path_cost_size(root, input_rel, NIL, pathkeys, fpextra, + &rows, &width, &startup_cost, &total_cost); + if (!fpextra->has_final_sort) + ifpinfo->use_remote_estimate = save_use_remote_estimate; + + /* + * Build the fdw_private list that will be used by postgresGetForeignPlan. + * Items in the list must match order in enum FdwPathPrivateIndex. 
+ */ + fdw_private = list_make2(makeInteger(has_final_sort), + makeInteger(extra->limit_needed)); + + /* + * Create foreign final path; this gets rid of a no-longer-needed outer + * plan (if any), which makes the EXPLAIN output look cleaner + */ + final_path = create_foreign_upper_path(root, + input_rel, + root->upper_targets[UPPERREL_FINAL], + rows, + startup_cost, + total_cost, + pathkeys, + NULL, /* no extra plan */ + fdw_private); + + /* and add it to the final_rel */ + add_path(final_rel, (Path *) final_path); +} + +/* * Create a tuple from the specified row of the PGresult. * * rel is the local representation of the foreign table, attinmeta is |