-rw-r--r--  src/backend/optimizer/path/allpaths.c | 19 +++++++++++++------
-rw-r--r--  src/test/regress/expected/inherit.out | 30 ++++++++++++++++++++++++++++++
-rw-r--r--  src/test/regress/sql/inherit.sql      | 21 +++++++++++++++++++++
3 files changed, 64 insertions(+), 6 deletions(-)
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 1115ebeee29..b5bc9b602e2 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -958,6 +958,7 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
 {
 	int			parentRTindex = rti;
 	bool		has_live_children;
+	double		parent_tuples;
 	double		parent_rows;
 	double		parent_size;
 	double	   *parent_attrsizes;
@@ -983,6 +984,15 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
 	/*
 	 * Initialize to compute size estimates for whole append relation.
 	 *
+	 * We handle tuples estimates by setting "tuples" to the total number of
+	 * tuples accumulated from each live child, rather than using "rows".
+	 * Although an appendrel itself doesn't directly enforce any quals, its
+	 * child relations may.  Therefore, setting "tuples" equal to "rows" for
+	 * an appendrel isn't always appropriate, and can lead to inaccurate cost
+	 * estimates.  For example, when estimating the number of distinct values
+	 * from an appendrel, we would be unable to adjust the estimate based on
+	 * the restriction selectivity (see estimate_num_groups).
+	 *
 	 * We handle width estimates by weighting the widths of different child
 	 * rels proportionally to their number of rows.  This is sensible because
 	 * the use of width estimates is mainly to compute the total relation
@@ -995,6 +1005,7 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
 	 * have zero rows and/or width, if they were excluded by constraints.
 	 */
 	has_live_children = false;
+	parent_tuples = 0;
 	parent_rows = 0;
 	parent_size = 0;
 	nattrs = rel->max_attr - rel->min_attr + 1;
@@ -1161,6 +1172,7 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
 		 */
 		Assert(childrel->rows > 0);
 
+		parent_tuples += childrel->tuples;
 		parent_rows += childrel->rows;
 		parent_size += childrel->reltarget->width * childrel->rows;
 
@@ -1207,18 +1219,13 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
 		int			i;
 
 		Assert(parent_rows > 0);
+		rel->tuples = parent_tuples;
 		rel->rows = parent_rows;
 		rel->reltarget->width = rint(parent_size / parent_rows);
 		for (i = 0; i < nattrs; i++)
 			rel->attr_widths[i] = rint(parent_attrsizes[i] / parent_rows);
 
 		/*
-		 * Set "raw tuples" count equal to "rows" for the appendrel; needed
-		 * because some places assume rel->tuples is valid for any baserel.
-		 */
-		rel->tuples = parent_rows;
-
-		/*
 		 * Note that we leave rel->pages as zero; this is important to avoid
 		 * double-counting the appendrel tree in total_table_pages.
 		 */
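For reference (not part of the patch): the child "tuples" values that the new parent_tuples accumulator sums come from each child rel's size estimate, which for plain child tables is ultimately derived from pg_class.reltuples. A minimal way to eyeball those per-child inputs, assuming the tuplesest_parted partitioned table created by the regression test below:

-- Not part of the patch: list the per-partition reltuples that feed the
-- appendrel's summed "tuples" estimate (assumes the regression-test table).
select c.relname, c.reltuples
from pg_class c
join pg_inherits i on i.inhrelid = c.oid
where i.inhparent = 'tuplesest_parted'::regclass
order by c.relname;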
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index dbf3835cb14..420b6ae5996 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -3666,3 +3666,33 @@ UPDATE errtst_parent SET partid = 30, data = data + 10 WHERE partid = 20;
 ERROR:  no partition of relation "errtst_parent" found for row
 DETAIL:  Partition key of the failing row contains (partid) = (30).
 DROP TABLE errtst_parent;
+-- Check that we have the correct tuples estimate for an appendrel
+create table tuplesest_parted (a int, b int, c float) partition by range(a);
+create table tuplesest_parted1 partition of tuplesest_parted for values from (0) to (100);
+create table tuplesest_parted2 partition of tuplesest_parted for values from (100) to (200);
+create table tuplesest_tab (a int, b int);
+insert into tuplesest_parted select i%200, i%300, i%400 from generate_series(1, 1000)i;
+insert into tuplesest_tab select i, i from generate_series(1, 100)i;
+analyze tuplesest_parted;
+analyze tuplesest_tab;
+explain (costs off)
+select * from tuplesest_tab join
+ (select b from tuplesest_parted where c < 100 group by b) sub
+ on tuplesest_tab.a = sub.b;
+                             QUERY PLAN
+--------------------------------------------------------------------
+ Hash Join
+   Hash Cond: (tuplesest_parted.b = tuplesest_tab.a)
+   ->  HashAggregate
+         Group Key: tuplesest_parted.b
+         ->  Append
+               ->  Seq Scan on tuplesest_parted1 tuplesest_parted_1
+                     Filter: (c < '100'::double precision)
+               ->  Seq Scan on tuplesest_parted2 tuplesest_parted_2
+                     Filter: (c < '100'::double precision)
+   ->  Hash
+         ->  Seq Scan on tuplesest_tab
+(11 rows)
+
+drop table tuplesest_parted;
+drop table tuplesest_tab;
diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
index 49aae426f3c..30fba16231c 100644
--- a/src/test/regress/sql/inherit.sql
+++ b/src/test/regress/sql/inherit.sql
@@ -1483,3 +1483,24 @@ UPDATE errtst_parent SET partid = 0, data = data + 10 WHERE partid = 20;
 UPDATE errtst_parent SET partid = 30, data = data + 10 WHERE partid = 20;
 
 DROP TABLE errtst_parent;
+
+-- Check that we have the correct tuples estimate for an appendrel
+create table tuplesest_parted (a int, b int, c float) partition by range(a);
+create table tuplesest_parted1 partition of tuplesest_parted for values from (0) to (100);
+create table tuplesest_parted2 partition of tuplesest_parted for values from (100) to (200);
+
+create table tuplesest_tab (a int, b int);
+
+insert into tuplesest_parted select i%200, i%300, i%400 from generate_series(1, 1000)i;
+insert into tuplesest_tab select i, i from generate_series(1, 100)i;
+
+analyze tuplesest_parted;
+analyze tuplesest_tab;
+
+explain (costs off)
+select * from tuplesest_tab join
+ (select b from tuplesest_parted where c < 100 group by b) sub
+ on tuplesest_tab.a = sub.b;
+
+drop table tuplesest_parted;
+drop table tuplesest_tab;
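A hedged follow-up, not part of the committed test (the test uses costs off, likely because estimated row counts depend on the gathered statistics): with cost output enabled, the HashAggregate's row estimate for the subquery should now be scaled down by the selectivity of the c < 100 qual instead of assuming that every distinct b value survives the filter.

-- Not part of the committed test: inspect the (statistics-dependent)
-- group-count estimate, which now accounts for the c < 100 selectivity.
explain
select b from tuplesest_parted where c < 100 group by b;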