aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/executor/nodeSetOp.c135
-rw-r--r--src/backend/nodes/copyfuncs.c3
-rw-r--r--src/backend/nodes/outfuncs.c3
-rw-r--r--src/backend/optimizer/plan/createplan.c7
-rw-r--r--src/backend/optimizer/prep/prepunion.c169
-rw-r--r--src/backend/optimizer/util/tlist.c104
-rw-r--r--src/include/nodes/plannodes.h3
-rw-r--r--src/include/optimizer/planmain.h6
-rw-r--r--src/include/optimizer/tlist.h7
9 files changed, 278 insertions, 159 deletions
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index 2421b0a6df4..f0ebe1fc0aa 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -12,11 +12,16 @@
* relation. Then it is a simple matter to emit the output demanded by the
* SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
*
- * In SETOP_HASHED mode, the input is delivered in no particular order.
- * We build a hash table in memory with one entry for each group of
- * identical tuples, and count the number of tuples in the group from
- * each relation. After seeing all the input, we scan the hashtable and
- * generate the correct output using those counts.
+ * In SETOP_HASHED mode, the input is delivered in no particular order,
+ * except that we know all the tuples from one input relation will come before
+ * all the tuples of the other. The planner guarantees that the first input
+ * relation is the left-hand one for EXCEPT, and tries to make the smaller
+ * input relation come first for INTERSECT. We build a hash table in memory
+ * with one entry for each group of identical tuples, and count the number of
+ * tuples in the group from each relation. After seeing all the input, we
+ * scan the hashtable and generate the correct output using those counts.
+ * We can avoid making hashtable entries for any tuples appearing only in the
+ * second input relation, since they cannot result in any output.
*
* This node type is not used for UNION or UNION ALL, since those can be
* implemented more cheaply (there's no need for the junk attribute to
@@ -32,7 +37,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeSetOp.c,v 1.26 2008/08/07 03:04:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeSetOp.c,v 1.27 2008/08/07 19:35:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -82,8 +87,8 @@ static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
/*
* Initialize state for a new group of input values.
*/
-static void
-initialize_counts(SetOpState *setopstate, SetOpStatePerGroup pergroup)
+static inline void
+initialize_counts(SetOpStatePerGroup pergroup)
{
pergroup->numLeft = pergroup->numRight = 0;
}
@@ -91,9 +96,21 @@ initialize_counts(SetOpState *setopstate, SetOpStatePerGroup pergroup)
/*
* Advance the appropriate counter for one input tuple.
*/
-static void
-advance_counts(SetOpState *setopstate, SetOpStatePerGroup pergroup,
- TupleTableSlot *inputslot)
+static inline void
+advance_counts(SetOpStatePerGroup pergroup, int flag)
+{
+ if (flag)
+ pergroup->numRight++;
+ else
+ pergroup->numLeft++;
+}
+
+/*
+ * Fetch the "flag" column from an input tuple.
+ * This is an integer column with value 0 for left side, 1 for right side.
+ */
+static int
+fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
SetOp *node = (SetOp *) setopstate->ps.plan;
int flag;
@@ -103,10 +120,8 @@ advance_counts(SetOpState *setopstate, SetOpStatePerGroup pergroup,
node->flagColIdx,
&isNull));
Assert(!isNull);
- if (flag)
- pergroup->numRight++;
- else
- pergroup->numLeft++;
+ Assert(flag == 0 || flag == 1);
+ return flag;
}
/*
@@ -131,33 +146,6 @@ build_hash_table(SetOpState *setopstate)
}
/*
- * Find or create a hashtable entry for the tuple group containing the
- * given tuple.
- */
-static SetOpHashEntry
-lookup_hash_entry(SetOpState *setopstate, TupleTableSlot *inputslot)
-{
- SetOpHashEntry entry;
- bool isnew;
-
- /* find or create the hashtable entry */
- entry = (SetOpHashEntry) LookupTupleHashEntry(setopstate->hashtable,
- inputslot,
- &isnew);
-
- if (isnew)
- {
- /* initialize counts for new tuple group */
- initialize_counts(setopstate, &entry->pergroup);
- }
-
- /* Must reset temp context after each hashtable lookup */
- MemoryContextReset(setopstate->tempContext);
-
- return entry;
-}
-
-/*
* We've completed processing a tuple group. Decide how many copies (if any)
* of its representative row to emit, and store the count into numOutput.
* This logic is straight from the SQL92 specification.
@@ -289,10 +277,11 @@ setop_retrieve_direct(SetOpState *setopstate)
setopstate->grp_firstTuple = NULL; /* don't keep two pointers */
/* Initialize working state for a new input tuple group */
- initialize_counts(setopstate, pergroup);
+ initialize_counts(pergroup);
/* Count the first input tuple */
- advance_counts(setopstate, pergroup, resultTupleSlot);
+ advance_counts(pergroup,
+ fetch_tuple_flag(setopstate, resultTupleSlot));
/*
* Scan the outer plan until we exhaust it or cross a group boundary.
@@ -324,7 +313,8 @@ setop_retrieve_direct(SetOpState *setopstate)
}
/* Still in same group, so count this tuple */
- advance_counts(setopstate, pergroup, outerslot);
+ advance_counts(pergroup,
+ fetch_tuple_flag(setopstate, outerslot));
}
/*
@@ -351,30 +341,73 @@ setop_retrieve_direct(SetOpState *setopstate)
static void
setop_fill_hash_table(SetOpState *setopstate)
{
+ SetOp *node = (SetOp *) setopstate->ps.plan;
PlanState *outerPlan;
- SetOpHashEntry entry;
- TupleTableSlot *outerslot;
+ int firstFlag;
+ bool in_first_rel;
/*
* get state info from node
*/
outerPlan = outerPlanState(setopstate);
+ firstFlag = node->firstFlag;
+ /* verify planner didn't mess up */
+ Assert(firstFlag == 0 ||
+ (firstFlag == 1 &&
+ (node->cmd == SETOPCMD_INTERSECT ||
+ node->cmd == SETOPCMD_INTERSECT_ALL)));
/*
* Process each outer-plan tuple, and then fetch the next one, until we
* exhaust the outer plan.
*/
+ in_first_rel = true;
for (;;)
{
+ TupleTableSlot *outerslot;
+ int flag;
+ SetOpHashEntry entry;
+ bool isnew;
+
outerslot = ExecProcNode(outerPlan);
if (TupIsNull(outerslot))
break;
- /* Find or build hashtable entry for this tuple's group */
- entry = lookup_hash_entry(setopstate, outerslot);
+ /* Identify whether it's left or right input */
+ flag = fetch_tuple_flag(setopstate, outerslot);
+
+ if (flag == firstFlag)
+ {
+ /* (still) in first input relation */
+ Assert(in_first_rel);
+
+ /* Find or build hashtable entry for this tuple's group */
+ entry = (SetOpHashEntry)
+ LookupTupleHashEntry(setopstate->hashtable, outerslot, &isnew);
+
+ /* If new tuple group, initialize counts */
+ if (isnew)
+ initialize_counts(&entry->pergroup);
+
+ /* Advance the counts */
+ advance_counts(&entry->pergroup, flag);
+ }
+ else
+ {
+ /* reached second relation */
+ in_first_rel = false;
+
+ /* For tuples not seen previously, do not make hashtable entry */
+ entry = (SetOpHashEntry)
+ LookupTupleHashEntry(setopstate->hashtable, outerslot, NULL);
+
+ /* Advance the counts if entry is already present */
+ if (entry)
+ advance_counts(&entry->pergroup, flag);
+ }
- /* Advance the counts */
- advance_counts(setopstate, &entry->pergroup, outerslot);
+ /* Must reset temp context after each hashtable lookup */
+ MemoryContextReset(setopstate->tempContext);
}
setopstate->table_filled = true;
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 4767b924e9e..90ebd7819a7 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.398 2008/08/07 03:04:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.399 2008/08/07 19:35:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -657,6 +657,7 @@ _copySetOp(SetOp *from)
COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid));
COPY_SCALAR_FIELD(flagColIdx);
+ COPY_SCALAR_FIELD(firstFlag);
COPY_SCALAR_FIELD(numGroups);
return newnode;
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 8440864b0e6..4728117be24 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.332 2008/08/07 03:04:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.333 2008/08/07 19:35:02 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
@@ -611,6 +611,7 @@ _outSetOp(StringInfo str, SetOp *node)
appendStringInfo(str, " %u", node->dupOperators[i]);
WRITE_INT_FIELD(flagColIdx);
+ WRITE_INT_FIELD(firstFlag);
WRITE_LONG_FIELD(numGroups);
}
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 4cf2a5208b1..0c200e05795 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.243 2008/08/07 03:04:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.244 2008/08/07 19:35:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -3109,8 +3109,8 @@ make_unique(Plan *lefttree, List *distinctList)
*/
SetOp *
make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
- List *distinctList, AttrNumber flagColIdx, long numGroups,
- double outputRows)
+ List *distinctList, AttrNumber flagColIdx, int firstFlag,
+ long numGroups, double outputRows)
{
SetOp *node = makeNode(SetOp);
Plan *plan = &node->plan;
@@ -3159,6 +3159,7 @@ make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
node->dupColIdx = dupColIdx;
node->dupOperators = dupOperators;
node->flagColIdx = flagColIdx;
+ node->firstFlag = firstFlag;
node->numGroups = numGroups;
return node;
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 3a155c0d0ad..dda41770c8d 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -22,7 +22,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.151 2008/08/07 03:04:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.152 2008/08/07 19:35:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -49,19 +49,22 @@
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
+#include "utils/selfuncs.h"
static Plan *recurse_set_operations(Node *setOp, PlannerInfo *root,
double tuple_fraction,
List *colTypes, bool junkOK,
int flag, List *refnames_tlist,
- List **sortClauses);
+ List **sortClauses, double *pNumGroups);
static Plan *generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
double tuple_fraction,
- List *refnames_tlist, List **sortClauses);
+ List *refnames_tlist,
+ List **sortClauses, double *pNumGroups);
static Plan *generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
double tuple_fraction,
- List *refnames_tlist, List **sortClauses);
+ List *refnames_tlist,
+ List **sortClauses, double *pNumGroups);
static List *recurse_union_children(Node *setOp, PlannerInfo *root,
double tuple_fraction,
SetOperationStmt *top_union,
@@ -71,7 +74,8 @@ static Plan *make_union_unique(SetOperationStmt *op, Plan *plan,
List **sortClauses);
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
Plan *input_plan,
- double tuple_fraction, double dNumDistinctRows,
+ double dNumGroups, double dNumOutputRows,
+ double tuple_fraction,
const char *construct);
static List *generate_setop_tlist(List *colTypes, int flag,
Index varno,
@@ -153,7 +157,7 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction,
return recurse_set_operations((Node *) topop, root, tuple_fraction,
topop->colTypes, true, -1,
leftmostQuery->targetList,
- sortClauses);
+ sortClauses, NULL);
}
/*
@@ -165,7 +169,11 @@ plan_set_operations(PlannerInfo *root, double tuple_fraction,
* junkOK: if true, child resjunk columns may be left in the result
* flag: if >= 0, add a resjunk output column indicating value of flag
* refnames_tlist: targetlist to take column names from
+ *
+ * Returns a plan for the subtree, as well as these output parameters:
* *sortClauses: receives list of SortGroupClauses for result plan, if any
+ * *pNumGroups: if not NULL, we estimate the number of distinct groups
+ * in the result, and store it there
*
* We don't have to care about typmods here: the only allowed difference
* between set-op input and output typmods is input is a specific typmod
@@ -176,7 +184,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
double tuple_fraction,
List *colTypes, bool junkOK,
int flag, List *refnames_tlist,
- List **sortClauses)
+ List **sortClauses, double *pNumGroups)
{
if (IsA(setOp, RangeTblRef))
{
@@ -198,6 +206,22 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
&subroot);
/*
+ * Estimate number of groups if caller wants it. If the subquery
+ * used grouping or aggregation, its output is probably mostly
+ * unique anyway; otherwise do statistical estimation.
+ */
+ if (pNumGroups)
+ {
+ if (subquery->groupClause || subquery->distinctClause ||
+ subroot->hasHavingQual || subquery->hasAggs)
+ *pNumGroups = subplan->plan_rows;
+ else
+ *pNumGroups = estimate_num_groups(subroot,
+ get_tlist_exprs(subquery->targetList, false),
+ subplan->plan_rows);
+ }
+
+ /*
* Add a SubqueryScan with the caller-requested targetlist
*/
plan = (Plan *)
@@ -228,11 +252,11 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
if (op->op == SETOP_UNION)
plan = generate_union_plan(op, root, tuple_fraction,
refnames_tlist,
- sortClauses);
+ sortClauses, pNumGroups);
else
plan = generate_nonunion_plan(op, root, tuple_fraction,
refnames_tlist,
- sortClauses);
+ sortClauses, pNumGroups);
/*
* If necessary, add a Result node to project the caller-requested
@@ -277,7 +301,7 @@ static Plan *
generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
double tuple_fraction,
List *refnames_tlist,
- List **sortClauses)
+ List **sortClauses, double *pNumGroups)
{
List *planlist;
List *tlist;
@@ -334,6 +358,14 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
else
plan = make_union_unique(op, plan, root, tuple_fraction, sortClauses);
+ /*
+ * Estimate number of groups if caller wants it. For now we just
+ * assume the output is unique --- this is certainly true for the
+ * UNION case, and we want worst-case estimates anyway.
+ */
+ if (pNumGroups)
+ *pNumGroups = plan->plan_rows;
+
return plan;
}
@@ -344,7 +376,7 @@ static Plan *
generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
double tuple_fraction,
List *refnames_tlist,
- List **sortClauses)
+ List **sortClauses, double *pNumGroups)
{
Plan *lplan,
*rplan,
@@ -353,24 +385,43 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
*groupList,
*planlist,
*child_sortclauses;
- double dNumDistinctRows;
- double dNumOutputRows;
- long numDistinctRows;
+ double dLeftGroups,
+ dRightGroups,
+ dNumGroups,
+ dNumOutputRows;
+ long numGroups;
bool use_hash;
SetOpCmd cmd;
+ int firstFlag;
/* Recurse on children, ensuring their outputs are marked */
lplan = recurse_set_operations(op->larg, root,
0.0 /* all tuples needed */ ,
op->colTypes, false, 0,
refnames_tlist,
- &child_sortclauses);
+ &child_sortclauses, &dLeftGroups);
rplan = recurse_set_operations(op->rarg, root,
0.0 /* all tuples needed */ ,
op->colTypes, false, 1,
refnames_tlist,
- &child_sortclauses);
- planlist = list_make2(lplan, rplan);
+ &child_sortclauses, &dRightGroups);
+
+ /*
+ * For EXCEPT, we must put the left input first. For INTERSECT, either
+ * order should give the same results, and we prefer to put the smaller
+ * input first in order to minimize the size of the hash table in the
+ * hashing case. "Smaller" means the one with the fewer groups.
+ */
+ if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
+ {
+ planlist = list_make2(lplan, rplan);
+ firstFlag = 0;
+ }
+ else
+ {
+ planlist = list_make2(rplan, lplan);
+ firstFlag = 1;
+ }
/*
* Generate tlist for Append plan node.
@@ -400,27 +451,32 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
}
/*
- * XXX for the moment, take the number of distinct groups as being the
- * total input size, ie, the worst case. This is too conservative, but
- * we don't want to risk having the hashtable overrun memory; also,
- * it's not clear how to get a decent estimate of the true size.
+ * Estimate number of distinct groups that we'll need hashtable entries
+ * for; this is the size of the left-hand input for EXCEPT, or the smaller
+ * input for INTERSECT. Also estimate the number of eventual output rows.
+ * In non-ALL cases, we estimate each group produces one output row;
+ * in ALL cases use the relevant relation size. These are worst-case
+ * estimates, of course, but we need to be conservative.
*/
- dNumDistinctRows = plan->plan_rows;
+ if (op->op == SETOP_EXCEPT)
+ {
+ dNumGroups = dLeftGroups;
+ dNumOutputRows = op->all ? lplan->plan_rows : dNumGroups;
+ }
+ else
+ {
+ dNumGroups = Min(dLeftGroups, dRightGroups);
+ dNumOutputRows = op->all ? Min(lplan->plan_rows, rplan->plan_rows) : dNumGroups;
+ }
/* Also convert to long int --- but 'ware overflow! */
- numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
-
- /*
- * The output size is taken as 10% of that, which is a completely bogus
- * guess, but it's what we've used historically.
- */
- dNumOutputRows = ceil(dNumDistinctRows * 0.1);
+ numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
/*
* Decide whether to hash or sort, and add a sort node if needed.
*/
use_hash = choose_hashed_setop(root, groupList, plan,
- tuple_fraction, dNumDistinctRows,
+ dNumGroups, dNumOutputRows, tuple_fraction,
(op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
if (!use_hash)
@@ -443,12 +499,17 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
break;
}
plan = (Plan *) make_setop(cmd, use_hash ? SETOP_HASHED : SETOP_SORTED,
- plan, groupList, list_length(op->colTypes) + 1,
- numDistinctRows, dNumOutputRows);
+ plan, groupList,
+ list_length(op->colTypes) + 1,
+ use_hash ? firstFlag : -1,
+ numGroups, dNumOutputRows);
/* Result is sorted only if we're not hashing */
*sortClauses = use_hash ? NIL : groupList;
+ if (pNumGroups)
+ *pNumGroups = dNumGroups;
+
return plan;
}
@@ -499,7 +560,7 @@ recurse_union_children(Node *setOp, PlannerInfo *root,
tuple_fraction,
top_union->colTypes, false,
-1, refnames_tlist,
- &child_sortclauses));
+ &child_sortclauses, NULL));
}
/*
@@ -511,8 +572,8 @@ make_union_unique(SetOperationStmt *op, Plan *plan,
List **sortClauses)
{
List *groupList;
- double dNumDistinctRows;
- long numDistinctRows;
+ double dNumGroups;
+ long numGroups;
/* Identify the grouping semantics */
groupList = generate_setop_grouplist(op, plan->targetlist);
@@ -525,21 +586,21 @@ make_union_unique(SetOperationStmt *op, Plan *plan,
}
/*
- * XXX for the moment, take the number of distinct groups as being the
- * total input size, ie, the worst case. This is too conservative, but
- * we don't want to risk having the hashtable overrun memory; also,
+ * XXX for the moment, take the number of distinct groups as equal to
+ * the total input size, ie, the worst case. This is too conservative,
+ * but we don't want to risk having the hashtable overrun memory; also,
* it's not clear how to get a decent estimate of the true size. One
* should note as well the propensity of novices to write UNION rather
* than UNION ALL even when they don't expect any duplicates...
*/
- dNumDistinctRows = plan->plan_rows;
+ dNumGroups = plan->plan_rows;
/* Also convert to long int --- but 'ware overflow! */
- numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
+ numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
/* Decide whether to hash or sort */
if (choose_hashed_setop(root, groupList, plan,
- tuple_fraction, dNumDistinctRows,
+ dNumGroups, dNumGroups, tuple_fraction,
"UNION"))
{
/* Hashed aggregate plan --- no sort needed */
@@ -551,7 +612,7 @@ make_union_unique(SetOperationStmt *op, Plan *plan,
extract_grouping_cols(groupList,
plan->targetlist),
extract_grouping_ops(groupList),
- numDistinctRows,
+ numGroups,
0,
plan);
/* Hashed aggregation produces randomly-ordered results */
@@ -562,7 +623,7 @@ make_union_unique(SetOperationStmt *op, Plan *plan,
/* Sort and Unique */
plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan);
plan = (Plan *) make_unique(plan, groupList);
- plan->plan_rows = dNumDistinctRows;
+ plan->plan_rows = dNumGroups;
/* We know the sort order of the result */
*sortClauses = groupList;
}
@@ -576,14 +637,14 @@ make_union_unique(SetOperationStmt *op, Plan *plan,
static bool
choose_hashed_setop(PlannerInfo *root, List *groupClauses,
Plan *input_plan,
- double tuple_fraction, double dNumDistinctRows,
+ double dNumGroups, double dNumOutputRows,
+ double tuple_fraction,
const char *construct)
{
- int numDistinctCols = list_length(groupClauses);
+ int numGroupCols = list_length(groupClauses);
bool can_sort;
bool can_hash;
Size hashentrysize;
- List *needed_pathkeys;
Path hashed_p;
Path sorted_p;
@@ -615,7 +676,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
*/
hashentrysize = MAXALIGN(input_plan->plan_width) + MAXALIGN(sizeof(MinimalTupleData));
- if (hashentrysize * dNumDistinctRows > work_mem * 1024L)
+ if (hashentrysize * dNumGroups > work_mem * 1024L)
return false;
/*
@@ -630,7 +691,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
* make actual Paths for these steps.
*/
cost_agg(&hashed_p, root, AGG_HASHED, 0,
- numDistinctCols, dNumDistinctRows,
+ numGroupCols, dNumGroups,
input_plan->startup_cost, input_plan->total_cost,
input_plan->plan_rows);
@@ -640,14 +701,10 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
*/
sorted_p.startup_cost = input_plan->startup_cost;
sorted_p.total_cost = input_plan->total_cost;
- /* XXX this is more expensive than cost_sort really needs: */
- needed_pathkeys = make_pathkeys_for_sortclauses(root,
- groupClauses,
- input_plan->targetlist,
- true);
- cost_sort(&sorted_p, root, needed_pathkeys, sorted_p.total_cost,
+ /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
+ cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
input_plan->plan_rows, input_plan->plan_width, -1.0);
- cost_group(&sorted_p, root, numDistinctCols, dNumDistinctRows,
+ cost_group(&sorted_p, root, numGroupCols, dNumGroups,
sorted_p.startup_cost, sorted_p.total_cost,
input_plan->plan_rows);
@@ -656,7 +713,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
* have to convert an absolute count (LIMIT) into fractional form.
*/
if (tuple_fraction >= 1.0)
- tuple_fraction /= dNumDistinctRows;
+ tuple_fraction /= dNumOutputRows;
if (compare_fractional_path_costs(&hashed_p, &sorted_p,
tuple_fraction) < 0)
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index a2c627fd4d5..b2fb112ebef 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.80 2008/08/07 01:11:50 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/tlist.c,v 1.81 2008/08/07 19:35:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -134,6 +134,69 @@ add_to_flat_tlist(List *tlist, List *vars)
/*
+ * get_tlist_exprs
+ * Get just the expression subtrees of a tlist
+ *
+ * Resjunk columns are ignored unless includeJunk is true
+ */
+List *
+get_tlist_exprs(List *tlist, bool includeJunk)
+{
+ List *result = NIL;
+ ListCell *l;
+
+ foreach(l, tlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+
+ if (tle->resjunk && !includeJunk)
+ continue;
+
+ result = lappend(result, tle->expr);
+ }
+ return result;
+}
+
+
+/*
+ * Does tlist have same output datatypes as listed in colTypes?
+ *
+ * Resjunk columns are ignored if junkOK is true; otherwise presence of
+ * a resjunk column will always cause a 'false' result.
+ *
+ * Note: currently no callers care about comparing typmods.
+ */
+bool
+tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK)
+{
+ ListCell *l;
+ ListCell *curColType = list_head(colTypes);
+
+ foreach(l, tlist)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+
+ if (tle->resjunk)
+ {
+ if (!junkOK)
+ return false;
+ }
+ else
+ {
+ if (curColType == NULL)
+ return false; /* tlist longer than colTypes */
+ if (exprType((Node *) tle->expr) != lfirst_oid(curColType))
+ return false;
+ curColType = lnext(curColType);
+ }
+ }
+ if (curColType != NULL)
+ return false; /* tlist shorter than colTypes */
+ return true;
+}
+
+
+/*
* get_sortgroupref_tle
* Find the targetlist entry matching the given SortGroupRef index,
* and return it.
@@ -303,42 +366,3 @@ grouping_is_hashable(List *groupClause)
}
return true;
}
-
-
-
-/*
- * Does tlist have same output datatypes as listed in colTypes?
- *
- * Resjunk columns are ignored if junkOK is true; otherwise presence of
- * a resjunk column will always cause a 'false' result.
- *
- * Note: currently no callers care about comparing typmods.
- */
-bool
-tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK)
-{
- ListCell *l;
- ListCell *curColType = list_head(colTypes);
-
- foreach(l, tlist)
- {
- TargetEntry *tle = (TargetEntry *) lfirst(l);
-
- if (tle->resjunk)
- {
- if (!junkOK)
- return false;
- }
- else
- {
- if (curColType == NULL)
- return false; /* tlist longer than colTypes */
- if (exprType((Node *) tle->expr) != lfirst_oid(curColType))
- return false;
- curColType = lnext(curColType);
- }
- }
- if (curColType != NULL)
- return false; /* tlist shorter than colTypes */
- return true;
-}
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index e2eaadd29ff..f0e0d08e03a 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.101 2008/08/07 03:04:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.102 2008/08/07 19:35:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -541,6 +541,7 @@ typedef struct SetOp
AttrNumber *dupColIdx; /* their indexes in the target list */
Oid *dupOperators; /* equality operators to compare with */
AttrNumber flagColIdx; /* where is the flag column, if any */
+ int firstFlag; /* flag value for first input relation */
long numGroups; /* estimated number of groups in input */
} SetOp;
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 616f0ea2ad0..0aa1d32f947 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.109 2008/08/07 03:04:04 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.110 2008/08/07 19:35:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -62,8 +62,8 @@ extern Unique *make_unique(Plan *lefttree, List *distinctList);
extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
int64 offset_est, int64 count_est);
extern SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
- List *distinctList, AttrNumber flagColIdx, long numGroups,
- double outputRows);
+ List *distinctList, AttrNumber flagColIdx, int firstFlag,
+ long numGroups, double outputRows);
extern Result *make_result(PlannerInfo *root, List *tlist,
Node *resconstantqual, Plan *subplan);
extern bool is_projection_capable_plan(Plan *plan);
diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h
index 9899b14c82f..d2c7f42e05f 100644
--- a/src/include/optimizer/tlist.h
+++ b/src/include/optimizer/tlist.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.51 2008/08/07 01:11:52 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/tlist.h,v 1.52 2008/08/07 19:35:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -23,6 +23,9 @@ extern TargetEntry *tlist_member_ignore_relabel(Node *node, List *targetlist);
extern List *flatten_tlist(List *tlist);
extern List *add_to_flat_tlist(List *tlist, List *vars);
+extern List *get_tlist_exprs(List *tlist, bool includeJunk);
+extern bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK);
+
extern TargetEntry *get_sortgroupref_tle(Index sortref,
List *targetList);
extern TargetEntry *get_sortgroupclause_tle(SortGroupClause *sgClause,
@@ -37,6 +40,4 @@ extern AttrNumber *extract_grouping_cols(List *groupClause, List *tlist);
extern bool grouping_is_sortable(List *groupClause);
extern bool grouping_is_hashable(List *groupClause);
-extern bool tlist_same_datatypes(List *tlist, List *colTypes, bool junkOK);
-
#endif /* TLIST_H */