Diffstat (limited to 'src')
-rw-r--r--  src/backend/commands/explain.c            |  52
-rw-r--r--  src/backend/executor/nodeSetOp.c          | 602
-rw-r--r--  src/backend/nodes/copyfuncs.c             |   4
-rw-r--r--  src/backend/nodes/outfuncs.c              |   4
-rw-r--r--  src/backend/optimizer/plan/createplan.c   |  20
-rw-r--r--  src/backend/optimizer/prep/prepunion.c    |  41
-rw-r--r--  src/include/nodes/execnodes.h             |  28
-rw-r--r--  src/include/nodes/plannodes.h             |  10
-rw-r--r--  src/include/optimizer/planmain.h          |   7
-rw-r--r--  src/test/regress/expected/union.out       |  14
-rw-r--r--  src/test/regress/sql/union.sql            |  14
11 files changed, 593 insertions(+), 203 deletions(-)
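
For orientation, a hedged sketch of what this patch changes (t1 and t2 are hypothetical tables, not part of the patch): when the planner chooses the new hashed strategy, EXPLAIN labels the node "HashSetOp Intersect" instead of the sorted "SetOp Intersect", per the explain.c hunk below.

-- Hypothetical tables; with this patch the planner may skip the Sort and
-- use the hashed strategy, reported by EXPLAIN as "HashSetOp Intersect".
EXPLAIN
SELECT a, b FROM t1
INTERSECT
SELECT a, b FROM t2;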
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 0892cdbe3eb..1fd6f0cb331 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994-5, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.175 2008/05/14 19:10:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/explain.c,v 1.176 2008/08/07 03:04:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -558,19 +558,47 @@ explain_outNode(StringInfo str,
pname = "Unique";
break;
case T_SetOp:
- switch (((SetOp *) plan)->cmd)
+ switch (((SetOp *) plan)->strategy)
{
- case SETOPCMD_INTERSECT:
- pname = "SetOp Intersect";
+ case SETOP_SORTED:
+ switch (((SetOp *) plan)->cmd)
+ {
+ case SETOPCMD_INTERSECT:
+ pname = "SetOp Intersect";
+ break;
+ case SETOPCMD_INTERSECT_ALL:
+ pname = "SetOp Intersect All";
+ break;
+ case SETOPCMD_EXCEPT:
+ pname = "SetOp Except";
+ break;
+ case SETOPCMD_EXCEPT_ALL:
+ pname = "SetOp Except All";
+ break;
+ default:
+ pname = "SetOp ???";
+ break;
+ }
break;
- case SETOPCMD_INTERSECT_ALL:
- pname = "SetOp Intersect All";
- break;
- case SETOPCMD_EXCEPT:
- pname = "SetOp Except";
- break;
- case SETOPCMD_EXCEPT_ALL:
- pname = "SetOp Except All";
+ case SETOP_HASHED:
+ switch (((SetOp *) plan)->cmd)
+ {
+ case SETOPCMD_INTERSECT:
+ pname = "HashSetOp Intersect";
+ break;
+ case SETOPCMD_INTERSECT_ALL:
+ pname = "HashSetOp Intersect All";
+ break;
+ case SETOPCMD_EXCEPT:
+ pname = "HashSetOp Except";
+ break;
+ case SETOPCMD_EXCEPT_ALL:
+ pname = "HashSetOp Except All";
+ break;
+ default:
+ pname = "HashSetOp ???";
+ break;
+ }
break;
default:
pname = "SetOp ???";
diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c
index c60ceff1259..2421b0a6df4 100644
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -4,33 +4,38 @@
* Routines to handle INTERSECT and EXCEPT selection
*
* The input of a SetOp node consists of tuples from two relations,
- * which have been combined into one dataset and sorted on all the nonjunk
- * attributes. In addition there is a junk attribute that shows which
- * relation each tuple came from. The SetOp node scans each group of
- * identical tuples to determine how many came from each input relation.
- * Then it is a simple matter to emit the output demanded by the SQL spec
- * for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
+ * which have been combined into one dataset, with a junk attribute added
+ * that shows which relation each tuple came from. In SETOP_SORTED mode,
+ * the input has furthermore been sorted according to all the grouping
+ * columns (ie, all the non-junk attributes). The SetOp node scans each
+ * group of identical tuples to determine how many came from each input
+ * relation. Then it is a simple matter to emit the output demanded by the
+ * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
+ *
+ * In SETOP_HASHED mode, the input is delivered in no particular order.
+ * We build a hash table in memory with one entry for each group of
+ * identical tuples, and count the number of tuples in the group from
+ * each relation. After seeing all the input, we scan the hashtable and
+ * generate the correct output using those counts.
*
* This node type is not used for UNION or UNION ALL, since those can be
* implemented more cheaply (there's no need for the junk attribute to
* identify the source relation).
*
+ * Note that SetOp does no qual checking nor projection. The delivered
+ * output tuples are just copies of the first-to-arrive tuple in each
+ * input group.
+ *
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeSetOp.c,v 1.25 2008/01/01 19:45:49 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeSetOp.c,v 1.26 2008/08/07 03:04:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
-/*
- * INTERFACE ROUTINES
- * ExecSetOp - filter input to generate INTERSECT/EXCEPT results
- * ExecInitSetOp - initialize node and subnodes..
- * ExecEndSetOp - shutdown node and subnodes
- */
#include "postgres.h"
@@ -39,6 +44,160 @@
#include "utils/memutils.h"
+/*
+ * SetOpStatePerGroupData - per-group working state
+ *
+ * These values are working state that is initialized at the start of
+ * an input tuple group and updated for each input tuple.
+ *
+ * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
+ * the plan state node. In SETOP_HASHED mode, the hash table contains one
+ * of these for each tuple group.
+ */
+typedef struct SetOpStatePerGroupData
+{
+ long numLeft; /* number of left-input dups in group */
+ long numRight; /* number of right-input dups in group */
+} SetOpStatePerGroupData;
+
+/*
+ * To implement hashed mode, we need a hashtable that stores a
+ * representative tuple and the duplicate counts for each distinct set
+ * of grouping columns. We compute the hash key from the grouping columns.
+ */
+typedef struct SetOpHashEntryData *SetOpHashEntry;
+
+typedef struct SetOpHashEntryData
+{
+ TupleHashEntryData shared; /* common header for hash table entries */
+ SetOpStatePerGroupData pergroup;
+} SetOpHashEntryData;
+
+
+static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
+static void setop_fill_hash_table(SetOpState *setopstate);
+static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
+
+
+/*
+ * Initialize state for a new group of input values.
+ */
+static void
+initialize_counts(SetOpState *setopstate, SetOpStatePerGroup pergroup)
+{
+ pergroup->numLeft = pergroup->numRight = 0;
+}
+
+/*
+ * Advance the appropriate counter for one input tuple.
+ */
+static void
+advance_counts(SetOpState *setopstate, SetOpStatePerGroup pergroup,
+ TupleTableSlot *inputslot)
+{
+ SetOp *node = (SetOp *) setopstate->ps.plan;
+ int flag;
+ bool isNull;
+
+ flag = DatumGetInt32(slot_getattr(inputslot,
+ node->flagColIdx,
+ &isNull));
+ Assert(!isNull);
+ if (flag)
+ pergroup->numRight++;
+ else
+ pergroup->numLeft++;
+}
+
+/*
+ * Initialize the hash table to empty.
+ */
+static void
+build_hash_table(SetOpState *setopstate)
+{
+ SetOp *node = (SetOp *) setopstate->ps.plan;
+
+ Assert(node->strategy == SETOP_HASHED);
+ Assert(node->numGroups > 0);
+
+ setopstate->hashtable = BuildTupleHashTable(node->numCols,
+ node->dupColIdx,
+ setopstate->eqfunctions,
+ setopstate->hashfunctions,
+ node->numGroups,
+ sizeof(SetOpHashEntryData),
+ setopstate->tableContext,
+ setopstate->tempContext);
+}
+
+/*
+ * Find or create a hashtable entry for the tuple group containing the
+ * given tuple.
+ */
+static SetOpHashEntry
+lookup_hash_entry(SetOpState *setopstate, TupleTableSlot *inputslot)
+{
+ SetOpHashEntry entry;
+ bool isnew;
+
+ /* find or create the hashtable entry */
+ entry = (SetOpHashEntry) LookupTupleHashEntry(setopstate->hashtable,
+ inputslot,
+ &isnew);
+
+ if (isnew)
+ {
+ /* initialize counts for new tuple group */
+ initialize_counts(setopstate, &entry->pergroup);
+ }
+
+ /* Must reset temp context after each hashtable lookup */
+ MemoryContextReset(setopstate->tempContext);
+
+ return entry;
+}
+
+/*
+ * We've completed processing a tuple group. Decide how many copies (if any)
+ * of its representative row to emit, and store the count into numOutput.
+ * This logic is straight from the SQL92 specification.
+ */
+static void
+set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
+{
+ SetOp *plannode = (SetOp *) setopstate->ps.plan;
+
+ switch (plannode->cmd)
+ {
+ case SETOPCMD_INTERSECT:
+ if (pergroup->numLeft > 0 && pergroup->numRight > 0)
+ setopstate->numOutput = 1;
+ else
+ setopstate->numOutput = 0;
+ break;
+ case SETOPCMD_INTERSECT_ALL:
+ setopstate->numOutput =
+ (pergroup->numLeft < pergroup->numRight) ?
+ pergroup->numLeft : pergroup->numRight;
+ break;
+ case SETOPCMD_EXCEPT:
+ if (pergroup->numLeft > 0 && pergroup->numRight == 0)
+ setopstate->numOutput = 1;
+ else
+ setopstate->numOutput = 0;
+ break;
+ case SETOPCMD_EXCEPT_ALL:
+ setopstate->numOutput =
+ (pergroup->numLeft < pergroup->numRight) ?
+ 0 : (pergroup->numLeft - pergroup->numRight);
+ break;
+ default:
+ elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
+ break;
+ }
+}
+
+
/* ----------------------------------------------------------------
* ExecSetOp
* ----------------------------------------------------------------
@@ -47,14 +206,7 @@ TupleTableSlot * /* return: a tuple or NULL */
ExecSetOp(SetOpState *node)
{
SetOp *plannode = (SetOp *) node->ps.plan;
- TupleTableSlot *resultTupleSlot;
- PlanState *outerPlan;
-
- /*
- * get information from the node
- */
- outerPlan = outerPlanState(node);
- resultTupleSlot = node->ps.ps_ResultTupleSlot;
+ TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
/*
* If the previously-returned tuple needs to be returned more than once,
@@ -66,142 +218,218 @@ ExecSetOp(SetOpState *node)
return resultTupleSlot;
}
- /* Flag that we have no current tuple */
- ExecClearTuple(resultTupleSlot);
+ /* Otherwise, we're done if we are out of groups */
+ if (node->setop_done)
+ return NULL;
+
+ /* Fetch the next tuple group according to the correct strategy */
+ if (plannode->strategy == SETOP_HASHED)
+ {
+ if (!node->table_filled)
+ setop_fill_hash_table(node);
+ return setop_retrieve_hash_table(node);
+ }
+ else
+ return setop_retrieve_direct(node);
+}
+
+/*
+ * ExecSetOp for non-hashed case
+ */
+static TupleTableSlot *
+setop_retrieve_direct(SetOpState *setopstate)
+{
+ SetOp *node = (SetOp *) setopstate->ps.plan;
+ PlanState *outerPlan;
+ SetOpStatePerGroup pergroup;
+ TupleTableSlot *outerslot;
+ TupleTableSlot *resultTupleSlot;
/*
- * Absorb groups of duplicate tuples, counting them, and saving the first
- * of each group as a possible return value. At the end of each group,
- * decide whether to return anything.
- *
- * We assume that the tuples arrive in sorted order so we can detect
- * duplicates easily.
+ * get state info from node
*/
- for (;;)
- {
- TupleTableSlot *inputTupleSlot;
- bool endOfGroup;
+ outerPlan = outerPlanState(setopstate);
+ pergroup = setopstate->pergroup;
+ resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
+ /*
+ * We loop retrieving groups until we find one we should return
+ */
+ while (!setopstate->setop_done)
+ {
/*
- * fetch a tuple from the outer subplan, unless we already did.
+ * If we don't already have the first tuple of the new group, fetch it
+ * from the outer plan.
*/
- if (node->ps.ps_OuterTupleSlot == NULL &&
- !node->subplan_done)
- {
- node->ps.ps_OuterTupleSlot =
- ExecProcNode(outerPlan);
- if (TupIsNull(node->ps.ps_OuterTupleSlot))
- node->subplan_done = true;
- }
- inputTupleSlot = node->ps.ps_OuterTupleSlot;
-
- if (TupIsNull(resultTupleSlot))
+ if (setopstate->grp_firstTuple == NULL)
{
- /*
- * First of group: save a copy in result slot, and reset
- * duplicate-counters for new group.
- */
- if (node->subplan_done)
- return NULL; /* no more tuples */
- ExecCopySlot(resultTupleSlot, inputTupleSlot);
- node->numLeft = 0;
- node->numRight = 0;
- endOfGroup = false;
- }
- else if (node->subplan_done)
- {
- /*
- * Reached end of input, so finish processing final group
- */
- endOfGroup = true;
- }
- else
- {
- /*
- * Else test if the new tuple and the previously saved tuple
- * match.
- */
- if (execTuplesMatch(inputTupleSlot,
- resultTupleSlot,
- plannode->numCols, plannode->dupColIdx,
- node->eqfunctions,
- node->tempContext))
- endOfGroup = false;
+ outerslot = ExecProcNode(outerPlan);
+ if (!TupIsNull(outerslot))
+ {
+ /* Make a copy of the first input tuple */
+ setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
+ }
else
- endOfGroup = true;
+ {
+ /* outer plan produced no tuples at all */
+ setopstate->setop_done = true;
+ return NULL;
+ }
}
- if (endOfGroup)
+ /*
+ * Store the copied first input tuple in the tuple table slot
+ * reserved for it. The tuple will be deleted when it is cleared
+ * from the slot.
+ */
+ ExecStoreTuple(setopstate->grp_firstTuple,
+ resultTupleSlot,
+ InvalidBuffer,
+ true);
+ setopstate->grp_firstTuple = NULL; /* don't keep two pointers */
+
+ /* Initialize working state for a new input tuple group */
+ initialize_counts(setopstate, pergroup);
+
+ /* Count the first input tuple */
+ advance_counts(setopstate, pergroup, resultTupleSlot);
+
+ /*
+ * Scan the outer plan until we exhaust it or cross a group boundary.
+ */
+ for (;;)
{
+ outerslot = ExecProcNode(outerPlan);
+ if (TupIsNull(outerslot))
+ {
+ /* no more outer-plan tuples available */
+ setopstate->setop_done = true;
+ break;
+ }
+
/*
- * We've reached the end of the group containing resultTuple.
- * Decide how many copies (if any) to emit. This logic is
- * straight from the SQL92 specification.
+ * Check whether we've crossed a group boundary.
*/
- switch (plannode->cmd)
+ if (!execTuplesMatch(resultTupleSlot,
+ outerslot,
+ node->numCols, node->dupColIdx,
+ setopstate->eqfunctions,
+ setopstate->tempContext))
{
- case SETOPCMD_INTERSECT:
- if (node->numLeft > 0 && node->numRight > 0)
- node->numOutput = 1;
- else
- node->numOutput = 0;
- break;
- case SETOPCMD_INTERSECT_ALL:
- node->numOutput =
- (node->numLeft < node->numRight) ?
- node->numLeft : node->numRight;
- break;
- case SETOPCMD_EXCEPT:
- if (node->numLeft > 0 && node->numRight == 0)
- node->numOutput = 1;
- else
- node->numOutput = 0;
- break;
- case SETOPCMD_EXCEPT_ALL:
- node->numOutput =
- (node->numLeft < node->numRight) ?
- 0 : (node->numLeft - node->numRight);
- break;
- default:
- elog(ERROR, "unrecognized set op: %d",
- (int) plannode->cmd);
- break;
- }
- /* Fall out of for-loop if we have tuples to emit */
- if (node->numOutput > 0)
+ /*
+ * Save the first input tuple of the next group.
+ */
+ setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
break;
- /* Else flag that we have no current tuple, and loop around */
- ExecClearTuple(resultTupleSlot);
+ }
+
+ /* Still in same group, so count this tuple */
+ advance_counts(setopstate, pergroup, outerslot);
}
- else
+
+ /*
+ * Done scanning input tuple group. See if we should emit any
+ * copies of result tuple, and if so return the first copy.
+ */
+ set_output_count(setopstate, pergroup);
+
+ if (setopstate->numOutput > 0)
{
- /*
- * Current tuple is member of same group as resultTuple. Count it
- * in the appropriate counter.
- */
- int flag;
- bool isNull;
-
- flag = DatumGetInt32(slot_getattr(inputTupleSlot,
- plannode->flagColIdx,
- &isNull));
- Assert(!isNull);
- if (flag)
- node->numRight++;
- else
- node->numLeft++;
- /* Set flag to fetch a new input tuple, and loop around */
- node->ps.ps_OuterTupleSlot = NULL;
+ setopstate->numOutput--;
+ return resultTupleSlot;
}
}
+ /* No more groups */
+ ExecClearTuple(resultTupleSlot);
+ return NULL;
+}
+
+/*
+ * ExecSetOp for hashed case: phase 1, read input and build hash table
+ */
+static void
+setop_fill_hash_table(SetOpState *setopstate)
+{
+ PlanState *outerPlan;
+ SetOpHashEntry entry;
+ TupleTableSlot *outerslot;
+
+ /*
+ * get state info from node
+ */
+ outerPlan = outerPlanState(setopstate);
+
/*
- * If we fall out of loop, then we need to emit at least one copy of
- * resultTuple.
+ * Process each outer-plan tuple, and then fetch the next one, until we
+ * exhaust the outer plan.
*/
- Assert(node->numOutput > 0);
- node->numOutput--;
- return resultTupleSlot;
+ for (;;)
+ {
+ outerslot = ExecProcNode(outerPlan);
+ if (TupIsNull(outerslot))
+ break;
+
+ /* Find or build hashtable entry for this tuple's group */
+ entry = lookup_hash_entry(setopstate, outerslot);
+
+ /* Advance the counts */
+ advance_counts(setopstate, &entry->pergroup, outerslot);
+ }
+
+ setopstate->table_filled = true;
+ /* Initialize to walk the hash table */
+ ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
+}
+
+/*
+ * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
+ */
+static TupleTableSlot *
+setop_retrieve_hash_table(SetOpState *setopstate)
+{
+ SetOpHashEntry entry;
+ TupleTableSlot *resultTupleSlot;
+
+ /*
+ * get state info from node
+ */
+ resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
+
+ /*
+ * We loop retrieving groups until we find one we should return
+ */
+ while (!setopstate->setop_done)
+ {
+ /*
+ * Find the next entry in the hash table
+ */
+ entry = (SetOpHashEntry) ScanTupleHashTable(&setopstate->hashiter);
+ if (entry == NULL)
+ {
+ /* No more entries in hashtable, so done */
+ setopstate->setop_done = true;
+ return NULL;
+ }
+
+ /*
+ * See if we should emit any copies of this tuple, and if so return
+ * the first copy.
+ */
+ set_output_count(setopstate, &entry->pergroup);
+
+ if (setopstate->numOutput > 0)
+ {
+ setopstate->numOutput--;
+ return ExecStoreMinimalTuple(entry->shared.firstTuple,
+ resultTupleSlot,
+ false);
+ }
+ }
+
+ /* No more groups */
+ ExecClearTuple(resultTupleSlot);
+ return NULL;
}
/* ----------------------------------------------------------------
@@ -226,9 +454,14 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
setopstate->ps.plan = (Plan *) node;
setopstate->ps.state = estate;
- setopstate->ps.ps_OuterTupleSlot = NULL;
- setopstate->subplan_done = false;
+ setopstate->eqfunctions = NULL;
+ setopstate->hashfunctions = NULL;
+ setopstate->setop_done = false;
setopstate->numOutput = 0;
+ setopstate->pergroup = NULL;
+ setopstate->grp_firstTuple = NULL;
+ setopstate->hashtable = NULL;
+ setopstate->tableContext = NULL;
/*
* Miscellaneous initialization
@@ -244,6 +477,19 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
+ /*
+ * If hashing, we also need a longer-lived context to store the hash
+ * table. The table can't just be kept in the per-query context because
+ * we want to be able to throw it away in ExecReScanSetOp.
+ */
+ if (node->strategy == SETOP_HASHED)
+ setopstate->tableContext =
+ AllocSetContextCreate(CurrentMemoryContext,
+ "SetOp hash table",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+
#define SETOP_NSLOTS 1
/*
@@ -252,8 +498,13 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
ExecInitResultTupleSlot(estate, &setopstate->ps);
/*
- * then initialize outer plan
+ * initialize child nodes
+ *
+ * If we are hashing then the child plan does not need
+ * to handle REWIND efficiently; see ExecReScanSetOp.
*/
+ if (node->strategy == SETOP_HASHED)
+ eflags &= ~EXEC_FLAG_REWIND;
outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
/*
@@ -264,11 +515,30 @@ ExecInitSetOp(SetOp *node, EState *estate, int eflags)
setopstate->ps.ps_ProjInfo = NULL;
/*
- * Precompute fmgr lookup data for inner loop
+ * Precompute fmgr lookup data for inner loop. We need both equality and
+ * hashing functions to do it by hashing, but only equality if not
+ * hashing.
*/
- setopstate->eqfunctions =
- execTuplesMatchPrepare(node->numCols,
- node->dupOperators);
+ if (node->strategy == SETOP_HASHED)
+ execTuplesHashPrepare(node->numCols,
+ node->dupOperators,
+ &setopstate->eqfunctions,
+ &setopstate->hashfunctions);
+ else
+ setopstate->eqfunctions =
+ execTuplesMatchPrepare(node->numCols,
+ node->dupOperators);
+
+ if (node->strategy == SETOP_HASHED)
+ {
+ build_hash_table(setopstate);
+ setopstate->table_filled = false;
+ }
+ else
+ {
+ setopstate->pergroup =
+ (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
+ }
return setopstate;
}
@@ -293,9 +563,11 @@ ExecEndSetOp(SetOpState *node)
{
/* clean up tuple table */
ExecClearTuple(node->ps.ps_ResultTupleSlot);
- node->ps.ps_OuterTupleSlot = NULL;
+ /* free subsidiary stuff including hashtable */
MemoryContextDelete(node->tempContext);
+ if (node->tableContext)
+ MemoryContextDelete(node->tableContext);
ExecEndNode(outerPlanState(node));
}
@@ -305,10 +577,50 @@ void
ExecReScanSetOp(SetOpState *node, ExprContext *exprCtxt)
{
ExecClearTuple(node->ps.ps_ResultTupleSlot);
- node->ps.ps_OuterTupleSlot = NULL;
- node->subplan_done = false;
+ node->setop_done = false;
node->numOutput = 0;
+ if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
+ {
+ /*
+ * In the hashed case, if we haven't yet built the hash table then we
+ * can just return; nothing done yet, so nothing to undo. If subnode's
+ * chgParam is not NULL then it will be re-scanned by ExecProcNode,
+ * else no reason to re-scan it at all.
+ */
+ if (!node->table_filled)
+ return;
+
+ /*
+ * If we do have the hash table and the subplan does not have any
+ * parameter changes, then we can just rescan the existing hash table;
+ * no need to build it again.
+ */
+ if (((PlanState *) node)->lefttree->chgParam == NULL)
+ {
+ ResetTupleHashIterator(node->hashtable, &node->hashiter);
+ return;
+ }
+ }
+
+ /* Release first tuple of group, if we have made a copy */
+ if (node->grp_firstTuple != NULL)
+ {
+ heap_freetuple(node->grp_firstTuple);
+ node->grp_firstTuple = NULL;
+ }
+
+ /* Release any hashtable storage */
+ if (node->tableContext)
+ MemoryContextResetAndDeleteChildren(node->tableContext);
+
+ /* And rebuild empty hashtable if needed */
+ if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
+ {
+ build_hash_table(node);
+ node->table_filled = false;
+ }
+
/*
* if chgParam of subnode is not null then plan will be re-scanned by
* first ExecProcNode.
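
A quick, hedged illustration of the SQL-spec counting rules that set_output_count() implements (made-up VALUES lists, not from the patch): INTERSECT ALL emits min(numLeft, numRight) copies of each group, and EXCEPT ALL emits numLeft - numRight copies when that difference is positive.

-- The left input has three 1's and the right input one, so EXCEPT ALL keeps
-- 3 - 1 = 2 copies of 1; the single 2 survives and 3 is never emitted.
SELECT x FROM (VALUES (1), (1), (1), (2)) AS l(x)
EXCEPT ALL
SELECT x FROM (VALUES (1), (3)) AS r(x);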
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index e562949d934..4767b924e9e 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.397 2008/08/07 01:11:46 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.398 2008/08/07 03:04:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -652,10 +652,12 @@ _copySetOp(SetOp *from)
* copy remainder of node
*/
COPY_SCALAR_FIELD(cmd);
+ COPY_SCALAR_FIELD(strategy);
COPY_SCALAR_FIELD(numCols);
COPY_POINTER_FIELD(dupColIdx, from->numCols * sizeof(AttrNumber));
COPY_POINTER_FIELD(dupOperators, from->numCols * sizeof(Oid));
COPY_SCALAR_FIELD(flagColIdx);
+ COPY_SCALAR_FIELD(numGroups);
return newnode;
}
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 26f8fb3810a..8440864b0e6 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.331 2008/08/07 01:11:48 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.332 2008/08/07 03:04:03 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
@@ -599,6 +599,7 @@ _outSetOp(StringInfo str, SetOp *node)
_outPlanInfo(str, (Plan *) node);
WRITE_ENUM_FIELD(cmd, SetOpCmd);
+ WRITE_ENUM_FIELD(strategy, SetOpStrategy);
WRITE_INT_FIELD(numCols);
appendStringInfo(str, " :dupColIdx");
@@ -610,6 +611,7 @@ _outSetOp(StringInfo str, SetOp *node)
appendStringInfo(str, " %u", node->dupOperators[i]);
WRITE_INT_FIELD(flagColIdx);
+ WRITE_LONG_FIELD(numGroups);
}
static void
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 80b6ed2edfe..4cf2a5208b1 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.242 2008/08/02 21:32:00 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.243 2008/08/07 03:04:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -3108,8 +3108,9 @@ make_unique(Plan *lefttree, List *distinctList)
* already be sorted accordingly.
*/
SetOp *
-make_setop(SetOpCmd cmd, Plan *lefttree,
- List *distinctList, AttrNumber flagColIdx)
+make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
+ List *distinctList, AttrNumber flagColIdx, long numGroups,
+ double outputRows)
{
SetOp *node = makeNode(SetOp);
Plan *plan = &node->plan;
@@ -3120,20 +3121,13 @@ make_setop(SetOpCmd cmd, Plan *lefttree,
ListCell *slitem;
copy_plan_costsize(plan, lefttree);
+ plan->plan_rows = outputRows;
/*
* Charge one cpu_operator_cost per comparison per input tuple. We assume
* all columns get compared at most of the tuples.
*/
- plan->total_cost += cpu_operator_cost * plan->plan_rows * numCols;
-
- /*
- * We make the unsupported assumption that there will be 10% as many
- * tuples out as in. Any way to do better?
- */
- plan->plan_rows *= 0.1;
- if (plan->plan_rows < 1)
- plan->plan_rows = 1;
+ plan->total_cost += cpu_operator_cost * lefttree->plan_rows * numCols;
plan->targetlist = lefttree->targetlist;
plan->qual = NIL;
@@ -3160,10 +3154,12 @@ make_setop(SetOpCmd cmd, Plan *lefttree,
}
node->cmd = cmd;
+ node->strategy = strategy;
node->numCols = numCols;
node->dupColIdx = dupColIdx;
node->dupOperators = dupOperators;
node->flagColIdx = flagColIdx;
+ node->numGroups = numGroups;
return node;
}
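
To make the revised charge in make_setop() concrete (a rough sketch; 0.0025 is only the shipped default for cpu_operator_cost, so treat it as an assumption about the configuration): with 10,000 input rows and two grouping columns, the added comparison cost is 0.0025 * 10000 * 2 = 50, now computed from the input row count rather than the scaled-down output estimate.

-- Check the value actually in effect before reasoning about plan costs.
SHOW cpu_operator_cost;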
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 750d59a9515..3a155c0d0ad 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -22,7 +22,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.150 2008/08/07 01:11:50 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.151 2008/08/07 03:04:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -60,6 +60,7 @@ static Plan *generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
double tuple_fraction,
List *refnames_tlist, List **sortClauses);
static Plan *generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
+ double tuple_fraction,
List *refnames_tlist, List **sortClauses);
static List *recurse_union_children(Node *setOp, PlannerInfo *root,
double tuple_fraction,
@@ -229,7 +230,7 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
refnames_tlist,
sortClauses);
else
- plan = generate_nonunion_plan(op, root,
+ plan = generate_nonunion_plan(op, root, tuple_fraction,
refnames_tlist,
sortClauses);
@@ -341,6 +342,7 @@ generate_union_plan(SetOperationStmt *op, PlannerInfo *root,
*/
static Plan *
generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
+ double tuple_fraction,
List *refnames_tlist,
List **sortClauses)
{
@@ -351,6 +353,10 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
*groupList,
*planlist,
*child_sortclauses;
+ double dNumDistinctRows;
+ double dNumOutputRows;
+ long numDistinctRows;
+ bool use_hash;
SetOpCmd cmd;
/* Recurse on children, ensuring their outputs are marked */
@@ -394,9 +400,31 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
}
/*
+ * XXX for the moment, take the number of distinct groups as being the
+ * total input size, ie, the worst case. This is too conservative, but
+ * we don't want to risk having the hashtable overrun memory; also,
+ * it's not clear how to get a decent estimate of the true size.
+ */
+ dNumDistinctRows = plan->plan_rows;
+
+ /* Also convert to long int --- but 'ware overflow! */
+ numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
+
+ /*
+ * The output size is taken as 10% of that, which is a completely bogus
+ * guess, but it's what we've used historically.
+ */
+ dNumOutputRows = ceil(dNumDistinctRows * 0.1);
+
+ /*
* Decide whether to hash or sort, and add a sort node if needed.
*/
- plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan);
+ use_hash = choose_hashed_setop(root, groupList, plan,
+ tuple_fraction, dNumDistinctRows,
+ (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");
+
+ if (!use_hash)
+ plan = (Plan *) make_sort_from_sortclauses(root, groupList, plan);
/*
* Finally, add a SetOp plan node to generate the correct output.
@@ -414,9 +442,12 @@ generate_nonunion_plan(SetOperationStmt *op, PlannerInfo *root,
cmd = SETOPCMD_INTERSECT; /* keep compiler quiet */
break;
}
- plan = (Plan *) make_setop(cmd, plan, groupList, list_length(op->colTypes) + 1);
+ plan = (Plan *) make_setop(cmd, use_hash ? SETOP_HASHED : SETOP_SORTED,
+ plan, groupList, list_length(op->colTypes) + 1,
+ numDistinctRows, dNumOutputRows);
- *sortClauses = groupList;
+ /* Result is sorted only if we're not hashing */
+ *sortClauses = use_hash ? NIL : groupList;
return plan;
}
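
One user-visible consequence of returning *sortClauses = NIL for hashed plans: the output order of a hashed INTERSECT/EXCEPT is unspecified, so queries that relied on the old sort as a side effect need an explicit ORDER BY; that is exactly what the regression-test updates below add, for example:

-- From the updated regression tests below: the ORDER BY keeps the result
-- order deterministic whether the planner sorts or hashes.
SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;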
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 96b7a16084b..ce41c750afc 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.185 2008/07/26 19:15:35 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.186 2008/08/07 03:04:03 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1414,21 +1414,31 @@ typedef struct HashState
/* ----------------
* SetOpState information
*
- * SetOp nodes are used "on top of" sort nodes to discard
- * duplicate tuples returned from the sort phase. These are
- * more complex than a simple Unique since we have to count
- * how many duplicates to return.
+ * Even in "sorted" mode, SetOp nodes are more complex than a simple
+ * Unique, since we have to count how many duplicates to return. But
+ * we also support hashing, so this is really more like a cut-down
+ * form of Agg.
* ----------------
*/
+/* this struct is private in nodeSetOp.c: */
+typedef struct SetOpStatePerGroupData *SetOpStatePerGroup;
+
typedef struct SetOpState
{
PlanState ps; /* its first field is NodeTag */
- FmgrInfo *eqfunctions; /* per-field lookup data for equality fns */
- bool subplan_done; /* has subplan returned EOF? */
- long numLeft; /* number of left-input dups of cur group */
- long numRight; /* number of right-input dups of cur group */
+ FmgrInfo *eqfunctions; /* per-grouping-field equality fns */
+ FmgrInfo *hashfunctions; /* per-grouping-field hash fns */
+ bool setop_done; /* indicates completion of output scan */
long numOutput; /* number of dups left to output */
MemoryContext tempContext; /* short-term context for comparisons */
+ /* these fields are used in SETOP_SORTED mode: */
+ SetOpStatePerGroup pergroup; /* per-group working state */
+ HeapTuple grp_firstTuple; /* copy of first tuple of current group */
+ /* these fields are used in SETOP_HASHED mode: */
+ TupleHashTable hashtable; /* hash table with one entry per group */
+ MemoryContext tableContext; /* memory context containing hash table */
+ bool table_filled; /* hash table filled yet? */
+ TupleHashIterator hashiter; /* for iterating through hash table */
} SetOpState;
/* ----------------
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 6fb93e810da..e2eaadd29ff 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.100 2008/04/13 20:51:21 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/plannodes.h,v 1.101 2008/08/07 03:04:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -525,15 +525,23 @@ typedef enum SetOpCmd
SETOPCMD_EXCEPT_ALL
} SetOpCmd;
+typedef enum SetOpStrategy
+{
+ SETOP_SORTED, /* input must be sorted */
+ SETOP_HASHED /* use internal hashtable */
+} SetOpStrategy;
+
typedef struct SetOp
{
Plan plan;
SetOpCmd cmd; /* what to do */
+ SetOpStrategy strategy; /* how to do it */
int numCols; /* number of columns to check for
* duplicate-ness */
AttrNumber *dupColIdx; /* their indexes in the target list */
Oid *dupOperators; /* equality operators to compare with */
AttrNumber flagColIdx; /* where is the flag column, if any */
+ long numGroups; /* estimated number of groups in input */
} SetOp;
/* ----------------
diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h
index 0b46a173706..616f0ea2ad0 100644
--- a/src/include/optimizer/planmain.h
+++ b/src/include/optimizer/planmain.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.108 2008/05/02 21:26:10 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/planmain.h,v 1.109 2008/08/07 03:04:04 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -61,8 +61,9 @@ extern Plan *materialize_finished_plan(Plan *subplan);
extern Unique *make_unique(Plan *lefttree, List *distinctList);
extern Limit *make_limit(Plan *lefttree, Node *limitOffset, Node *limitCount,
int64 offset_est, int64 count_est);
-extern SetOp *make_setop(SetOpCmd cmd, Plan *lefttree,
- List *distinctList, AttrNumber flagColIdx);
+extern SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
+ List *distinctList, AttrNumber flagColIdx, long numGroups,
+ double outputRows);
extern Result *make_result(PlannerInfo *root, List *tlist,
Node *resconstantqual, Plan *subplan);
extern bool is_projection_capable_plan(Plan *plan);
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 722f9651be1..295dacea86d 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -275,21 +275,21 @@ SELECT q2 FROM int8_tbl INTERSECT ALL SELECT q1 FROM int8_tbl;
4567890123456789
(3 rows)
-SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl;
+SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
q2
-------------------
-4567890123456789
456
(2 rows)
-SELECT q2 FROM int8_tbl EXCEPT ALL SELECT q1 FROM int8_tbl;
+SELECT q2 FROM int8_tbl EXCEPT ALL SELECT q1 FROM int8_tbl ORDER BY 1;
q2
-------------------
-4567890123456789
456
(2 rows)
-SELECT q2 FROM int8_tbl EXCEPT ALL SELECT DISTINCT q1 FROM int8_tbl;
+SELECT q2 FROM int8_tbl EXCEPT ALL SELECT DISTINCT q1 FROM int8_tbl ORDER BY 1;
q2
-------------------
-4567890123456789
@@ -326,7 +326,7 @@ SELECT f1 FROM float8_tbl INTERSECT SELECT f1 FROM int4_tbl;
0
(1 row)
-SELECT f1 FROM float8_tbl EXCEPT SELECT f1 FROM int4_tbl;
+SELECT f1 FROM float8_tbl EXCEPT SELECT f1 FROM int4_tbl ORDER BY 1;
f1
-----------------------
-1.2345678901234e+200
@@ -369,14 +369,14 @@ SELECT q1 FROM int8_tbl INTERSECT (((SELECT q2 FROM int8_tbl UNION ALL SELECT q2
-4567890123456789
(7 rows)
-SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl;
+SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
q1
-------------------
-4567890123456789
456
(2 rows)
-SELECT q1 FROM int8_tbl UNION ALL (((SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl)));
+SELECT q1 FROM int8_tbl UNION ALL (((SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1)));
q1
-------------------
123
@@ -388,7 +388,7 @@ SELECT q1 FROM int8_tbl UNION ALL (((SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FR
456
(7 rows)
-(((SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl))) EXCEPT SELECT q1 FROM int8_tbl;
+(((SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl))) EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
q1
-------------------
-4567890123456789
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql
index 0b83d6b185b..81e19d165c4 100644
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -97,11 +97,11 @@ SELECT q2 FROM int8_tbl INTERSECT SELECT q1 FROM int8_tbl;
SELECT q2 FROM int8_tbl INTERSECT ALL SELECT q1 FROM int8_tbl;
-SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl;
+SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
-SELECT q2 FROM int8_tbl EXCEPT ALL SELECT q1 FROM int8_tbl;
+SELECT q2 FROM int8_tbl EXCEPT ALL SELECT q1 FROM int8_tbl ORDER BY 1;
-SELECT q2 FROM int8_tbl EXCEPT ALL SELECT DISTINCT q1 FROM int8_tbl;
+SELECT q2 FROM int8_tbl EXCEPT ALL SELECT DISTINCT q1 FROM int8_tbl ORDER BY 1;
SELECT q1 FROM int8_tbl EXCEPT SELECT q2 FROM int8_tbl;
@@ -115,7 +115,7 @@ SELECT q1 FROM int8_tbl EXCEPT ALL SELECT DISTINCT q2 FROM int8_tbl;
SELECT f1 FROM float8_tbl INTERSECT SELECT f1 FROM int4_tbl;
-SELECT f1 FROM float8_tbl EXCEPT SELECT f1 FROM int4_tbl;
+SELECT f1 FROM float8_tbl EXCEPT SELECT f1 FROM int4_tbl ORDER BY 1;
--
-- Operator precedence and (((((extra))))) parentheses
@@ -127,11 +127,11 @@ SELECT q1 FROM int8_tbl INTERSECT (((SELECT q2 FROM int8_tbl UNION ALL SELECT q2
(((SELECT q1 FROM int8_tbl INTERSECT SELECT q2 FROM int8_tbl))) UNION ALL SELECT q2 FROM int8_tbl;
-SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl;
+SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
-SELECT q1 FROM int8_tbl UNION ALL (((SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl)));
+SELECT q1 FROM int8_tbl UNION ALL (((SELECT q2 FROM int8_tbl EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1)));
-(((SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl))) EXCEPT SELECT q1 FROM int8_tbl;
+(((SELECT q1 FROM int8_tbl UNION ALL SELECT q2 FROM int8_tbl))) EXCEPT SELECT q1 FROM int8_tbl ORDER BY 1;
--
-- Subqueries with ORDER BY & LIMIT clauses