aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor/nodeSubplan.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2003-01-12 04:03:34 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2003-01-12 04:03:34 +0000
commit19b886332a76f6b1141a7c1ca1d9eacaa8ef40d2 (patch)
tree78461354b35c7517a50320b4ec4a0b6c13fead63 /src/backend/executor/nodeSubplan.c
parent3e54e26bcf310ed619c893f4b69c8cf1591b53cc (diff)
downloadpostgresql-19b886332a76f6b1141a7c1ca1d9eacaa8ef40d2.tar.gz
postgresql-19b886332a76f6b1141a7c1ca1d9eacaa8ef40d2.zip
First cut at implementing IN (and NOT IN) via hashtables. There is
more to be done yet, but this is a good start.
Diffstat (limited to 'src/backend/executor/nodeSubplan.c')
-rw-r--r--src/backend/executor/nodeSubplan.c508
1 files changed, 498 insertions, 10 deletions
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 40eca6749ec..d3f32913914 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.42 2003/01/10 21:08:08 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.43 2003/01/12 04:03:34 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -22,11 +22,24 @@
#include "access/heapam.h"
#include "executor/executor.h"
#include "executor/nodeSubplan.h"
+#include "nodes/makefuncs.h"
+#include "parser/parse_expr.h"
#include "tcop/pquery.h"
+static Datum ExecHashSubPlan(SubPlanState *node,
+ ExprContext *econtext,
+ bool *isNull);
+static Datum ExecScanSubPlan(SubPlanState *node,
+ ExprContext *econtext,
+ bool *isNull);
+static void buildSubPlanHash(SubPlanState *node);
+static bool findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot);
+static bool tupleAllNulls(HeapTuple tuple);
+
+
/* ----------------------------------------------------------------
- * ExecSubPlan(node)
+ * ExecSubPlan
* ----------------------------------------------------------------
*/
Datum
@@ -35,6 +48,155 @@ ExecSubPlan(SubPlanState *node,
bool *isNull)
{
SubPlan *subplan = (SubPlan *) node->xprstate.expr;
+
+ if (subplan->setParam != NIL)
+ elog(ERROR, "ExecSubPlan: can't set parent params from subquery");
+
+ if (subplan->useHashTable)
+ return ExecHashSubPlan(node, econtext, isNull);
+ else
+ return ExecScanSubPlan(node, econtext, isNull);
+}
+
+/*
+ * ExecHashSubPlan: store subselect result in an in-memory hash table
+ */
+static Datum
+ExecHashSubPlan(SubPlanState *node,
+ ExprContext *econtext,
+ bool *isNull)
+{
+ SubPlan *subplan = (SubPlan *) node->xprstate.expr;
+ PlanState *planstate = node->planstate;
+ ExprContext *innerecontext = node->innerecontext;
+ TupleTableSlot *slot;
+ HeapTuple tup;
+
+ /* Shouldn't have any direct correlation Vars */
+ if (subplan->parParam != NIL || node->args != NIL)
+ elog(ERROR, "ExecHashSubPlan: direct correlation not supported");
+
+ /*
+ * If first time through or we need to rescan the subplan, build
+ * the hash table.
+ */
+ if (node->hashtable == NULL || planstate->chgParam != NIL)
+ buildSubPlanHash(node);
+
+ /*
+ * The result for an empty subplan is always FALSE; no need to
+ * evaluate lefthand side.
+ */
+ *isNull = false;
+ if (!node->havehashrows && !node->havenullrows)
+ return BoolGetDatum(false);
+
+ /*
+ * Evaluate lefthand expressions and form a projection tuple.
+ * First we have to set the econtext to use (hack alert!).
+ */
+ node->projLeft->pi_exprContext = econtext;
+ slot = ExecProject(node->projLeft, NULL);
+ tup = slot->val;
+
+ /*
+ * Note: because we are typically called in a per-tuple context,
+ * we have to explicitly clear the projected tuple before returning.
+ * Otherwise, we'll have a double-free situation: the per-tuple context
+ * will probably be reset before we're called again, and then the tuple
+ * slot will think it still needs to free the tuple.
+ */
+
+ /*
+ * Since the hashtable routines will use innerecontext's per-tuple
+ * memory as working memory, be sure to reset it for each tuple.
+ */
+ ResetExprContext(innerecontext);
+
+ /*
+ * If the LHS is all non-null, probe for an exact match in the
+ * main hash table. If we find one, the result is TRUE.
+ * Otherwise, scan the partly-null table to see if there are any
+ * rows that aren't provably unequal to the LHS; if so, the result
+ * is UNKNOWN. (We skip that part if we don't care about UNKNOWN.)
+ * Otherwise, the result is FALSE.
+ *
+ * Note: the reason we can avoid a full scan of the main hash table
+ * is that the combining operators are assumed never to yield NULL
+ * when both inputs are non-null. If they were to do so, we might
+ * need to produce UNKNOWN instead of FALSE because of an UNKNOWN
+ * result in comparing the LHS to some main-table entry --- which
+ * is a comparison we will not even make, unless there's a chance
+ * match of hash keys.
+ */
+ if (HeapTupleNoNulls(tup))
+ {
+ if (node->havehashrows &&
+ LookupTupleHashEntry(node->hashtable, slot, NULL) != NULL)
+ {
+ ExecClearTuple(slot);
+ return BoolGetDatum(true);
+ }
+ if (node->havenullrows &&
+ findPartialMatch(node->hashnulls, slot))
+ {
+ ExecClearTuple(slot);
+ *isNull = true;
+ return BoolGetDatum(false);
+ }
+ ExecClearTuple(slot);
+ return BoolGetDatum(false);
+ }
+
+ /*
+ * When the LHS is partly or wholly NULL, we can never return TRUE.
+ * If we don't care about UNKNOWN, just return FALSE. Otherwise,
+ * if the LHS is wholly NULL, immediately return UNKNOWN. (Since the
+ * combining operators are strict, the result could only be FALSE if the
+ * sub-select were empty, but we already handled that case.) Otherwise,
+ * we must scan both the main and partly-null tables to see if there are
+ * any rows that aren't provably unequal to the LHS; if so, the result is
+ * UNKNOWN. Otherwise, the result is FALSE.
+ */
+ if (node->hashnulls == NULL)
+ {
+ ExecClearTuple(slot);
+ return BoolGetDatum(false);
+ }
+ if (tupleAllNulls(tup))
+ {
+ ExecClearTuple(slot);
+ *isNull = true;
+ return BoolGetDatum(false);
+ }
+ /* Scan partly-null table first, since more likely to get a match */
+ if (node->havenullrows &&
+ findPartialMatch(node->hashnulls, slot))
+ {
+ ExecClearTuple(slot);
+ *isNull = true;
+ return BoolGetDatum(false);
+ }
+ if (node->havehashrows &&
+ findPartialMatch(node->hashtable, slot))
+ {
+ ExecClearTuple(slot);
+ *isNull = true;
+ return BoolGetDatum(false);
+ }
+ ExecClearTuple(slot);
+ return BoolGetDatum(false);
+}
+
+/*
+ * ExecScanSubPlan: default case where we have to rescan subplan each time
+ */
+static Datum
+ExecScanSubPlan(SubPlanState *node,
+ ExprContext *econtext,
+ bool *isNull)
+{
+ SubPlan *subplan = (SubPlan *) node->xprstate.expr;
PlanState *planstate = node->planstate;
SubLinkType subLinkType = subplan->subLinkType;
bool useOr = subplan->useOr;
@@ -52,9 +214,6 @@ ExecSubPlan(SubPlanState *node,
*/
oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt);
- if (subplan->setParam != NIL)
- elog(ERROR, "ExecSubPlan: can't set parent params from subquery");
-
/*
* Set Params of this plan from parent plan correlation Vars
*/
@@ -267,6 +426,203 @@ ExecSubPlan(SubPlanState *node,
return result;
}
+/*
+ * buildSubPlanHash: load hash table by scanning subplan output.
+ */
+static void
+buildSubPlanHash(SubPlanState *node)
+{
+ SubPlan *subplan = (SubPlan *) node->xprstate.expr;
+ PlanState *planstate = node->planstate;
+ int ncols = length(node->exprs);
+ ExprContext *innerecontext = node->innerecontext;
+ MemoryContext tempcxt = innerecontext->ecxt_per_tuple_memory;
+ MemoryContext oldcontext;
+ int nbuckets;
+ TupleTableSlot *slot;
+
+ Assert(subplan->subLinkType == ANY_SUBLINK);
+ Assert(!subplan->useOr);
+
+ /*
+ * If we already had any hash tables, destroy 'em; then create
+ * empty hash table(s).
+ *
+ * If we need to distinguish accurately between FALSE and UNKNOWN
+ * (i.e., NULL) results of the IN operation, then we have to store
+ * subplan output rows that are partly or wholly NULL. We store such
+ * rows in a separate hash table that we expect will be much smaller
+ * than the main table. (We can use hashing to eliminate partly-null
+ * rows that are not distinct. We keep them separate to minimize the
+ * cost of the inevitable full-table searches; see findPartialMatch.)
+ *
+ * If it's not necessary to distinguish FALSE and UNKNOWN, then we
+ * don't need to store subplan output rows that contain NULL.
+ */
+ MemoryContextReset(node->tablecxt);
+ node->hashtable = NULL;
+ node->hashnulls = NULL;
+ node->havehashrows = false;
+ node->havenullrows = false;
+
+ nbuckets = (int) ceil(planstate->plan->plan_rows);
+ if (nbuckets < 1)
+ nbuckets = 1;
+
+ node->hashtable = BuildTupleHashTable(ncols,
+ node->keyColIdx,
+ node->eqfunctions,
+ nbuckets,
+ sizeof(TupleHashEntryData),
+ node->tablecxt,
+ tempcxt);
+
+ if (!subplan->unknownEqFalse)
+ {
+ if (ncols == 1)
+ nbuckets = 1; /* there can only be one entry */
+ else
+ {
+ nbuckets /= 16;
+ if (nbuckets < 1)
+ nbuckets = 1;
+ }
+ node->hashnulls = BuildTupleHashTable(ncols,
+ node->keyColIdx,
+ node->eqfunctions,
+ nbuckets,
+ sizeof(TupleHashEntryData),
+ node->tablecxt,
+ tempcxt);
+ }
+
+ /*
+ * We are probably in a short-lived expression-evaluation context.
+ * Switch to the child plan's per-query context for calling ExecProcNode.
+ */
+ oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt);
+
+ /*
+ * Reset subplan to start.
+ */
+ ExecReScan(planstate, NULL);
+
+ /*
+ * Scan the subplan and load the hash table(s). Note that when there are
+ * duplicate rows coming out of the sub-select, only one copy is stored.
+ */
+ for (slot = ExecProcNode(planstate);
+ !TupIsNull(slot);
+ slot = ExecProcNode(planstate))
+ {
+ HeapTuple tup = slot->val;
+ TupleDesc tdesc = slot->ttc_tupleDescriptor;
+ int col = 1;
+ List *plst;
+ bool isnew;
+
+ /*
+ * Load up the Params representing the raw sub-select outputs,
+ * then form the projection tuple to store in the hashtable.
+ */
+ foreach(plst, subplan->paramIds)
+ {
+ int paramid = lfirsti(plst);
+ ParamExecData *prmdata;
+
+ prmdata = &(innerecontext->ecxt_param_exec_vals[paramid]);
+ Assert(prmdata->execPlan == NULL);
+ prmdata->value = heap_getattr(tup, col, tdesc,
+ &(prmdata->isnull));
+ col++;
+ }
+ slot = ExecProject(node->projRight, NULL);
+ tup = slot->val;
+
+ /*
+ * If result contains any nulls, store separately or not at all.
+ * (Since we know the projection tuple has no junk columns, we
+ * can just look at the overall hasnull info bit, instead of
+ * groveling through the columns.)
+ */
+ if (HeapTupleNoNulls(tup))
+ {
+ (void) LookupTupleHashEntry(node->hashtable, slot, &isnew);
+ node->havehashrows = true;
+ }
+ else if (node->hashnulls)
+ {
+ (void) LookupTupleHashEntry(node->hashnulls, slot, &isnew);
+ node->havenullrows = true;
+ }
+
+ /*
+ * Reset innerecontext after each inner tuple to free any memory
+ * used in hash computation or comparison routines.
+ */
+ ResetExprContext(innerecontext);
+ }
+
+ /*
+ * Since the projected tuples are in the sub-query's context and not
+ * the main context, we'd better clear the tuple slot before there's
+ * any chance of a reset of the sub-query's context. Else we will
+ * have the potential for a double free attempt.
+ */
+ ExecClearTuple(node->projRight->pi_slot);
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * findPartialMatch: does the hashtable contain an entry that is not
+ * provably distinct from the tuple?
+ *
+ * We have to scan the whole hashtable; we can't usefully use hashkeys
+ * to guide probing, since we might get partial matches on tuples with
+ * hashkeys quite unrelated to what we'd get from the given tuple.
+ */
+static bool
+findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
+{
+ int numCols = hashtable->numCols;
+ AttrNumber *keyColIdx = hashtable->keyColIdx;
+ HeapTuple tuple = slot->val;
+ TupleDesc tupdesc = slot->ttc_tupleDescriptor;
+ TupleHashIterator hashiter;
+ TupleHashEntry entry;
+
+ ResetTupleHashIterator(&hashiter);
+ while ((entry = ScanTupleHashTable(hashtable, &hashiter)) != NULL)
+ {
+ if (!execTuplesUnequal(entry->firstTuple,
+ tuple,
+ tupdesc,
+ numCols, keyColIdx,
+ hashtable->eqfunctions,
+ hashtable->tempcxt))
+ return true;
+ }
+ return false;
+}
+
+/*
+ * tupleAllNulls: is the tuple completely NULL?
+ */
+static bool
+tupleAllNulls(HeapTuple tuple)
+{
+ int ncols = tuple->t_data->t_natts;
+ int i;
+
+ for (i = 1; i <= ncols; i++)
+ {
+ if (!heap_attisnull(tuple, i))
+ return false;
+ }
+ return true;
+}
+
/* ----------------------------------------------------------------
* ExecInitSubPlan
* ----------------------------------------------------------------
@@ -289,8 +645,14 @@ ExecInitSubPlan(SubPlanState *node, EState *estate)
*/
node->needShutdown = false;
node->curTuple = NULL;
+ node->projLeft = NULL;
+ node->projRight = NULL;
node->hashtable = NULL;
node->hashnulls = NULL;
+ node->tablecxt = NULL;
+ node->innerecontext = NULL;
+ node->keyColIdx = NULL;
+ node->eqfunctions = NULL;
/*
* create an EState for the subplan
@@ -343,6 +705,137 @@ ExecInitSubPlan(SubPlanState *node, EState *estate)
* it, for others - it doesn't matter...
*/
}
+
+ /*
+ * If we are going to hash the subquery output, initialize relevant
+ * stuff. (We don't create the hashtable until needed, though.)
+ */
+ if (subplan->useHashTable)
+ {
+ int ncols,
+ i;
+ TupleDesc tupDesc;
+ TupleTable tupTable;
+ TupleTableSlot *slot;
+ List *lefttlist,
+ *righttlist,
+ *leftptlist,
+ *rightptlist,
+ *lexpr;
+
+ /* We need a memory context to hold the hash table(s) */
+ node->tablecxt =
+ AllocSetContextCreate(CurrentMemoryContext,
+ "Subplan HashTable Context",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ /* and a short-lived exprcontext for function evaluation */
+ node->innerecontext = CreateExprContext(estate);
+ /* Silly little array of column numbers 1..n */
+ ncols = length(node->exprs);
+ node->keyColIdx = (AttrNumber *) palloc(ncols * sizeof(AttrNumber));
+ for (i = 0; i < ncols; i++)
+ node->keyColIdx[i] = i+1;
+ /*
+ * We use ExecProject to evaluate the lefthand and righthand
+ * expression lists and form tuples. (You might think that we
+ * could use the sub-select's output tuples directly, but that is
+ * not the case if we had to insert any run-time coercions of the
+ * sub-select's output datatypes; anyway this avoids storing any
+ * resjunk columns that might be in the sub-select's output.)
+ * Run through the combining expressions to build tlists for the
+ * lefthand and righthand sides. We need both the ExprState list
+ * (for ExecProject) and the underlying parse Exprs (for
+ * ExecTypeFromTL).
+ *
+ * We also extract the combining operators themselves to initialize
+ * the equality functions for the hash tables.
+ */
+ lefttlist = righttlist = NIL;
+ leftptlist = rightptlist = NIL;
+ node->eqfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+ i = 1;
+ foreach(lexpr, node->exprs)
+ {
+ FuncExprState *fstate = (FuncExprState *) lfirst(lexpr);
+ OpExpr *opexpr = (OpExpr *) fstate->xprstate.expr;
+ ExprState *exstate;
+ Expr *expr;
+ TargetEntry *tle;
+ GenericExprState *tlestate;
+
+ Assert(IsA(fstate, FuncExprState));
+ Assert(IsA(opexpr, OpExpr));
+ Assert(length(fstate->args) == 2);
+
+ /* Process lefthand argument */
+ exstate = (ExprState *) lfirst(fstate->args);
+ expr = exstate->expr;
+ tle = makeTargetEntry(makeResdom(i,
+ exprType((Node *) expr),
+ exprTypmod((Node *) expr),
+ NULL,
+ false),
+ expr);
+ tlestate = makeNode(GenericExprState);
+ tlestate->xprstate.expr = (Expr *) tle;
+ tlestate->arg = exstate;
+ lefttlist = lappend(lefttlist, tlestate);
+ leftptlist = lappend(leftptlist, tle);
+
+ /* Process righthand argument */
+ exstate = (ExprState *) lsecond(fstate->args);
+ expr = exstate->expr;
+ tle = makeTargetEntry(makeResdom(i,
+ exprType((Node *) expr),
+ exprTypmod((Node *) expr),
+ NULL,
+ false),
+ expr);
+ tlestate = makeNode(GenericExprState);
+ tlestate->xprstate.expr = (Expr *) tle;
+ tlestate->arg = exstate;
+ righttlist = lappend(righttlist, tlestate);
+ rightptlist = lappend(rightptlist, tle);
+
+ /* Lookup the combining function */
+ fmgr_info(opexpr->opfuncid, &node->eqfunctions[i-1]);
+
+ i++;
+ }
+
+ /*
+ * Create a tupletable to hold these tuples. (Note: we never bother
+ * to free the tupletable explicitly; that's okay because it will
+ * never store raw disk tuples that might have associated buffer
+ * pins. The only resource involved is memory, which will be
+ * cleaned up by freeing the query context.)
+ */
+ tupTable = ExecCreateTupleTable(2);
+
+ /*
+ * Construct tupdescs, slots and projection nodes for left and
+ * right sides. The lefthand expressions will be evaluated in
+ * the parent plan node's exprcontext, which we don't have access
+ * to here. Fortunately we can just pass NULL for now and fill it
+ * in later (hack alert!). The righthand expressions will be
+ * evaluated in our own innerecontext.
+ */
+ tupDesc = ExecTypeFromTL(leftptlist, false);
+ slot = ExecAllocTableSlot(tupTable);
+ ExecSetSlotDescriptor(slot, tupDesc, true);
+ node->projLeft = ExecBuildProjectionInfo(lefttlist,
+ NULL,
+ slot);
+
+ tupDesc = ExecTypeFromTL(rightptlist, false);
+ slot = ExecAllocTableSlot(tupTable);
+ ExecSetSlotDescriptor(slot, tupDesc, true);
+ node->projRight = ExecBuildProjectionInfo(righttlist,
+ node->innerecontext,
+ slot);
+ }
}
/* ----------------------------------------------------------------
@@ -476,11 +969,6 @@ ExecEndSubPlan(SubPlanState *node)
node->planstate = NULL;
node->needShutdown = false;
}
- if (node->curTuple)
- {
- heap_freetuple(node->curTuple);
- node->curTuple = NULL;
- }
}
void