diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2003-01-12 04:03:34 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2003-01-12 04:03:34 +0000 |
commit | 19b886332a76f6b1141a7c1ca1d9eacaa8ef40d2 (patch) | |
tree | 78461354b35c7517a50320b4ec4a0b6c13fead63 /src/backend/executor/nodeSubplan.c | |
parent | 3e54e26bcf310ed619c893f4b69c8cf1591b53cc (diff) | |
download | postgresql-19b886332a76f6b1141a7c1ca1d9eacaa8ef40d2.tar.gz postgresql-19b886332a76f6b1141a7c1ca1d9eacaa8ef40d2.zip |
First cut at implementing IN (and NOT IN) via hashtables. There is
more to be done yet, but this is a good start.
Diffstat (limited to 'src/backend/executor/nodeSubplan.c')
-rw-r--r-- | src/backend/executor/nodeSubplan.c | 508 |
1 files changed, 498 insertions, 10 deletions
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index 40eca6749ec..d3f32913914 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.42 2003/01/10 21:08:08 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.43 2003/01/12 04:03:34 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -22,11 +22,24 @@ #include "access/heapam.h" #include "executor/executor.h" #include "executor/nodeSubplan.h" +#include "nodes/makefuncs.h" +#include "parser/parse_expr.h" #include "tcop/pquery.h" +static Datum ExecHashSubPlan(SubPlanState *node, + ExprContext *econtext, + bool *isNull); +static Datum ExecScanSubPlan(SubPlanState *node, + ExprContext *econtext, + bool *isNull); +static void buildSubPlanHash(SubPlanState *node); +static bool findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot); +static bool tupleAllNulls(HeapTuple tuple); + + /* ---------------------------------------------------------------- - * ExecSubPlan(node) + * ExecSubPlan * ---------------------------------------------------------------- */ Datum @@ -35,6 +48,155 @@ ExecSubPlan(SubPlanState *node, bool *isNull) { SubPlan *subplan = (SubPlan *) node->xprstate.expr; + + if (subplan->setParam != NIL) + elog(ERROR, "ExecSubPlan: can't set parent params from subquery"); + + if (subplan->useHashTable) + return ExecHashSubPlan(node, econtext, isNull); + else + return ExecScanSubPlan(node, econtext, isNull); +} + +/* + * ExecHashSubPlan: store subselect result in an in-memory hash table + */ +static Datum +ExecHashSubPlan(SubPlanState *node, + ExprContext *econtext, + bool *isNull) +{ + SubPlan *subplan = (SubPlan *) node->xprstate.expr; + PlanState *planstate = node->planstate; + ExprContext *innerecontext = node->innerecontext; + TupleTableSlot *slot; + HeapTuple tup; + + /* Shouldn't have any direct correlation Vars */ + if (subplan->parParam != NIL || node->args != NIL) + elog(ERROR, "ExecHashSubPlan: direct correlation not supported"); + + /* + * If first time through or we need to rescan the subplan, build + * the hash table. + */ + if (node->hashtable == NULL || planstate->chgParam != NIL) + buildSubPlanHash(node); + + /* + * The result for an empty subplan is always FALSE; no need to + * evaluate lefthand side. + */ + *isNull = false; + if (!node->havehashrows && !node->havenullrows) + return BoolGetDatum(false); + + /* + * Evaluate lefthand expressions and form a projection tuple. + * First we have to set the econtext to use (hack alert!). + */ + node->projLeft->pi_exprContext = econtext; + slot = ExecProject(node->projLeft, NULL); + tup = slot->val; + + /* + * Note: because we are typically called in a per-tuple context, + * we have to explicitly clear the projected tuple before returning. + * Otherwise, we'll have a double-free situation: the per-tuple context + * will probably be reset before we're called again, and then the tuple + * slot will think it still needs to free the tuple. + */ + + /* + * Since the hashtable routines will use innerecontext's per-tuple + * memory as working memory, be sure to reset it for each tuple. + */ + ResetExprContext(innerecontext); + + /* + * If the LHS is all non-null, probe for an exact match in the + * main hash table. If we find one, the result is TRUE. + * Otherwise, scan the partly-null table to see if there are any + * rows that aren't provably unequal to the LHS; if so, the result + * is UNKNOWN. (We skip that part if we don't care about UNKNOWN.) + * Otherwise, the result is FALSE. + * + * Note: the reason we can avoid a full scan of the main hash table + * is that the combining operators are assumed never to yield NULL + * when both inputs are non-null. If they were to do so, we might + * need to produce UNKNOWN instead of FALSE because of an UNKNOWN + * result in comparing the LHS to some main-table entry --- which + * is a comparison we will not even make, unless there's a chance + * match of hash keys. + */ + if (HeapTupleNoNulls(tup)) + { + if (node->havehashrows && + LookupTupleHashEntry(node->hashtable, slot, NULL) != NULL) + { + ExecClearTuple(slot); + return BoolGetDatum(true); + } + if (node->havenullrows && + findPartialMatch(node->hashnulls, slot)) + { + ExecClearTuple(slot); + *isNull = true; + return BoolGetDatum(false); + } + ExecClearTuple(slot); + return BoolGetDatum(false); + } + + /* + * When the LHS is partly or wholly NULL, we can never return TRUE. + * If we don't care about UNKNOWN, just return FALSE. Otherwise, + * if the LHS is wholly NULL, immediately return UNKNOWN. (Since the + * combining operators are strict, the result could only be FALSE if the + * sub-select were empty, but we already handled that case.) Otherwise, + * we must scan both the main and partly-null tables to see if there are + * any rows that aren't provably unequal to the LHS; if so, the result is + * UNKNOWN. Otherwise, the result is FALSE. + */ + if (node->hashnulls == NULL) + { + ExecClearTuple(slot); + return BoolGetDatum(false); + } + if (tupleAllNulls(tup)) + { + ExecClearTuple(slot); + *isNull = true; + return BoolGetDatum(false); + } + /* Scan partly-null table first, since more likely to get a match */ + if (node->havenullrows && + findPartialMatch(node->hashnulls, slot)) + { + ExecClearTuple(slot); + *isNull = true; + return BoolGetDatum(false); + } + if (node->havehashrows && + findPartialMatch(node->hashtable, slot)) + { + ExecClearTuple(slot); + *isNull = true; + return BoolGetDatum(false); + } + ExecClearTuple(slot); + return BoolGetDatum(false); +} + +/* + * ExecScanSubPlan: default case where we have to rescan subplan each time + */ +static Datum +ExecScanSubPlan(SubPlanState *node, + ExprContext *econtext, + bool *isNull) +{ + SubPlan *subplan = (SubPlan *) node->xprstate.expr; PlanState *planstate = node->planstate; SubLinkType subLinkType = subplan->subLinkType; bool useOr = subplan->useOr; @@ -52,9 +214,6 @@ ExecSubPlan(SubPlanState *node, */ oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt); - if (subplan->setParam != NIL) - elog(ERROR, "ExecSubPlan: can't set parent params from subquery"); - /* * Set Params of this plan from parent plan correlation Vars */ @@ -267,6 +426,203 @@ ExecSubPlan(SubPlanState *node, return result; } +/* + * buildSubPlanHash: load hash table by scanning subplan output. + */ +static void +buildSubPlanHash(SubPlanState *node) +{ + SubPlan *subplan = (SubPlan *) node->xprstate.expr; + PlanState *planstate = node->planstate; + int ncols = length(node->exprs); + ExprContext *innerecontext = node->innerecontext; + MemoryContext tempcxt = innerecontext->ecxt_per_tuple_memory; + MemoryContext oldcontext; + int nbuckets; + TupleTableSlot *slot; + + Assert(subplan->subLinkType == ANY_SUBLINK); + Assert(!subplan->useOr); + + /* + * If we already had any hash tables, destroy 'em; then create + * empty hash table(s). + * + * If we need to distinguish accurately between FALSE and UNKNOWN + * (i.e., NULL) results of the IN operation, then we have to store + * subplan output rows that are partly or wholly NULL. We store such + * rows in a separate hash table that we expect will be much smaller + * than the main table. (We can use hashing to eliminate partly-null + * rows that are not distinct. We keep them separate to minimize the + * cost of the inevitable full-table searches; see findPartialMatch.) + * + * If it's not necessary to distinguish FALSE and UNKNOWN, then we + * don't need to store subplan output rows that contain NULL. + */ + MemoryContextReset(node->tablecxt); + node->hashtable = NULL; + node->hashnulls = NULL; + node->havehashrows = false; + node->havenullrows = false; + + nbuckets = (int) ceil(planstate->plan->plan_rows); + if (nbuckets < 1) + nbuckets = 1; + + node->hashtable = BuildTupleHashTable(ncols, + node->keyColIdx, + node->eqfunctions, + nbuckets, + sizeof(TupleHashEntryData), + node->tablecxt, + tempcxt); + + if (!subplan->unknownEqFalse) + { + if (ncols == 1) + nbuckets = 1; /* there can only be one entry */ + else + { + nbuckets /= 16; + if (nbuckets < 1) + nbuckets = 1; + } + node->hashnulls = BuildTupleHashTable(ncols, + node->keyColIdx, + node->eqfunctions, + nbuckets, + sizeof(TupleHashEntryData), + node->tablecxt, + tempcxt); + } + + /* + * We are probably in a short-lived expression-evaluation context. + * Switch to the child plan's per-query context for calling ExecProcNode. + */ + oldcontext = MemoryContextSwitchTo(node->sub_estate->es_query_cxt); + + /* + * Reset subplan to start. + */ + ExecReScan(planstate, NULL); + + /* + * Scan the subplan and load the hash table(s). Note that when there are + * duplicate rows coming out of the sub-select, only one copy is stored. + */ + for (slot = ExecProcNode(planstate); + !TupIsNull(slot); + slot = ExecProcNode(planstate)) + { + HeapTuple tup = slot->val; + TupleDesc tdesc = slot->ttc_tupleDescriptor; + int col = 1; + List *plst; + bool isnew; + + /* + * Load up the Params representing the raw sub-select outputs, + * then form the projection tuple to store in the hashtable. + */ + foreach(plst, subplan->paramIds) + { + int paramid = lfirsti(plst); + ParamExecData *prmdata; + + prmdata = &(innerecontext->ecxt_param_exec_vals[paramid]); + Assert(prmdata->execPlan == NULL); + prmdata->value = heap_getattr(tup, col, tdesc, + &(prmdata->isnull)); + col++; + } + slot = ExecProject(node->projRight, NULL); + tup = slot->val; + + /* + * If result contains any nulls, store separately or not at all. + * (Since we know the projection tuple has no junk columns, we + * can just look at the overall hasnull info bit, instead of + * groveling through the columns.) + */ + if (HeapTupleNoNulls(tup)) + { + (void) LookupTupleHashEntry(node->hashtable, slot, &isnew); + node->havehashrows = true; + } + else if (node->hashnulls) + { + (void) LookupTupleHashEntry(node->hashnulls, slot, &isnew); + node->havenullrows = true; + } + + /* + * Reset innerecontext after each inner tuple to free any memory + * used in hash computation or comparison routines. + */ + ResetExprContext(innerecontext); + } + + /* + * Since the projected tuples are in the sub-query's context and not + * the main context, we'd better clear the tuple slot before there's + * any chance of a reset of the sub-query's context. Else we will + * have the potential for a double free attempt. + */ + ExecClearTuple(node->projRight->pi_slot); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * findPartialMatch: does the hashtable contain an entry that is not + * provably distinct from the tuple? + * + * We have to scan the whole hashtable; we can't usefully use hashkeys + * to guide probing, since we might get partial matches on tuples with + * hashkeys quite unrelated to what we'd get from the given tuple. + */ +static bool +findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot) +{ + int numCols = hashtable->numCols; + AttrNumber *keyColIdx = hashtable->keyColIdx; + HeapTuple tuple = slot->val; + TupleDesc tupdesc = slot->ttc_tupleDescriptor; + TupleHashIterator hashiter; + TupleHashEntry entry; + + ResetTupleHashIterator(&hashiter); + while ((entry = ScanTupleHashTable(hashtable, &hashiter)) != NULL) + { + if (!execTuplesUnequal(entry->firstTuple, + tuple, + tupdesc, + numCols, keyColIdx, + hashtable->eqfunctions, + hashtable->tempcxt)) + return true; + } + return false; +} + +/* + * tupleAllNulls: is the tuple completely NULL? + */ +static bool +tupleAllNulls(HeapTuple tuple) +{ + int ncols = tuple->t_data->t_natts; + int i; + + for (i = 1; i <= ncols; i++) + { + if (!heap_attisnull(tuple, i)) + return false; + } + return true; +} + /* ---------------------------------------------------------------- * ExecInitSubPlan * ---------------------------------------------------------------- @@ -289,8 +645,14 @@ ExecInitSubPlan(SubPlanState *node, EState *estate) */ node->needShutdown = false; node->curTuple = NULL; + node->projLeft = NULL; + node->projRight = NULL; node->hashtable = NULL; node->hashnulls = NULL; + node->tablecxt = NULL; + node->innerecontext = NULL; + node->keyColIdx = NULL; + node->eqfunctions = NULL; /* * create an EState for the subplan @@ -343,6 +705,137 @@ ExecInitSubPlan(SubPlanState *node, EState *estate) * it, for others - it doesn't matter... */ } + + /* + * If we are going to hash the subquery output, initialize relevant + * stuff. (We don't create the hashtable until needed, though.) + */ + if (subplan->useHashTable) + { + int ncols, + i; + TupleDesc tupDesc; + TupleTable tupTable; + TupleTableSlot *slot; + List *lefttlist, + *righttlist, + *leftptlist, + *rightptlist, + *lexpr; + + /* We need a memory context to hold the hash table(s) */ + node->tablecxt = + AllocSetContextCreate(CurrentMemoryContext, + "Subplan HashTable Context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + /* and a short-lived exprcontext for function evaluation */ + node->innerecontext = CreateExprContext(estate); + /* Silly little array of column numbers 1..n */ + ncols = length(node->exprs); + node->keyColIdx = (AttrNumber *) palloc(ncols * sizeof(AttrNumber)); + for (i = 0; i < ncols; i++) + node->keyColIdx[i] = i+1; + /* + * We use ExecProject to evaluate the lefthand and righthand + * expression lists and form tuples. (You might think that we + * could use the sub-select's output tuples directly, but that is + * not the case if we had to insert any run-time coercions of the + * sub-select's output datatypes; anyway this avoids storing any + * resjunk columns that might be in the sub-select's output.) + * Run through the combining expressions to build tlists for the + * lefthand and righthand sides. We need both the ExprState list + * (for ExecProject) and the underlying parse Exprs (for + * ExecTypeFromTL). + * + * We also extract the combining operators themselves to initialize + * the equality functions for the hash tables. + */ + lefttlist = righttlist = NIL; + leftptlist = rightptlist = NIL; + node->eqfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo)); + i = 1; + foreach(lexpr, node->exprs) + { + FuncExprState *fstate = (FuncExprState *) lfirst(lexpr); + OpExpr *opexpr = (OpExpr *) fstate->xprstate.expr; + ExprState *exstate; + Expr *expr; + TargetEntry *tle; + GenericExprState *tlestate; + + Assert(IsA(fstate, FuncExprState)); + Assert(IsA(opexpr, OpExpr)); + Assert(length(fstate->args) == 2); + + /* Process lefthand argument */ + exstate = (ExprState *) lfirst(fstate->args); + expr = exstate->expr; + tle = makeTargetEntry(makeResdom(i, + exprType((Node *) expr), + exprTypmod((Node *) expr), + NULL, + false), + expr); + tlestate = makeNode(GenericExprState); + tlestate->xprstate.expr = (Expr *) tle; + tlestate->arg = exstate; + lefttlist = lappend(lefttlist, tlestate); + leftptlist = lappend(leftptlist, tle); + + /* Process righthand argument */ + exstate = (ExprState *) lsecond(fstate->args); + expr = exstate->expr; + tle = makeTargetEntry(makeResdom(i, + exprType((Node *) expr), + exprTypmod((Node *) expr), + NULL, + false), + expr); + tlestate = makeNode(GenericExprState); + tlestate->xprstate.expr = (Expr *) tle; + tlestate->arg = exstate; + righttlist = lappend(righttlist, tlestate); + rightptlist = lappend(rightptlist, tle); + + /* Lookup the combining function */ + fmgr_info(opexpr->opfuncid, &node->eqfunctions[i-1]); + + i++; + } + + /* + * Create a tupletable to hold these tuples. (Note: we never bother + * to free the tupletable explicitly; that's okay because it will + * never store raw disk tuples that might have associated buffer + * pins. The only resource involved is memory, which will be + * cleaned up by freeing the query context.) + */ + tupTable = ExecCreateTupleTable(2); + + /* + * Construct tupdescs, slots and projection nodes for left and + * right sides. The lefthand expressions will be evaluated in + * the parent plan node's exprcontext, which we don't have access + * to here. Fortunately we can just pass NULL for now and fill it + * in later (hack alert!). The righthand expressions will be + * evaluated in our own innerecontext. + */ + tupDesc = ExecTypeFromTL(leftptlist, false); + slot = ExecAllocTableSlot(tupTable); + ExecSetSlotDescriptor(slot, tupDesc, true); + node->projLeft = ExecBuildProjectionInfo(lefttlist, + NULL, + slot); + + tupDesc = ExecTypeFromTL(rightptlist, false); + slot = ExecAllocTableSlot(tupTable); + ExecSetSlotDescriptor(slot, tupDesc, true); + node->projRight = ExecBuildProjectionInfo(righttlist, + node->innerecontext, + slot); + } } /* ---------------------------------------------------------------- @@ -476,11 +969,6 @@ ExecEndSubPlan(SubPlanState *node) node->planstate = NULL; node->needShutdown = false; } - if (node->curTuple) - { - heap_freetuple(node->curTuple); - node->curTuple = NULL; - } } void |