diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2000-01-27 18:11:50 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2000-01-27 18:11:50 +0000 |
commit | dd979f66be20fc54aad06da743f788fbc505bbe1 (patch) | |
tree | 4d6b578e0afc7dc0198296940cd77c79c5eff66c /src/backend/executor | |
parent | 3f0074e403ac3a145c5e43db3348f45fe084703d (diff) | |
download | postgresql-dd979f66be20fc54aad06da743f788fbc505bbe1.tar.gz postgresql-dd979f66be20fc54aad06da743f788fbc505bbe1.zip |
Redesign DISTINCT ON as discussed in pgsql-sql 1/25/00: syntax is now
SELECT DISTINCT ON (expr [, expr ...]) targetlist ...
and there is a check to make sure that the user didn't specify an ORDER BY
that's incompatible with the DISTINCT operation.
Reimplement nodeUnique and nodeGroup to use the proper datatype-specific
equality function for each column being compared --- they used to do
bitwise comparisons or convert the data to text strings and strcmp().
(To add insult to injury, they'd look up the conversion functions once
for each tuple...) Parse/plan representation of DISTINCT is now a list
of SortClause nodes.
initdb forced by querytree change...
Diffstat (limited to 'src/backend/executor')
-rw-r--r-- | src/backend/executor/execTuples.c | 4 | ||||
-rw-r--r-- | src/backend/executor/nodeGroup.c | 210 | ||||
-rw-r--r-- | src/backend/executor/nodeUnique.c | 232 |
3 files changed, 181 insertions, 265 deletions
diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c index f600a78fd24..1cbae3519a9 100644 --- a/src/backend/executor/execTuples.c +++ b/src/backend/executor/execTuples.c @@ -15,7 +15,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execTuples.c,v 1.35 2000/01/26 05:56:22 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execTuples.c,v 1.36 2000/01/27 18:11:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -753,7 +753,7 @@ NodeGetResultTupleSlot(Plan *node) { UniqueState *uniquestate = ((Unique *) node)->uniquestate; - slot = uniquestate->cs_ResultTupleSlot; + slot = uniquestate->cstate.cs_ResultTupleSlot; } break; diff --git a/src/backend/executor/nodeGroup.c b/src/backend/executor/nodeGroup.c index 017929424bc..cad023776d8 100644 --- a/src/backend/executor/nodeGroup.c +++ b/src/backend/executor/nodeGroup.c @@ -9,12 +9,13 @@ * * DESCRIPTION * The Group node is designed for handling queries with a GROUP BY clause. - * It's outer plan must be a sort node. It assumes that the tuples it gets - * back from the outer plan is sorted in the order specified by the group - * columns. (ie. tuples from the same group are consecutive) + * Its outer plan must deliver tuples that are sorted in the order + * specified by the grouping columns (ie. tuples from the same group are + * consecutive). That way, we just have to compare adjacent tuples to + * locate group boundaries. * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.32 2000/01/26 05:56:22 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeGroup.c,v 1.33 2000/01/27 18:11:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,13 +24,14 @@ #include "access/heapam.h" #include "access/printtup.h" +#include "catalog/pg_operator.h" #include "executor/executor.h" #include "executor/nodeGroup.h" +#include "parser/parse_oper.h" +#include "parser/parse_type.h" static TupleTableSlot *ExecGroupEveryTuple(Group *node); static TupleTableSlot *ExecGroupOneTuple(Group *node); -static bool sameGroup(HeapTuple oldslot, HeapTuple newslot, - int numCols, AttrNumber *grpColIdx, TupleDesc tupdesc); /* --------------------------------------- * ExecGroup - @@ -38,11 +40,11 @@ static bool sameGroup(HeapTuple oldslot, HeapTuple newslot, * tuplePerGroup is TRUE, every tuple from the same group will be * returned, followed by a NULL at the end of each group. This is * useful for Agg node which needs to aggregate over tuples of the same - * group. (eg. SELECT salary, count{*} FROM emp GROUP BY salary) + * group. (eg. SELECT salary, count(*) FROM emp GROUP BY salary) * * If tuplePerGroup is FALSE, only one tuple per group is returned. The * tuple returned contains only the group columns. NULL is returned only - * at the end when no more groups is present. This is useful when + * at the end when no more groups are present. This is useful when * the query does not involve aggregates. (eg. SELECT salary FROM emp * GROUP BY salary) * ------------------------------------------ @@ -66,6 +68,7 @@ ExecGroupEveryTuple(Group *node) GroupState *grpstate; EState *estate; ExprContext *econtext; + TupleDesc tupdesc; HeapTuple outerTuple = NULL; HeapTuple firsttuple; @@ -87,6 +90,8 @@ ExecGroupEveryTuple(Group *node) econtext = grpstate->csstate.cstate.cs_ExprContext; + tupdesc = ExecGetScanType(&grpstate->csstate); + /* if we haven't returned first tuple of new group yet ... */ if (grpstate->grp_useFirstTuple) { @@ -110,20 +115,25 @@ ExecGroupEveryTuple(Group *node) outerTuple = outerslot->val; firsttuple = grpstate->grp_firstTuple; - /* this should occur on the first call only */ if (firsttuple == NULL) + { + /* this should occur on the first call only */ grpstate->grp_firstTuple = heap_copytuple(outerTuple); + } else { - /* * Compare with first tuple and see if this tuple is of the * same group. */ - if (!sameGroup(firsttuple, outerTuple, - node->numCols, node->grpColIdx, - ExecGetScanType(&grpstate->csstate))) + if (! execTuplesMatch(firsttuple, outerTuple, + tupdesc, + node->numCols, node->grpColIdx, + grpstate->eqfunctions)) { + /* + * No; save the tuple to return it next time, and return NULL + */ grpstate->grp_useFirstTuple = TRUE; heap_freetuple(firsttuple); grpstate->grp_firstTuple = heap_copytuple(outerTuple); @@ -164,6 +174,7 @@ ExecGroupOneTuple(Group *node) GroupState *grpstate; EState *estate; ExprContext *econtext; + TupleDesc tupdesc; HeapTuple outerTuple = NULL; HeapTuple firsttuple; @@ -185,10 +196,12 @@ ExecGroupOneTuple(Group *node) econtext = node->grpstate->csstate.cstate.cs_ExprContext; + tupdesc = ExecGetScanType(&grpstate->csstate); + firsttuple = grpstate->grp_firstTuple; - /* this should occur on the first call only */ if (firsttuple == NULL) { + /* this should occur on the first call only */ outerslot = ExecProcNode(outerPlan(node), (Plan *) node); if (TupIsNull(outerslot)) { @@ -213,14 +226,14 @@ ExecGroupOneTuple(Group *node) } outerTuple = outerslot->val; - /* ---------------- - * Compare with first tuple and see if this tuple is of - * the same group. - * ---------------- + /* + * Compare with first tuple and see if this tuple is of the + * same group. */ - if ((!sameGroup(firsttuple, outerTuple, - node->numCols, node->grpColIdx, - ExecGetScanType(&grpstate->csstate)))) + if (! execTuplesMatch(firsttuple, outerTuple, + tupdesc, + node->numCols, node->grpColIdx, + grpstate->eqfunctions)) break; } @@ -311,6 +324,14 @@ ExecInitGroup(Group *node, EState *estate, Plan *parent) ExecAssignResultTypeFromTL((Plan *) node, &grpstate->csstate.cstate); ExecAssignProjectionInfo((Plan *) node, &grpstate->csstate.cstate); + /* + * Precompute fmgr lookup data for inner loop + */ + grpstate->eqfunctions = + execTuplesMatchPrepare(ExecGetScanType(&grpstate->csstate), + node->numCols, + node->grpColIdx); + return TRUE; } @@ -347,94 +368,121 @@ ExecEndGroup(Group *node) } } +void +ExecReScanGroup(Group *node, ExprContext *exprCtxt, Plan *parent) +{ + GroupState *grpstate = node->grpstate; + + grpstate->grp_useFirstTuple = FALSE; + grpstate->grp_done = FALSE; + if (grpstate->grp_firstTuple != NULL) + { + heap_freetuple(grpstate->grp_firstTuple); + grpstate->grp_firstTuple = NULL; + } + + if (((Plan *) node)->lefttree && + ((Plan *) node)->lefttree->chgParam == NULL) + ExecReScan(((Plan *) node)->lefttree, exprCtxt, (Plan *) node); +} + /***************************************************************************** - * + * Code shared with nodeUnique.c *****************************************************************************/ /* - * code swiped from nodeUnique.c + * execTuplesMatch + * Return true if two tuples match in all the indicated fields. + * This is used to detect group boundaries in nodeGroup, and to + * decide whether two tuples are distinct or not in nodeUnique. + * + * tuple1, tuple2: the tuples to compare + * tupdesc: tuple descriptor applying to both tuples + * numCols: the number of attributes to be examined + * matchColIdx: array of attribute column numbers + * eqFunctions: array of fmgr lookup info for the equality functions to use */ -static bool -sameGroup(HeapTuple oldtuple, - HeapTuple newtuple, - int numCols, - AttrNumber *grpColIdx, - TupleDesc tupdesc) +bool +execTuplesMatch(HeapTuple tuple1, + HeapTuple tuple2, + TupleDesc tupdesc, + int numCols, + AttrNumber *matchColIdx, + FmgrInfo *eqfunctions) { - bool isNull1, - isNull2; - Datum attr1, - attr2; - char *val1, - *val2; int i; - AttrNumber att; - Oid typoutput, - typelem; - for (i = 0; i < numCols; i++) + /* + * We cannot report a match without checking all the fields, but we + * can report a non-match as soon as we find unequal fields. So, + * start comparing at the last field (least significant sort key). + * That's the most likely to be different... + */ + for (i = numCols; --i >= 0; ) { - att = grpColIdx[i]; - getTypeOutAndElem((Oid) tupdesc->attrs[att - 1]->atttypid, - &typoutput, &typelem); - - attr1 = heap_getattr(oldtuple, + AttrNumber att = matchColIdx[i]; + Datum attr1, + attr2; + bool isNull1, + isNull2; + Datum equal; + + attr1 = heap_getattr(tuple1, att, tupdesc, &isNull1); - attr2 = heap_getattr(newtuple, + attr2 = heap_getattr(tuple2, att, tupdesc, &isNull2); - if (isNull1 == isNull2) - { - if (isNull1) /* both are null, they are equal */ - continue; + if (isNull1 != isNull2) + return FALSE; /* one null and one not; they aren't equal */ - val1 = fmgr(typoutput, attr1, typelem, - tupdesc->attrs[att - 1]->atttypmod); - val2 = fmgr(typoutput, attr2, typelem, - tupdesc->attrs[att - 1]->atttypmod); + if (isNull1) + continue; /* both are null, treat as equal */ - /* - * now, val1 and val2 are ascii representations so we can use - * strcmp for comparison - */ - if (strcmp(val1, val2) != 0) - { - pfree(val1); - pfree(val2); - return FALSE; - } - pfree(val1); - pfree(val2); - } - else - { - /* one is null and the other isn't, they aren't equal */ + /* Apply the type-specific equality function */ + + equal = (Datum) (*fmgr_faddr(& eqfunctions[i])) (attr1, attr2); + + if (DatumGetInt32(equal) == 0) return FALSE; - } } return TRUE; } -void -ExecReScanGroup(Group *node, ExprContext *exprCtxt, Plan *parent) +/* + * execTuplesMatchPrepare + * Look up the equality functions needed for execTuplesMatch. + * The result is a palloc'd array. + */ +FmgrInfo * +execTuplesMatchPrepare(TupleDesc tupdesc, + int numCols, + AttrNumber *matchColIdx) { - GroupState *grpstate = node->grpstate; + FmgrInfo *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo)); + int i; - grpstate->grp_useFirstTuple = FALSE; - grpstate->grp_done = FALSE; - if (grpstate->grp_firstTuple != NULL) + for (i = 0; i < numCols; i++) { - heap_freetuple(grpstate->grp_firstTuple); - grpstate->grp_firstTuple = NULL; + AttrNumber att = matchColIdx[i]; + Oid typid = tupdesc->attrs[att - 1]->atttypid; + Operator eq_operator; + Form_pg_operator pgopform; + + eq_operator = oper("=", typid, typid, true); + if (!HeapTupleIsValid(eq_operator)) + { + elog(ERROR, "Unable to identify an equality operator for type '%s'", + typeidTypeName(typid)); + } + pgopform = (Form_pg_operator) GETSTRUCT(eq_operator); + fmgr_info(pgopform->oprcode, & eqfunctions[i]); } - if (((Plan *) node)->lefttree && - ((Plan *) node)->lefttree->chgParam == NULL) - ExecReScan(((Plan *) node)->lefttree, exprCtxt, (Plan *) node); + return eqfunctions; } diff --git a/src/backend/executor/nodeUnique.c b/src/backend/executor/nodeUnique.c index 6078e0f68a9..f9f1fe81ab3 100644 --- a/src/backend/executor/nodeUnique.c +++ b/src/backend/executor/nodeUnique.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeUnique.c,v 1.26 2000/01/26 05:56:24 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeUnique.c,v 1.27 2000/01/27 18:11:27 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -29,79 +29,14 @@ #include "access/heapam.h" #include "access/printtup.h" #include "executor/executor.h" +#include "executor/nodeGroup.h" #include "executor/nodeUnique.h" /* ---------------------------------------------------------------- - * ExecIdenticalTuples - * - * This is a hack function used by ExecUnique to see if - * two tuples are identical. This should be provided - * by the heap tuple code but isn't. The real problem - * is that we assume we can byte compare tuples to determine - * if they are "equal". In fact, if we have user defined - * types there may be problems because it's possible that - * an ADT may have multiple representations with the - * same ADT value. -cim - * ---------------------------------------------------------------- - */ -static bool /* true if tuples are identical, false - * otherwise */ -ExecIdenticalTuples(TupleTableSlot *t1, TupleTableSlot *t2) -{ - HeapTuple h1; - HeapTuple h2; - char *d1; - char *d2; - int len; - - h1 = t1->val; - h2 = t2->val; - - /* ---------------- - * if tuples aren't the same length then they are - * obviously different (one may have null attributes). - * ---------------- - */ - if (h1->t_len != h2->t_len) - return false; - - /* ---------------- - * if the tuples have different header offsets then - * they are different. This will prevent us from returning - * true when comparing tuples of one attribute where one of - * two we're looking at is null (t_len - t_hoff == 0). - * THE t_len FIELDS CAN BE THE SAME IN THIS CASE!! - * ---------------- - */ - if (h1->t_data->t_hoff != h2->t_data->t_hoff) - return false; - - /* ---------------- - * ok, now get the pointers to the data and the - * size of the attribute portion of the tuple. - * ---------------- - */ - d1 = (char *) GETSTRUCT(h1); - d2 = (char *) GETSTRUCT(h2); - len = (int) h1->t_len - (int) h1->t_data->t_hoff; - - /* ---------------- - * byte compare the data areas and return the result. - * ---------------- - */ - if (memcmp(d1, d2, len) != 0) - return false; - - return true; -} - -/* ---------------------------------------------------------------- * ExecUnique * * This is a very simple node which filters out duplicate * tuples from a stream of sorted tuples from a subplan. - * - * XXX see comments below regarding freeing tuples. * ---------------------------------------------------------------- */ TupleTableSlot * /* return: a tuple or NULL */ @@ -111,11 +46,7 @@ ExecUnique(Unique *node) TupleTableSlot *resultTupleSlot; TupleTableSlot *slot; Plan *outerPlan; - char *uniqueAttr; - AttrNumber uniqueAttrNum; TupleDesc tupDesc; - Oid typoutput, - typelem; /* ---------------- * get information from the node @@ -123,22 +54,8 @@ ExecUnique(Unique *node) */ uniquestate = node->uniquestate; outerPlan = outerPlan((Plan *) node); - resultTupleSlot = uniquestate->cs_ResultTupleSlot; - uniqueAttr = node->uniqueAttr; - uniqueAttrNum = node->uniqueAttrNum; - - if (uniqueAttr) - { - tupDesc = ExecGetResultType(uniquestate); - getTypeOutAndElem((Oid) tupDesc->attrs[uniqueAttrNum - 1]->atttypid, - &typoutput, &typelem); - } - else - { /* keep compiler quiet */ - tupDesc = NULL; - typoutput = InvalidOid; - typelem = InvalidOid; - } + resultTupleSlot = uniquestate->cstate.cs_ResultTupleSlot; + tupDesc = ExecGetResultType(& uniquestate->cstate); /* ---------------- * now loop, returning only non-duplicate tuples. @@ -157,83 +74,38 @@ ExecUnique(Unique *node) return NULL; /* ---------------- - * we use the result tuple slot to hold our saved tuples. - * if we haven't a saved tuple to compare our new tuple with, - * then we exit the loop. This new tuple as the saved tuple - * the next time we get here. + * Always return the first tuple from the subplan. * ---------------- */ - if (TupIsNull(resultTupleSlot)) + if (uniquestate->priorTuple == NULL) break; /* ---------------- - * now test if the new tuple and the previous + * Else test if the new tuple and the previously returned * tuple match. If so then we loop back and fetch * another new tuple from the subplan. * ---------------- */ - - if (uniqueAttr) - { - - /* - * to check equality, we check to see if the typoutput of the - * attributes are equal - */ - bool isNull1, - isNull2; - Datum attr1, - attr2; - char *val1, - *val2; - - attr1 = heap_getattr(slot->val, - uniqueAttrNum, tupDesc, &isNull1); - attr2 = heap_getattr(resultTupleSlot->val, - uniqueAttrNum, tupDesc, &isNull2); - - if (isNull1 == isNull2) - { - if (isNull1) /* both are null, they are equal */ - continue; - val1 = fmgr(typoutput, attr1, typelem, - tupDesc->attrs[uniqueAttrNum - 1]->atttypmod); - val2 = fmgr(typoutput, attr2, typelem, - tupDesc->attrs[uniqueAttrNum - 1]->atttypmod); - - /* - * now, val1 and val2 are ascii representations so we can - * use strcmp for comparison - */ - if (strcmp(val1, val2) == 0) /* they are equal */ - { - pfree(val1); - pfree(val2); - continue; - } - pfree(val1); - pfree(val2); - break; - } - else -/* one is null and the other isn't, they aren't equal */ - break; - - } - else - { - if (!ExecIdenticalTuples(slot, resultTupleSlot)) - break; - } - + if (! execTuplesMatch(slot->val, uniquestate->priorTuple, + tupDesc, + node->numCols, node->uniqColIdx, + uniquestate->eqfunctions)) + break; } /* ---------------- - * we have a new tuple different from the previous saved tuple - * so we save it in the saved tuple slot. We copy the tuple - * so we don't increment the buffer ref count. + * We have a new tuple different from the previous saved tuple (if any). + * Save it and return it. Note that we make two copies of the tuple: + * one to keep for our own future comparisons, and one to return to the + * caller. We need to copy the tuple returned by the subplan to avoid + * holding buffer refcounts, and we need our own copy because the caller + * may alter the resultTupleSlot (eg via ExecRemoveJunk). * ---------------- */ + if (uniquestate->priorTuple != NULL) + heap_freetuple(uniquestate->priorTuple); + uniquestate->priorTuple = heap_copytuple(slot->val); + ExecStoreTuple(heap_copytuple(slot->val), resultTupleSlot, InvalidBuffer, @@ -254,7 +126,6 @@ ExecInitUnique(Unique *node, EState *estate, Plan *parent) { UniqueState *uniquestate; Plan *outerPlan; - char *uniqueAttr; /* ---------------- * assign execution state to node @@ -268,10 +139,10 @@ ExecInitUnique(Unique *node, EState *estate, Plan *parent) */ uniquestate = makeNode(UniqueState); node->uniquestate = uniquestate; - uniqueAttr = node->uniqueAttr; + uniquestate->priorTuple = NULL; /* ---------------- - * Miscellanious initialization + * Miscellaneous initialization * * + assign node's base_id * + assign debugging hooks and @@ -280,14 +151,14 @@ ExecInitUnique(Unique *node, EState *estate, Plan *parent) * they never call ExecQual or ExecTargetList. * ---------------- */ - ExecAssignNodeBaseInfo(estate, uniquestate, parent); + ExecAssignNodeBaseInfo(estate, & uniquestate->cstate, parent); #define UNIQUE_NSLOTS 1 /* ------------ * Tuple table initialization * ------------ */ - ExecInitResultTupleSlot(estate, uniquestate); + ExecInitResultTupleSlot(estate, & uniquestate->cstate); /* ---------------- * then initialize outer plan @@ -301,31 +172,17 @@ ExecInitUnique(Unique *node, EState *estate, Plan *parent) * projection info for this node appropriately * ---------------- */ - ExecAssignResultTypeFromOuterPlan((Plan *) node, uniquestate); - uniquestate->cs_ProjInfo = NULL; - - if (uniqueAttr) - { - TupleDesc tupDesc; - int i = 0; - - tupDesc = ExecGetResultType(uniquestate); - - /* - * the parser should have ensured that uniqueAttr is a legal - * attribute name - */ - while (strcmp(NameStr(tupDesc->attrs[i]->attname), uniqueAttr) != 0) - i++; - node->uniqueAttrNum = i + 1; /* attribute numbers start from 1 */ - } - else - node->uniqueAttrNum = InvalidAttrNumber; + ExecAssignResultTypeFromOuterPlan((Plan *) node, & uniquestate->cstate); + uniquestate->cstate.cs_ProjInfo = NULL; - /* ---------------- - * all done. - * ---------------- + /* + * Precompute fmgr lookup data for inner loop */ + uniquestate->eqfunctions = + execTuplesMatchPrepare(ExecGetResultType(& uniquestate->cstate), + node->numCols, + node->uniqColIdx); + return TRUE; } @@ -347,11 +204,17 @@ ExecCountSlotsUnique(Unique *node) void ExecEndUnique(Unique *node) { - UniqueState *uniquestate; + UniqueState *uniquestate = node->uniquestate; - uniquestate = node->uniquestate; ExecEndNode(outerPlan((Plan *) node), (Plan *) node); - ExecClearTuple(uniquestate->cs_ResultTupleSlot); + + /* clean up tuple table */ + ExecClearTuple(uniquestate->cstate.cs_ResultTupleSlot); + if (uniquestate->priorTuple != NULL) + { + heap_freetuple(uniquestate->priorTuple); + uniquestate->priorTuple = NULL; + } } @@ -360,7 +223,12 @@ ExecReScanUnique(Unique *node, ExprContext *exprCtxt, Plan *parent) { UniqueState *uniquestate = node->uniquestate; - ExecClearTuple(uniquestate->cs_ResultTupleSlot); + ExecClearTuple(uniquestate->cstate.cs_ResultTupleSlot); + if (uniquestate->priorTuple != NULL) + { + heap_freetuple(uniquestate->priorTuple); + uniquestate->priorTuple = NULL; + } /* * if chgParam of subnode is not null then plan will be re-scanned by |