diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2011-10-11 14:20:06 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2011-10-11 14:21:30 -0400 |
commit | a0185461dd94c8d31d8d55a7f2839b0d2f172ab9 (patch) | |
tree | 3bd68d4e123336bbdefa8fd92372f0af7fb6d64f /src/backend/executor | |
parent | fa351d5a0db0672b6f586315720302e493116f27 (diff) | |
download | postgresql-a0185461dd94c8d31d8d55a7f2839b0d2f172ab9.tar.gz postgresql-a0185461dd94c8d31d8d55a7f2839b0d2f172ab9.zip |
Rearrange the implementation of index-only scans.
This commit changes index-only scans so that data is read directly from the
index tuple without first generating a faux heap tuple. The only immediate
benefit is that indexes on system columns (such as OID) can be used in
index-only scans, but this is necessary infrastructure if we are ever to
support index-only scans on expression indexes. The executor is now ready
for that, though the planner still needs substantial work to recognize
the possibility.
To do this, Vars in index-only plan nodes have to refer to index columns,
not heap columns. I introduced a new special varno, INDEX_VAR, to mark
such Vars to avoid confusion. (In passing, this commit renames the two
existing special varnos, OUTER and INNER, to OUTER_VAR and INNER_VAR.)
Marking index-column Vars with INDEX_VAR allows ruleutils.c to handle them
with logic similar to what we use for subplan reference Vars.
Since index-only scans are now fundamentally different from regular
indexscans so far as their expression subtrees are concerned, I also chose
to change them to have their own plan node type (and hence, their own
executor source file).
Diffstat (limited to 'src/backend/executor')
-rw-r--r-- | src/backend/executor/Makefile | 3 | ||||
-rw-r--r-- | src/backend/executor/execAmi.c | 21 | ||||
-rw-r--r-- | src/backend/executor/execCurrent.c | 1 | ||||
-rw-r--r-- | src/backend/executor/execProcnode.c | 14 | ||||
-rw-r--r-- | src/backend/executor/execQual.c | 24 | ||||
-rw-r--r-- | src/backend/executor/execScan.c | 9 | ||||
-rw-r--r-- | src/backend/executor/execUtils.c | 12 | ||||
-rw-r--r-- | src/backend/executor/nodeAgg.c | 4 | ||||
-rw-r--r-- | src/backend/executor/nodeBitmapIndexscan.c | 1 | ||||
-rw-r--r-- | src/backend/executor/nodeHash.c | 4 | ||||
-rw-r--r-- | src/backend/executor/nodeIndexonlyscan.c | 542 | ||||
-rw-r--r-- | src/backend/executor/nodeIndexscan.c | 142 | ||||
-rw-r--r-- | src/backend/executor/nodeNestloop.c | 4 |
13 files changed, 634 insertions, 147 deletions
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index a854c9a5dc6..6081b56c086 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -17,7 +17,8 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execJunk.o execMain.o \ execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o \ nodeBitmapAnd.o nodeBitmapOr.o \ nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeHash.o \ - nodeHashjoin.o nodeIndexscan.o nodeLimit.o nodeLockRows.o \ + nodeHashjoin.o nodeIndexscan.o nodeIndexonlyscan.o \ + nodeLimit.o nodeLockRows.o \ nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \ nodeNestloop.o nodeFunctionscan.o nodeRecursiveunion.o nodeResult.o \ nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \ diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 711e8c77866..fa27640fed2 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -26,6 +26,7 @@ #include "executor/nodeGroup.h" #include "executor/nodeHash.h" #include "executor/nodeHashjoin.h" +#include "executor/nodeIndexonlyscan.h" #include "executor/nodeIndexscan.h" #include "executor/nodeLimit.h" #include "executor/nodeLockRows.h" @@ -155,6 +156,10 @@ ExecReScan(PlanState *node) ExecReScanIndexScan((IndexScanState *) node); break; + case T_IndexOnlyScanState: + ExecReScanIndexOnlyScan((IndexOnlyScanState *) node); + break; + case T_BitmapIndexScanState: ExecReScanBitmapIndexScan((BitmapIndexScanState *) node); break; @@ -273,6 +278,10 @@ ExecMarkPos(PlanState *node) ExecIndexMarkPos((IndexScanState *) node); break; + case T_IndexOnlyScanState: + ExecIndexOnlyMarkPos((IndexOnlyScanState *) node); + break; + case T_TidScanState: ExecTidMarkPos((TidScanState *) node); break; @@ -326,6 +335,10 @@ ExecRestrPos(PlanState *node) ExecIndexRestrPos((IndexScanState *) node); break; + case T_IndexOnlyScanState: + ExecIndexOnlyRestrPos((IndexOnlyScanState *) node); + break; + case T_TidScanState: 
ExecTidRestrPos((TidScanState *) node); break; @@ -371,6 +384,7 @@ ExecSupportsMarkRestore(NodeTag plantype) { case T_SeqScan: case T_IndexScan: + case T_IndexOnlyScan: case T_TidScan: case T_ValuesScan: case T_Material: @@ -442,6 +456,10 @@ ExecSupportsBackwardScan(Plan *node) return IndexSupportsBackwardScan(((IndexScan *) node)->indexid) && TargetListSupportsBackwardScan(node->targetlist); + case T_IndexOnlyScan: + return IndexSupportsBackwardScan(((IndexOnlyScan *) node)->indexid) && + TargetListSupportsBackwardScan(node->targetlist); + case T_SubqueryScan: return ExecSupportsBackwardScan(((SubqueryScan *) node)->subplan) && TargetListSupportsBackwardScan(node->targetlist); @@ -474,7 +492,8 @@ TargetListSupportsBackwardScan(List *targetlist) } /* - * An IndexScan node supports backward scan only if the index's AM does. + * An IndexScan or IndexOnlyScan node supports backward scan only if the + * index's AM does. */ static bool IndexSupportsBackwardScan(Oid indexid) diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c index 61a5f471124..5d70ad60de2 100644 --- a/src/backend/executor/execCurrent.c +++ b/src/backend/executor/execCurrent.c @@ -262,6 +262,7 @@ search_plan_tree(PlanState *node, Oid table_oid) */ case T_SeqScanState: case T_IndexScanState: + case T_IndexOnlyScanState: case T_BitmapHeapScanState: case T_TidScanState: { diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 284fc6a63b7..8ab9892c850 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -89,6 +89,7 @@ #include "executor/nodeGroup.h" #include "executor/nodeHash.h" #include "executor/nodeHashjoin.h" +#include "executor/nodeIndexonlyscan.h" #include "executor/nodeIndexscan.h" #include "executor/nodeLimit.h" #include "executor/nodeLockRows.h" @@ -192,6 +193,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags) estate, eflags); break; + case T_IndexOnlyScan: + result = (PlanState 
*) ExecInitIndexOnlyScan((IndexOnlyScan *) node, + estate, eflags); + break; + case T_BitmapIndexScan: result = (PlanState *) ExecInitBitmapIndexScan((BitmapIndexScan *) node, estate, eflags); @@ -397,6 +403,10 @@ ExecProcNode(PlanState *node) result = ExecIndexScan((IndexScanState *) node); break; + case T_IndexOnlyScanState: + result = ExecIndexOnlyScan((IndexOnlyScanState *) node); + break; + /* BitmapIndexScanState does not yield tuples */ case T_BitmapHeapScanState: @@ -627,6 +637,10 @@ ExecEndNode(PlanState *node) ExecEndIndexScan((IndexScanState *) node); break; + case T_IndexOnlyScanState: + ExecEndIndexOnlyScan((IndexOnlyScanState *) node); + break; + case T_BitmapIndexScanState: ExecEndBitmapIndexScan((BitmapIndexScanState *) node); break; diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index 80f08d8b92e..887e5ce82a0 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -578,14 +578,16 @@ ExecEvalVar(ExprState *exprstate, ExprContext *econtext, /* Get the input slot and attribute number we want */ switch (variable->varno) { - case INNER: /* get the tuple from the inner node */ + case INNER_VAR: /* get the tuple from the inner node */ slot = econtext->ecxt_innertuple; break; - case OUTER: /* get the tuple from the outer node */ + case OUTER_VAR: /* get the tuple from the outer node */ slot = econtext->ecxt_outertuple; break; + /* INDEX_VAR is handled by default case */ + default: /* get the tuple from the relation being * scanned */ slot = econtext->ecxt_scantuple; @@ -761,14 +763,16 @@ ExecEvalScalarVar(ExprState *exprstate, ExprContext *econtext, /* Get the input slot and attribute number we want */ switch (variable->varno) { - case INNER: /* get the tuple from the inner node */ + case INNER_VAR: /* get the tuple from the inner node */ slot = econtext->ecxt_innertuple; break; - case OUTER: /* get the tuple from the outer node */ + case OUTER_VAR: /* get the tuple from the outer node */ slot = 
econtext->ecxt_outertuple; break; + /* INDEX_VAR is handled by default case */ + default: /* get the tuple from the relation being * scanned */ slot = econtext->ecxt_scantuple; @@ -804,14 +808,16 @@ ExecEvalWholeRowVar(ExprState *exprstate, ExprContext *econtext, /* Get the input slot we want */ switch (variable->varno) { - case INNER: /* get the tuple from the inner node */ + case INNER_VAR: /* get the tuple from the inner node */ slot = econtext->ecxt_innertuple; break; - case OUTER: /* get the tuple from the outer node */ + case OUTER_VAR: /* get the tuple from the outer node */ slot = econtext->ecxt_outertuple; break; + /* INDEX_VAR is handled by default case */ + default: /* get the tuple from the relation being * scanned */ slot = econtext->ecxt_scantuple; @@ -873,14 +879,16 @@ ExecEvalWholeRowSlow(ExprState *exprstate, ExprContext *econtext, /* Get the input slot we want */ switch (variable->varno) { - case INNER: /* get the tuple from the inner node */ + case INNER_VAR: /* get the tuple from the inner node */ slot = econtext->ecxt_innertuple; break; - case OUTER: /* get the tuple from the outer node */ + case OUTER_VAR: /* get the tuple from the outer node */ slot = econtext->ecxt_outertuple; break; + /* INDEX_VAR is handled by default case */ + default: /* get the tuple from the relation being * scanned */ slot = econtext->ecxt_scantuple; diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c index d4ed2358564..42acc102c63 100644 --- a/src/backend/executor/execScan.c +++ b/src/backend/executor/execScan.c @@ -246,10 +246,17 @@ void ExecAssignScanProjectionInfo(ScanState *node) { Scan *scan = (Scan *) node->ps.plan; + Index varno; + + /* Vars in an index-only scan's tlist should be INDEX_VAR */ + if (IsA(scan, IndexOnlyScan)) + varno = INDEX_VAR; + else + varno = scan->scanrelid; if (tlist_matches_tupdesc(&node->ps, scan->plan.targetlist, - scan->scanrelid, + varno, node->ss_ScanTupleSlot->tts_tupleDescriptor)) node->ps.ps_ProjInfo = 
NULL; else diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 4dbf10b8da9..65591e2445d 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -566,20 +566,22 @@ ExecBuildProjectionInfo(List *targetList, switch (variable->varno) { - case INNER: + case INNER_VAR: varSlotOffsets[numSimpleVars] = offsetof(ExprContext, ecxt_innertuple); if (projInfo->pi_lastInnerVar < attnum) projInfo->pi_lastInnerVar = attnum; break; - case OUTER: + case OUTER_VAR: varSlotOffsets[numSimpleVars] = offsetof(ExprContext, ecxt_outertuple); if (projInfo->pi_lastOuterVar < attnum) projInfo->pi_lastOuterVar = attnum; break; + /* INDEX_VAR is handled by default case */ + default: varSlotOffsets[numSimpleVars] = offsetof(ExprContext, ecxt_scantuple); @@ -628,16 +630,18 @@ get_last_attnums(Node *node, ProjectionInfo *projInfo) switch (variable->varno) { - case INNER: + case INNER_VAR: if (projInfo->pi_lastInnerVar < attnum) projInfo->pi_lastInnerVar = attnum; break; - case OUTER: + case OUTER_VAR: if (projInfo->pi_lastOuterVar < attnum) projInfo->pi_lastOuterVar = attnum; break; + /* INDEX_VAR is handled by default case */ + default: if (projInfo->pi_lastScanVar < attnum) projInfo->pi_lastScanVar = attnum; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index e769d6d012c..0701da40b1b 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -806,8 +806,8 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos) { Var *var = (Var *) node; - /* setrefs.c should have set the varno to OUTER */ - Assert(var->varno == OUTER); + /* setrefs.c should have set the varno to OUTER_VAR */ + Assert(var->varno == OUTER_VAR); Assert(var->varlevelsup == 0); *colnos = bms_add_member(*colnos, var->varattno); return false; diff --git a/src/backend/executor/nodeBitmapIndexscan.c b/src/backend/executor/nodeBitmapIndexscan.c index 8e1df079b37..8cc8315a457 100644 --- 
a/src/backend/executor/nodeBitmapIndexscan.c +++ b/src/backend/executor/nodeBitmapIndexscan.c @@ -266,7 +266,6 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) */ ExecIndexBuildScanKeys((PlanState *) indexstate, indexstate->biss_RelationDesc, - node->scan.scanrelid, node->indexqual, false, &indexstate->biss_ScanKeys, diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index e72a71bf51b..091aef90e0a 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -755,8 +755,8 @@ ExecHashTableInsert(HashJoinTable hashtable, * Compute the hash value for a tuple * * The tuple to be tested must be in either econtext->ecxt_outertuple or - * econtext->ecxt_innertuple. Vars in the hashkeys expressions reference - * either OUTER or INNER. + * econtext->ecxt_innertuple. Vars in the hashkeys expressions should have + * varno either OUTER_VAR or INNER_VAR. * * A TRUE result means the tuple's hash value has been successfully computed * and stored at *hashvalue. A FALSE result means the tuple cannot match diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c new file mode 100644 index 00000000000..487373b4970 --- /dev/null +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -0,0 +1,542 @@ +/*------------------------------------------------------------------------- + * + * nodeIndexonlyscan.c + * Routines to support index-only scans + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/executor/nodeIndexonlyscan.c + * + *------------------------------------------------------------------------- + */ +/* + * INTERFACE ROUTINES + * ExecIndexOnlyScan scans an index + * IndexOnlyNext retrieve next tuple + * ExecInitIndexOnlyScan creates and initializes state info. + * ExecReScanIndexOnlyScan rescans the indexed relation. 
+ * ExecEndIndexOnlyScan releases all storage. + * ExecIndexOnlyMarkPos marks scan position. + * ExecIndexOnlyRestrPos restores scan position. + */ +#include "postgres.h" + +#include "access/relscan.h" +#include "access/visibilitymap.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_type.h" +#include "executor/execdebug.h" +#include "executor/nodeIndexonlyscan.h" +#include "executor/nodeIndexscan.h" +#include "storage/bufmgr.h" +#include "utils/memutils.h" +#include "utils/rel.h" + + +static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node); +static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, + Relation indexRel); + + +/* ---------------------------------------------------------------- + * IndexOnlyNext + * + * Retrieve a tuple from the IndexOnlyScan node's index. + * ---------------------------------------------------------------- + */ +static TupleTableSlot * +IndexOnlyNext(IndexOnlyScanState *node) +{ + EState *estate; + ExprContext *econtext; + ScanDirection direction; + IndexScanDesc scandesc; + HeapTuple tuple; + TupleTableSlot *slot; + ItemPointer tid; + + /* + * extract necessary information from index scan node + */ + estate = node->ss.ps.state; + direction = estate->es_direction; + /* flip direction if this is an overall backward scan */ + if (ScanDirectionIsBackward(((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir)) + { + if (ScanDirectionIsForward(direction)) + direction = BackwardScanDirection; + else if (ScanDirectionIsBackward(direction)) + direction = ForwardScanDirection; + } + scandesc = node->ioss_ScanDesc; + econtext = node->ss.ps.ps_ExprContext; + slot = node->ss.ss_ScanTupleSlot; + + /* + * OK, now that we have what we need, fetch the next tuple. + */ + while ((tid = index_getnext_tid(scandesc, direction)) != NULL) + { + /* + * We can skip the heap fetch if the TID references a heap page on + * which all tuples are known visible to everybody. 
In any case, + * we'll use the index tuple not the heap tuple as the data source. + */ + if (!visibilitymap_test(scandesc->heapRelation, + ItemPointerGetBlockNumber(tid), + &node->ioss_VMBuffer)) + { + /* + * Rats, we have to visit the heap to check visibility. + */ + tuple = index_fetch_heap(scandesc); + if (tuple == NULL) + continue; /* no visible tuple, try next index entry */ + + /* + * Only MVCC snapshots are supported here, so there should be no + * need to keep following the HOT chain once a visible entry has + * been found. If we did want to allow that, we'd need to keep + * more state to remember not to call index_getnext_tid next time. + */ + if (scandesc->xs_continue_hot) + elog(ERROR, "non-MVCC snapshots are not supported in index-only scans"); + + /* + * Note: at this point we are holding a pin on the heap page, as + * recorded in scandesc->xs_cbuf. We could release that pin now, + * but it's not clear whether it's a win to do so. The next index + * entry might require a visit to the same heap page. + */ + } + + /* + * Fill the scan tuple slot with data from the index. + */ + StoreIndexTuple(slot, scandesc->xs_itup, scandesc->indexRelation); + + /* + * If the index was lossy, we have to recheck the index quals. + * (Currently, this can never happen, but we should support the case + * for possible future use, eg with GiST indexes.) + */ + if (scandesc->xs_recheck) + { + econtext->ecxt_scantuple = slot; + ResetExprContext(econtext); + if (!ExecQual(node->indexqual, econtext, false)) + { + /* Fails recheck, so drop it and loop back for another */ + InstrCountFiltered2(node, 1); + continue; + } + } + + return slot; + } + + /* + * if we get here it means the index scan failed so we are at the end of + * the scan.. + */ + return ExecClearTuple(slot); +} + +/* + * StoreIndexTuple + * Fill the slot with data from the index tuple. + * + * At some point this might be generally-useful functionality, but + * right now we don't need it elsewhere. 
+ */ +static void +StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, Relation indexRel) +{ + TupleDesc indexDesc = RelationGetDescr(indexRel); + int nindexatts = indexDesc->natts; + Datum *values = slot->tts_values; + bool *isnull = slot->tts_isnull; + int i; + + /* + * Note: we must use the index relation's tupdesc in index_getattr, + * not the slot's tupdesc, because of index_descriptor_hack(). + */ + Assert(slot->tts_tupleDescriptor->natts == nindexatts); + + ExecClearTuple(slot); + for (i = 0; i < nindexatts; i++) + values[i] = index_getattr(itup, i + 1, indexDesc, &isnull[i]); + ExecStoreVirtualTuple(slot); +} + +/* + * index_descriptor_hack -- ugly kluge to make index's tupdesc OK for slot + * + * This is necessary because, alone among btree opclasses, name_ops uses + * a storage type (cstring) different from its input type. The index + * tuple descriptor will show "cstring", which is correct, but we have to + * expose "name" as the slot datatype or ExecEvalVar will whine. If we + * ever want to have any other cases with a different storage type, we ought + * to think of a cleaner solution than this. + */ +static TupleDesc +index_descriptor_hack(Relation indexRel) +{ + TupleDesc tupdesc = RelationGetDescr(indexRel); + int i; + + /* copy so we can scribble on it safely */ + tupdesc = CreateTupleDescCopy(tupdesc); + + for (i = 0; i < tupdesc->natts; i++) + { + if (indexRel->rd_opfamily[i] == NAME_BTREE_FAM_OID && + tupdesc->attrs[i]->atttypid == CSTRINGOID) + { + tupdesc->attrs[i]->atttypid = NAMEOID; + + /* + * We set attlen to match the type OID just in case anything looks + * at it. Note that this is safe only because StoreIndexTuple + * will insert the data as a virtual tuple, and we don't expect + * anything will try to materialize the scan tuple slot. 
+ */ + tupdesc->attrs[i]->attlen = NAMEDATALEN; + } + } + + return tupdesc; +} + +/* + * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual + * + * This can't really happen, since an index can't supply CTID which would + * be necessary data for any potential EvalPlanQual target relation. If it + * did happen, the EPQ code would pass us the wrong data, namely a heap + * tuple not an index tuple. So throw an error. + */ +static bool +IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot) +{ + elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans"); + return false; /* keep compiler quiet */ +} + +/* ---------------------------------------------------------------- + * ExecIndexOnlyScan(node) + * ---------------------------------------------------------------- + */ +TupleTableSlot * +ExecIndexOnlyScan(IndexOnlyScanState *node) +{ + /* + * If we have runtime keys and they've not already been set up, do it now. + */ + if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady) + ExecReScan((PlanState *) node); + + return ExecScan(&node->ss, + (ExecScanAccessMtd) IndexOnlyNext, + (ExecScanRecheckMtd) IndexOnlyRecheck); +} + +/* ---------------------------------------------------------------- + * ExecReScanIndexOnlyScan(node) + * + * Recalculates the values of any scan keys whose value depends on + * information known at runtime, then rescans the indexed relation. + * + * Updating the scan key was formerly done separately in + * ExecUpdateIndexScanKeys. Integrating it into ReScan makes + * rescans of indices and relations/general streams more uniform. + * ---------------------------------------------------------------- + */ +void +ExecReScanIndexOnlyScan(IndexOnlyScanState *node) +{ + /* + * If we are doing runtime key calculations (ie, any of the index key + * values weren't simple Consts), compute the new key values. But first, + * reset the context so we don't leak memory as each outer tuple is + * scanned. 
Note this assumes that we will recalculate *all* runtime keys + * on each call. + */ + if (node->ioss_NumRuntimeKeys != 0) + { + ExprContext *econtext = node->ioss_RuntimeContext; + + ResetExprContext(econtext); + ExecIndexEvalRuntimeKeys(econtext, + node->ioss_RuntimeKeys, + node->ioss_NumRuntimeKeys); + } + node->ioss_RuntimeKeysReady = true; + + /* reset index scan */ + index_rescan(node->ioss_ScanDesc, + node->ioss_ScanKeys, node->ioss_NumScanKeys, + node->ioss_OrderByKeys, node->ioss_NumOrderByKeys); + + ExecScanReScan(&node->ss); +} + + +/* ---------------------------------------------------------------- + * ExecEndIndexOnlyScan + * ---------------------------------------------------------------- + */ +void +ExecEndIndexOnlyScan(IndexOnlyScanState *node) +{ + Relation indexRelationDesc; + IndexScanDesc indexScanDesc; + Relation relation; + + /* + * extract information from the node + */ + indexRelationDesc = node->ioss_RelationDesc; + indexScanDesc = node->ioss_ScanDesc; + relation = node->ss.ss_currentRelation; + + /* Release VM buffer pin, if any. */ + if (node->ioss_VMBuffer != InvalidBuffer) + { + ReleaseBuffer(node->ioss_VMBuffer); + node->ioss_VMBuffer = InvalidBuffer; + } + + /* + * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext + */ +#ifdef NOT_USED + ExecFreeExprContext(&node->ss.ps); + if (node->ioss_RuntimeContext) + FreeExprContext(node->ioss_RuntimeContext, true); +#endif + + /* + * clear out tuple table slots + */ + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + ExecClearTuple(node->ss.ss_ScanTupleSlot); + + /* + * close the index relation (no-op if we didn't open it) + */ + if (indexScanDesc) + index_endscan(indexScanDesc); + if (indexRelationDesc) + index_close(indexRelationDesc, NoLock); + + /* + * close the heap relation. 
+ */ + ExecCloseScanRelation(relation); +} + +/* ---------------------------------------------------------------- + * ExecIndexOnlyMarkPos + * ---------------------------------------------------------------- + */ +void +ExecIndexOnlyMarkPos(IndexOnlyScanState *node) +{ + index_markpos(node->ioss_ScanDesc); +} + +/* ---------------------------------------------------------------- + * ExecIndexOnlyRestrPos + * ---------------------------------------------------------------- + */ +void +ExecIndexOnlyRestrPos(IndexOnlyScanState *node) +{ + index_restrpos(node->ioss_ScanDesc); +} + +/* ---------------------------------------------------------------- + * ExecInitIndexOnlyScan + * + * Initializes the index scan's state information, creates + * scan keys, and opens the base and index relations. + * + * Note: index scans have 2 sets of state information because + * we have to keep track of the base relation and the + * index relation. + * ---------------------------------------------------------------- + */ +IndexOnlyScanState * +ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) +{ + IndexOnlyScanState *indexstate; + Relation currentRelation; + bool relistarget; + TupleDesc tupDesc; + + /* + * create state structure + */ + indexstate = makeNode(IndexOnlyScanState); + indexstate->ss.ps.plan = (Plan *) node; + indexstate->ss.ps.state = estate; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &indexstate->ss.ps); + + indexstate->ss.ps.ps_TupFromTlist = false; + + /* + * initialize child expressions + * + * Note: we don't initialize all of the indexorderby expression, only the + * sub-parts corresponding to runtime keys (see below). 
+ */ + indexstate->ss.ps.targetlist = (List *) + ExecInitExpr((Expr *) node->scan.plan.targetlist, + (PlanState *) indexstate); + indexstate->ss.ps.qual = (List *) + ExecInitExpr((Expr *) node->scan.plan.qual, + (PlanState *) indexstate); + indexstate->indexqual = (List *) + ExecInitExpr((Expr *) node->indexqual, + (PlanState *) indexstate); + + /* + * tuple table initialization + */ + ExecInitResultTupleSlot(estate, &indexstate->ss.ps); + ExecInitScanTupleSlot(estate, &indexstate->ss); + + /* + * open the base relation and acquire appropriate lock on it. + */ + currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid); + + indexstate->ss.ss_currentRelation = currentRelation; + indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */ + + /* + * Initialize result tuple type. + */ + ExecAssignResultTypeFromTL(&indexstate->ss.ps); + + /* + * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop + * here. This allows an index-advisor plugin to EXPLAIN a plan containing + * references to nonexistent indexes. + */ + if (eflags & EXEC_FLAG_EXPLAIN_ONLY) + return indexstate; + + /* + * Open the index relation. + * + * If the parent table is one of the target relations of the query, then + * InitPlan already opened and write-locked the index, so we can avoid + * taking another lock here. Otherwise we need a normal reader's lock. + */ + relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid); + indexstate->ioss_RelationDesc = index_open(node->indexid, + relistarget ? NoLock : AccessShareLock); + + /* + * Now we can get the scan tuple's type (which is the index's rowtype, + * not the heap's) and initialize result projection info. 
+ */ + tupDesc = index_descriptor_hack(indexstate->ioss_RelationDesc); + ExecAssignScanType(&indexstate->ss, tupDesc); + ExecAssignScanProjectionInfo(&indexstate->ss); + + /* + * Initialize index-specific scan state + */ + indexstate->ioss_RuntimeKeysReady = false; + indexstate->ioss_RuntimeKeys = NULL; + indexstate->ioss_NumRuntimeKeys = 0; + + /* + * build the index scan keys from the index qualification + */ + ExecIndexBuildScanKeys((PlanState *) indexstate, + indexstate->ioss_RelationDesc, + node->indexqual, + false, + &indexstate->ioss_ScanKeys, + &indexstate->ioss_NumScanKeys, + &indexstate->ioss_RuntimeKeys, + &indexstate->ioss_NumRuntimeKeys, + NULL, /* no ArrayKeys */ + NULL); + + /* + * any ORDER BY exprs have to be turned into scankeys in the same way + */ + ExecIndexBuildScanKeys((PlanState *) indexstate, + indexstate->ioss_RelationDesc, + node->indexorderby, + true, + &indexstate->ioss_OrderByKeys, + &indexstate->ioss_NumOrderByKeys, + &indexstate->ioss_RuntimeKeys, + &indexstate->ioss_NumRuntimeKeys, + NULL, /* no ArrayKeys */ + NULL); + + /* + * If we have runtime keys, we need an ExprContext to evaluate them. The + * node's standard context won't do because we want to reset that context + * for every tuple. So, build another context just like the other one... + * -tgl 7/11/00 + */ + if (indexstate->ioss_NumRuntimeKeys != 0) + { + ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext; + + ExecAssignExprContext(estate, &indexstate->ss.ps); + indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext; + indexstate->ss.ps.ps_ExprContext = stdecontext; + } + else + { + indexstate->ioss_RuntimeContext = NULL; + } + + /* + * Initialize scan descriptor. 
+ */ + indexstate->ioss_ScanDesc = index_beginscan(currentRelation, + indexstate->ioss_RelationDesc, + estate->es_snapshot, + indexstate->ioss_NumScanKeys, + indexstate->ioss_NumOrderByKeys); + + /* Set it up for index-only scan */ + indexstate->ioss_ScanDesc->xs_want_itup = true; + indexstate->ioss_VMBuffer = InvalidBuffer; + + /* + * If no run-time keys to calculate, go ahead and pass the scankeys to the + * index AM. + */ + if (indexstate->ioss_NumRuntimeKeys == 0) + index_rescan(indexstate->ioss_ScanDesc, + indexstate->ioss_ScanKeys, + indexstate->ioss_NumScanKeys, + indexstate->ioss_OrderByKeys, + indexstate->ioss_NumOrderByKeys); + + /* + * all done. + */ + return indexstate; +} diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 56b9855094a..6d073bf5fdb 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -14,8 +14,8 @@ */ /* * INTERFACE ROUTINES - * ExecIndexScan scans a relation using indices - * ExecIndexNext using index to retrieve next tuple + * ExecIndexScan scans a relation using an index + * IndexNext retrieve next tuple using index * ExecInitIndexScan creates and initializes state info. * ExecReScanIndexScan rescans the indexed relation. * ExecEndIndexScan releases all storage. 
@@ -26,7 +26,6 @@ #include "access/nbtree.h" #include "access/relscan.h" -#include "access/visibilitymap.h" #include "executor/execdebug.h" #include "executor/nodeIndexscan.h" #include "optimizer/clauses.h" @@ -37,7 +36,6 @@ static TupleTableSlot *IndexNext(IndexScanState *node); -static void IndexStoreHeapTuple(TupleTableSlot *slot, IndexScanDesc scandesc); /* ---------------------------------------------------------------- @@ -56,7 +54,6 @@ IndexNext(IndexScanState *node) IndexScanDesc scandesc; HeapTuple tuple; TupleTableSlot *slot; - ItemPointer tid; /* * extract necessary information from index scan node @@ -76,67 +73,23 @@ IndexNext(IndexScanState *node) slot = node->ss.ss_ScanTupleSlot; /* - * OK, now that we have what we need, fetch the next TID. + * ok, now that we have what we need, fetch the next tuple. */ - while ((tid = index_getnext_tid(scandesc, direction)) != NULL) + while ((tuple = index_getnext(scandesc, direction)) != NULL) { /* - * Attempt index-only scan, if possible. For this, we need to have - * gotten an index tuple from the AM, and we need the TID to reference - * a heap page on which all tuples are known visible to everybody. - * If that's the case, we don't need to visit the heap page for tuple - * visibility testing, and we don't need any column values that are - * not available from the index. - * - * Note: in the index-only path, we are still holding pin on the - * scan's xs_cbuf, ie, the previously visited heap page. It's not - * clear whether it'd be better to release that pin. + * Store the scanned tuple in the scan tuple slot of the scan state. + * Note: we pass 'false' because tuples returned by amgetnext are + * pointers onto disk pages and must not be pfree()'d. */ - if (scandesc->xs_want_itup && - visibilitymap_test(scandesc->heapRelation, - ItemPointerGetBlockNumber(tid), - &node->iss_VMBuffer)) - { - /* - * Convert index tuple to look like a heap tuple, and store the - * results in the scan tuple slot. 
- */ - IndexStoreHeapTuple(slot, scandesc); - } - else - { - /* Index-only approach not possible, so fetch heap tuple. */ - tuple = index_fetch_heap(scandesc); - - /* Tuple might not be visible. */ - if (tuple == NULL) - continue; - - /* - * Only MVCC snapshots are supported here, so there should be no - * need to keep following the HOT chain once a visible entry has - * been found. If we did want to allow that, we'd need to keep - * more state to remember not to call index_getnext_tid next time. - */ - if (scandesc->xs_continue_hot) - elog(ERROR, "unsupported use of non-MVCC snapshot in executor"); - - /* - * Store the scanned tuple in the scan tuple slot of the scan - * state. - * - * Note: we pass 'false' because tuples returned by amgetnext are - * pointers onto disk pages and must not be pfree()'d. - */ - ExecStoreTuple(tuple, /* tuple to store */ - slot, /* slot to store in */ - scandesc->xs_cbuf, /* buffer containing tuple */ - false); /* don't pfree */ - } + ExecStoreTuple(tuple, /* tuple to store */ + slot, /* slot to store in */ + scandesc->xs_cbuf, /* buffer containing tuple */ + false); /* don't pfree */ /* * If the index was lossy, we have to recheck the index quals using - * the real tuple. + * the fetched tuple. */ if (scandesc->xs_recheck) { @@ -161,53 +114,6 @@ IndexNext(IndexScanState *node) } /* - * IndexStoreHeapTuple - * - * When performing an index-only scan, we build a faux heap tuple - * from the index tuple. Columns not present in the index are set to - * NULL, which is OK because we know they won't be referenced. - * - * The faux tuple is built as a virtual tuple that depends on the - * scandesc's xs_itup, so that must remain valid for as long as we - * need the slot contents. 
- */ -static void -IndexStoreHeapTuple(TupleTableSlot *slot, IndexScanDesc scandesc) -{ - Form_pg_index indexForm = scandesc->indexRelation->rd_index; - TupleDesc indexDesc = RelationGetDescr(scandesc->indexRelation); - int nindexatts = indexDesc->natts; - int nheapatts = slot->tts_tupleDescriptor->natts; - Datum *values = slot->tts_values; - bool *isnull = slot->tts_isnull; - int i; - - /* We must first set the slot to empty, and mark all columns as null */ - ExecClearTuple(slot); - - memset(isnull, true, nheapatts * sizeof(bool)); - - /* Transpose index tuple into heap tuple. */ - for (i = 0; i < nindexatts; i++) - { - int indexatt = indexForm->indkey.values[i]; - - /* Ignore expression columns, as well as system attributes */ - if (indexatt <= 0) - continue; - - Assert(indexatt <= nheapatts); - - values[indexatt - 1] = index_getattr(scandesc->xs_itup, i + 1, - indexDesc, - &isnull[indexatt - 1]); - } - - /* And now we can mark the slot as holding a virtual tuple. */ - ExecStoreVirtualTuple(slot); -} - -/* * IndexRecheck -- access method routine to recheck a tuple in EvalPlanQual */ static bool @@ -493,13 +399,6 @@ ExecEndIndexScan(IndexScanState *node) indexScanDesc = node->iss_ScanDesc; relation = node->ss.ss_currentRelation; - /* Release VM buffer pin, if any. */ - if (node->iss_VMBuffer != InvalidBuffer) - { - ReleaseBuffer(node->iss_VMBuffer); - node->iss_VMBuffer = InvalidBuffer; - } - /* * Free the exprcontext(s) ... 
now dead code, see ExecFreeExprContext */ @@ -659,7 +558,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) */ ExecIndexBuildScanKeys((PlanState *) indexstate, indexstate->iss_RelationDesc, - node->scan.scanrelid, node->indexqual, false, &indexstate->iss_ScanKeys, @@ -674,7 +572,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) */ ExecIndexBuildScanKeys((PlanState *) indexstate, indexstate->iss_RelationDesc, - node->scan.scanrelid, node->indexorderby, true, &indexstate->iss_OrderByKeys, @@ -712,10 +609,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) indexstate->iss_NumScanKeys, indexstate->iss_NumOrderByKeys); - /* Prepare for possible index-only scan */ - indexstate->iss_ScanDesc->xs_want_itup = node->indexonly; - indexstate->iss_VMBuffer = InvalidBuffer; - /* * If no run-time keys to calculate, go ahead and pass the scankeys to the * index AM. @@ -772,7 +665,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) * * planstate: executor state node we are working for * index: the index we are building scan keys for - * scanrelid: varno of the index's relation within current query * quals: indexquals (or indexorderbys) expressions * isorderby: true if processing ORDER BY exprs, false if processing quals * *runtimeKeys: ptr to pre-existing IndexRuntimeKeyInfos, or NULL if none @@ -791,7 +683,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) * ScalarArrayOpExpr quals are not supported. 
*/ void -ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, +ExecIndexBuildScanKeys(PlanState *planstate, Relation index, List *quals, bool isorderby, ScanKey *scanKeys, int *numScanKeys, IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys, @@ -865,7 +757,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, Assert(leftop != NULL); if (!(IsA(leftop, Var) && - ((Var *) leftop)->varno == scanrelid)) + ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "indexqual doesn't have key on left side"); varattno = ((Var *) leftop)->varattno; @@ -979,7 +871,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, Assert(leftop != NULL); if (!(IsA(leftop, Var) && - ((Var *) leftop)->varno == scanrelid)) + ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "indexqual doesn't have key on left side"); varattno = ((Var *) leftop)->varattno; @@ -1107,7 +999,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, Assert(leftop != NULL); if (!(IsA(leftop, Var) && - ((Var *) leftop)->varno == scanrelid)) + ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "indexqual doesn't have key on left side"); varattno = ((Var *) leftop)->varattno; @@ -1172,7 +1064,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, Assert(leftop != NULL); if (!(IsA(leftop, Var) && - ((Var *) leftop)->varno == scanrelid)) + ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "NullTest indexqual has wrong key"); varattno = ((Var *) leftop)->varattno; diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index 49b880d0caf..d6433c7f537 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -147,8 +147,8 @@ ExecNestLoop(NestLoopState *node) ParamExecData *prm; prm = &(econtext->ecxt_param_exec_vals[paramno]); - /* Param value should be an OUTER var */ - Assert(nlp->paramval->varno == OUTER); + /* Param 
value should be an OUTER_VAR var */ + Assert(nlp->paramval->varno == OUTER_VAR); Assert(nlp->paramval->varattno > 0); prm->value = slot_getattr(outerTupleSlot, nlp->paramval->varattno, |