about summary refs log tree commit diff
path: root/src/backend/executor
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2011-10-11 14:20:06 -0400
committer Tom Lane <tgl@sss.pgh.pa.us> 2011-10-11 14:21:30 -0400
commit a0185461dd94c8d31d8d55a7f2839b0d2f172ab9 (patch)
tree 3bd68d4e123336bbdefa8fd92372f0af7fb6d64f /src/backend/executor
parent fa351d5a0db0672b6f586315720302e493116f27 (diff)
download postgresql-a0185461dd94c8d31d8d55a7f2839b0d2f172ab9.tar.gz
postgresql-a0185461dd94c8d31d8d55a7f2839b0d2f172ab9.zip
Rearrange the implementation of index-only scans.
This commit changes index-only scans so that data is read directly from the index tuple without first generating a faux heap tuple. The only immediate benefit is that indexes on system columns (such as OID) can be used in index-only scans, but this is necessary infrastructure if we are ever to support index-only scans on expression indexes. The executor is now ready for that, though the planner still needs substantial work to recognize the possibility.

To do this, Vars in index-only plan nodes have to refer to index columns not heap columns. I introduced a new special varno, INDEX_VAR, to mark such Vars to avoid confusion. (In passing, this commit renames the two existing special varnos to OUTER_VAR and INNER_VAR.) This allows ruleutils.c to handle them with logic similar to what we use for subplan reference Vars.

Since index-only scans are now fundamentally different from regular indexscans so far as their expression subtrees are concerned, I also chose to change them to have their own plan node type (and hence, their own executor source file).
Diffstat (limited to 'src/backend/executor')
-rw-r--r-- src/backend/executor/Makefile 3
-rw-r--r-- src/backend/executor/execAmi.c 21
-rw-r--r-- src/backend/executor/execCurrent.c 1
-rw-r--r-- src/backend/executor/execProcnode.c 14
-rw-r--r-- src/backend/executor/execQual.c 24
-rw-r--r-- src/backend/executor/execScan.c 9
-rw-r--r-- src/backend/executor/execUtils.c 12
-rw-r--r-- src/backend/executor/nodeAgg.c 4
-rw-r--r-- src/backend/executor/nodeBitmapIndexscan.c 1
-rw-r--r-- src/backend/executor/nodeHash.c 4
-rw-r--r-- src/backend/executor/nodeIndexonlyscan.c 542
-rw-r--r-- src/backend/executor/nodeIndexscan.c 142
-rw-r--r-- src/backend/executor/nodeNestloop.c 4
13 files changed, 634 insertions, 147 deletions
diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile
index a854c9a5dc6..6081b56c086 100644
--- a/src/backend/executor/Makefile
+++ b/src/backend/executor/Makefile
@@ -17,7 +17,8 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execJunk.o execMain.o \
execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o \
nodeBitmapAnd.o nodeBitmapOr.o \
nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeHash.o \
- nodeHashjoin.o nodeIndexscan.o nodeLimit.o nodeLockRows.o \
+ nodeHashjoin.o nodeIndexscan.o nodeIndexonlyscan.o \
+ nodeLimit.o nodeLockRows.o \
nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \
nodeNestloop.o nodeFunctionscan.o nodeRecursiveunion.o nodeResult.o \
nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \
diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c
index 711e8c77866..fa27640fed2 100644
--- a/src/backend/executor/execAmi.c
+++ b/src/backend/executor/execAmi.c
@@ -26,6 +26,7 @@
#include "executor/nodeGroup.h"
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
+#include "executor/nodeIndexonlyscan.h"
#include "executor/nodeIndexscan.h"
#include "executor/nodeLimit.h"
#include "executor/nodeLockRows.h"
@@ -155,6 +156,10 @@ ExecReScan(PlanState *node)
ExecReScanIndexScan((IndexScanState *) node);
break;
+ case T_IndexOnlyScanState:
+ ExecReScanIndexOnlyScan((IndexOnlyScanState *) node);
+ break;
+
case T_BitmapIndexScanState:
ExecReScanBitmapIndexScan((BitmapIndexScanState *) node);
break;
@@ -273,6 +278,10 @@ ExecMarkPos(PlanState *node)
ExecIndexMarkPos((IndexScanState *) node);
break;
+ case T_IndexOnlyScanState:
+ ExecIndexOnlyMarkPos((IndexOnlyScanState *) node);
+ break;
+
case T_TidScanState:
ExecTidMarkPos((TidScanState *) node);
break;
@@ -326,6 +335,10 @@ ExecRestrPos(PlanState *node)
ExecIndexRestrPos((IndexScanState *) node);
break;
+ case T_IndexOnlyScanState:
+ ExecIndexOnlyRestrPos((IndexOnlyScanState *) node);
+ break;
+
case T_TidScanState:
ExecTidRestrPos((TidScanState *) node);
break;
@@ -371,6 +384,7 @@ ExecSupportsMarkRestore(NodeTag plantype)
{
case T_SeqScan:
case T_IndexScan:
+ case T_IndexOnlyScan:
case T_TidScan:
case T_ValuesScan:
case T_Material:
@@ -442,6 +456,10 @@ ExecSupportsBackwardScan(Plan *node)
return IndexSupportsBackwardScan(((IndexScan *) node)->indexid) &&
TargetListSupportsBackwardScan(node->targetlist);
+ case T_IndexOnlyScan:
+ return IndexSupportsBackwardScan(((IndexOnlyScan *) node)->indexid) &&
+ TargetListSupportsBackwardScan(node->targetlist);
+
case T_SubqueryScan:
return ExecSupportsBackwardScan(((SubqueryScan *) node)->subplan) &&
TargetListSupportsBackwardScan(node->targetlist);
@@ -474,7 +492,8 @@ TargetListSupportsBackwardScan(List *targetlist)
}
/*
- * An IndexScan node supports backward scan only if the index's AM does.
+ * An IndexScan or IndexOnlyScan node supports backward scan only if the
+ * index's AM does.
*/
static bool
IndexSupportsBackwardScan(Oid indexid)
diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c
index 61a5f471124..5d70ad60de2 100644
--- a/src/backend/executor/execCurrent.c
+++ b/src/backend/executor/execCurrent.c
@@ -262,6 +262,7 @@ search_plan_tree(PlanState *node, Oid table_oid)
*/
case T_SeqScanState:
case T_IndexScanState:
+ case T_IndexOnlyScanState:
case T_BitmapHeapScanState:
case T_TidScanState:
{
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index 284fc6a63b7..8ab9892c850 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -89,6 +89,7 @@
#include "executor/nodeGroup.h"
#include "executor/nodeHash.h"
#include "executor/nodeHashjoin.h"
+#include "executor/nodeIndexonlyscan.h"
#include "executor/nodeIndexscan.h"
#include "executor/nodeLimit.h"
#include "executor/nodeLockRows.h"
@@ -192,6 +193,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags)
estate, eflags);
break;
+ case T_IndexOnlyScan:
+ result = (PlanState *) ExecInitIndexOnlyScan((IndexOnlyScan *) node,
+ estate, eflags);
+ break;
+
case T_BitmapIndexScan:
result = (PlanState *) ExecInitBitmapIndexScan((BitmapIndexScan *) node,
estate, eflags);
@@ -397,6 +403,10 @@ ExecProcNode(PlanState *node)
result = ExecIndexScan((IndexScanState *) node);
break;
+ case T_IndexOnlyScanState:
+ result = ExecIndexOnlyScan((IndexOnlyScanState *) node);
+ break;
+
/* BitmapIndexScanState does not yield tuples */
case T_BitmapHeapScanState:
@@ -627,6 +637,10 @@ ExecEndNode(PlanState *node)
ExecEndIndexScan((IndexScanState *) node);
break;
+ case T_IndexOnlyScanState:
+ ExecEndIndexOnlyScan((IndexOnlyScanState *) node);
+ break;
+
case T_BitmapIndexScanState:
ExecEndBitmapIndexScan((BitmapIndexScanState *) node);
break;
diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c
index 80f08d8b92e..887e5ce82a0 100644
--- a/src/backend/executor/execQual.c
+++ b/src/backend/executor/execQual.c
@@ -578,14 +578,16 @@ ExecEvalVar(ExprState *exprstate, ExprContext *econtext,
/* Get the input slot and attribute number we want */
switch (variable->varno)
{
- case INNER: /* get the tuple from the inner node */
+ case INNER_VAR: /* get the tuple from the inner node */
slot = econtext->ecxt_innertuple;
break;
- case OUTER: /* get the tuple from the outer node */
+ case OUTER_VAR: /* get the tuple from the outer node */
slot = econtext->ecxt_outertuple;
break;
+ /* INDEX_VAR is handled by default case */
+
default: /* get the tuple from the relation being
* scanned */
slot = econtext->ecxt_scantuple;
@@ -761,14 +763,16 @@ ExecEvalScalarVar(ExprState *exprstate, ExprContext *econtext,
/* Get the input slot and attribute number we want */
switch (variable->varno)
{
- case INNER: /* get the tuple from the inner node */
+ case INNER_VAR: /* get the tuple from the inner node */
slot = econtext->ecxt_innertuple;
break;
- case OUTER: /* get the tuple from the outer node */
+ case OUTER_VAR: /* get the tuple from the outer node */
slot = econtext->ecxt_outertuple;
break;
+ /* INDEX_VAR is handled by default case */
+
default: /* get the tuple from the relation being
* scanned */
slot = econtext->ecxt_scantuple;
@@ -804,14 +808,16 @@ ExecEvalWholeRowVar(ExprState *exprstate, ExprContext *econtext,
/* Get the input slot we want */
switch (variable->varno)
{
- case INNER: /* get the tuple from the inner node */
+ case INNER_VAR: /* get the tuple from the inner node */
slot = econtext->ecxt_innertuple;
break;
- case OUTER: /* get the tuple from the outer node */
+ case OUTER_VAR: /* get the tuple from the outer node */
slot = econtext->ecxt_outertuple;
break;
+ /* INDEX_VAR is handled by default case */
+
default: /* get the tuple from the relation being
* scanned */
slot = econtext->ecxt_scantuple;
@@ -873,14 +879,16 @@ ExecEvalWholeRowSlow(ExprState *exprstate, ExprContext *econtext,
/* Get the input slot we want */
switch (variable->varno)
{
- case INNER: /* get the tuple from the inner node */
+ case INNER_VAR: /* get the tuple from the inner node */
slot = econtext->ecxt_innertuple;
break;
- case OUTER: /* get the tuple from the outer node */
+ case OUTER_VAR: /* get the tuple from the outer node */
slot = econtext->ecxt_outertuple;
break;
+ /* INDEX_VAR is handled by default case */
+
default: /* get the tuple from the relation being
* scanned */
slot = econtext->ecxt_scantuple;
diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c
index d4ed2358564..42acc102c63 100644
--- a/src/backend/executor/execScan.c
+++ b/src/backend/executor/execScan.c
@@ -246,10 +246,17 @@ void
ExecAssignScanProjectionInfo(ScanState *node)
{
Scan *scan = (Scan *) node->ps.plan;
+ Index varno;
+
+ /* Vars in an index-only scan's tlist should be INDEX_VAR */
+ if (IsA(scan, IndexOnlyScan))
+ varno = INDEX_VAR;
+ else
+ varno = scan->scanrelid;
if (tlist_matches_tupdesc(&node->ps,
scan->plan.targetlist,
- scan->scanrelid,
+ varno,
node->ss_ScanTupleSlot->tts_tupleDescriptor))
node->ps.ps_ProjInfo = NULL;
else
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 4dbf10b8da9..65591e2445d 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -566,20 +566,22 @@ ExecBuildProjectionInfo(List *targetList,
switch (variable->varno)
{
- case INNER:
+ case INNER_VAR:
varSlotOffsets[numSimpleVars] = offsetof(ExprContext,
ecxt_innertuple);
if (projInfo->pi_lastInnerVar < attnum)
projInfo->pi_lastInnerVar = attnum;
break;
- case OUTER:
+ case OUTER_VAR:
varSlotOffsets[numSimpleVars] = offsetof(ExprContext,
ecxt_outertuple);
if (projInfo->pi_lastOuterVar < attnum)
projInfo->pi_lastOuterVar = attnum;
break;
+ /* INDEX_VAR is handled by default case */
+
default:
varSlotOffsets[numSimpleVars] = offsetof(ExprContext,
ecxt_scantuple);
@@ -628,16 +630,18 @@ get_last_attnums(Node *node, ProjectionInfo *projInfo)
switch (variable->varno)
{
- case INNER:
+ case INNER_VAR:
if (projInfo->pi_lastInnerVar < attnum)
projInfo->pi_lastInnerVar = attnum;
break;
- case OUTER:
+ case OUTER_VAR:
if (projInfo->pi_lastOuterVar < attnum)
projInfo->pi_lastOuterVar = attnum;
break;
+ /* INDEX_VAR is handled by default case */
+
default:
if (projInfo->pi_lastScanVar < attnum)
projInfo->pi_lastScanVar = attnum;
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index e769d6d012c..0701da40b1b 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -806,8 +806,8 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos)
{
Var *var = (Var *) node;
- /* setrefs.c should have set the varno to OUTER */
- Assert(var->varno == OUTER);
+ /* setrefs.c should have set the varno to OUTER_VAR */
+ Assert(var->varno == OUTER_VAR);
Assert(var->varlevelsup == 0);
*colnos = bms_add_member(*colnos, var->varattno);
return false;
diff --git a/src/backend/executor/nodeBitmapIndexscan.c b/src/backend/executor/nodeBitmapIndexscan.c
index 8e1df079b37..8cc8315a457 100644
--- a/src/backend/executor/nodeBitmapIndexscan.c
+++ b/src/backend/executor/nodeBitmapIndexscan.c
@@ -266,7 +266,6 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags)
*/
ExecIndexBuildScanKeys((PlanState *) indexstate,
indexstate->biss_RelationDesc,
- node->scan.scanrelid,
node->indexqual,
false,
&indexstate->biss_ScanKeys,
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index e72a71bf51b..091aef90e0a 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -755,8 +755,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
* Compute the hash value for a tuple
*
* The tuple to be tested must be in either econtext->ecxt_outertuple or
- * econtext->ecxt_innertuple. Vars in the hashkeys expressions reference
- * either OUTER or INNER.
+ * econtext->ecxt_innertuple. Vars in the hashkeys expressions should have
+ * varno either OUTER_VAR or INNER_VAR.
*
* A TRUE result means the tuple's hash value has been successfully computed
* and stored at *hashvalue. A FALSE result means the tuple cannot match
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
new file mode 100644
index 00000000000..487373b4970
--- /dev/null
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -0,0 +1,542 @@
+/*-------------------------------------------------------------------------
+ *
+ * nodeIndexonlyscan.c
+ * Routines to support index-only scans
+ *
+ * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/executor/nodeIndexonlyscan.c
+ *
+ *-------------------------------------------------------------------------
+ */
+/*
+ * INTERFACE ROUTINES
+ * ExecIndexOnlyScan scans an index
+ * IndexOnlyNext retrieve next tuple
+ * ExecInitIndexOnlyScan creates and initializes state info.
+ * ExecReScanIndexOnlyScan rescans the indexed relation.
+ * ExecEndIndexOnlyScan releases all storage.
+ * ExecIndexOnlyMarkPos marks scan position.
+ * ExecIndexOnlyRestrPos restores scan position.
+ */
+#include "postgres.h"
+
+#include "access/relscan.h"
+#include "access/visibilitymap.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_type.h"
+#include "executor/execdebug.h"
+#include "executor/nodeIndexonlyscan.h"
+#include "executor/nodeIndexscan.h"
+#include "storage/bufmgr.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+
+
+static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
+static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup,
+ Relation indexRel);
+
+
+/* ----------------------------------------------------------------
+ * IndexOnlyNext
+ *
+ * Retrieve a tuple from the IndexOnlyScan node's index.
+ * ----------------------------------------------------------------
+ */
+static TupleTableSlot *
+IndexOnlyNext(IndexOnlyScanState *node)
+{
+ EState *estate;
+ ExprContext *econtext;
+ ScanDirection direction;
+ IndexScanDesc scandesc;
+ HeapTuple tuple;
+ TupleTableSlot *slot;
+ ItemPointer tid;
+
+ /*
+ * extract necessary information from index scan node
+ */
+ estate = node->ss.ps.state;
+ direction = estate->es_direction;
+ /* flip direction if this is an overall backward scan */
+ if (ScanDirectionIsBackward(((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir))
+ {
+ if (ScanDirectionIsForward(direction))
+ direction = BackwardScanDirection;
+ else if (ScanDirectionIsBackward(direction))
+ direction = ForwardScanDirection;
+ }
+ scandesc = node->ioss_ScanDesc;
+ econtext = node->ss.ps.ps_ExprContext;
+ slot = node->ss.ss_ScanTupleSlot;
+
+ /*
+ * OK, now that we have what we need, fetch the next tuple.
+ */
+ while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
+ {
+ /*
+ * We can skip the heap fetch if the TID references a heap page on
+ * which all tuples are known visible to everybody. In any case,
+ * we'll use the index tuple not the heap tuple as the data source.
+ */
+ if (!visibilitymap_test(scandesc->heapRelation,
+ ItemPointerGetBlockNumber(tid),
+ &node->ioss_VMBuffer))
+ {
+ /*
+ * Rats, we have to visit the heap to check visibility.
+ */
+ tuple = index_fetch_heap(scandesc);
+ if (tuple == NULL)
+ continue; /* no visible tuple, try next index entry */
+
+ /*
+ * Only MVCC snapshots are supported here, so there should be no
+ * need to keep following the HOT chain once a visible entry has
+ * been found. If we did want to allow that, we'd need to keep
+ * more state to remember not to call index_getnext_tid next time.
+ */
+ if (scandesc->xs_continue_hot)
+ elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");
+
+ /*
+ * Note: at this point we are holding a pin on the heap page, as
+ * recorded in scandesc->xs_cbuf. We could release that pin now,
+ * but it's not clear whether it's a win to do so. The next index
+ * entry might require a visit to the same heap page.
+ */
+ }
+
+ /*
+ * Fill the scan tuple slot with data from the index.
+ */
+ StoreIndexTuple(slot, scandesc->xs_itup, scandesc->indexRelation);
+
+ /*
+ * If the index was lossy, we have to recheck the index quals.
+ * (Currently, this can never happen, but we should support the case
+ * for possible future use, eg with GiST indexes.)
+ */
+ if (scandesc->xs_recheck)
+ {
+ econtext->ecxt_scantuple = slot;
+ ResetExprContext(econtext);
+ if (!ExecQual(node->indexqual, econtext, false))
+ {
+ /* Fails recheck, so drop it and loop back for another */
+ InstrCountFiltered2(node, 1);
+ continue;
+ }
+ }
+
+ return slot;
+ }
+
+ /*
+ * if we get here it means the index scan failed so we are at the end of
+ * the scan..
+ */
+ return ExecClearTuple(slot);
+}
+
+/*
+ * StoreIndexTuple
+ * Fill the slot with data from the index tuple.
+ *
+ * At some point this might be generally-useful functionality, but
+ * right now we don't need it elsewhere.
+ */
+static void
+StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, Relation indexRel)
+{
+ TupleDesc indexDesc = RelationGetDescr(indexRel);
+ int nindexatts = indexDesc->natts;
+ Datum *values = slot->tts_values;
+ bool *isnull = slot->tts_isnull;
+ int i;
+
+ /*
+ * Note: we must use the index relation's tupdesc in index_getattr,
+ * not the slot's tupdesc, because of index_descriptor_hack().
+ */
+ Assert(slot->tts_tupleDescriptor->natts == nindexatts);
+
+ ExecClearTuple(slot);
+ for (i = 0; i < nindexatts; i++)
+ values[i] = index_getattr(itup, i + 1, indexDesc, &isnull[i]);
+ ExecStoreVirtualTuple(slot);
+}
+
+/*
+ * index_descriptor_hack -- ugly kluge to make index's tupdesc OK for slot
+ *
+ * This is necessary because, alone among btree opclasses, name_ops uses
+ * a storage type (cstring) different from its input type. The index
+ * tuple descriptor will show "cstring", which is correct, but we have to
+ * expose "name" as the slot datatype or ExecEvalVar will whine. If we
+ * ever want to have any other cases with a different storage type, we ought
+ * to think of a cleaner solution than this.
+ */
+static TupleDesc
+index_descriptor_hack(Relation indexRel)
+{
+ TupleDesc tupdesc = RelationGetDescr(indexRel);
+ int i;
+
+ /* copy so we can scribble on it safely */
+ tupdesc = CreateTupleDescCopy(tupdesc);
+
+ for (i = 0; i < tupdesc->natts; i++)
+ {
+ if (indexRel->rd_opfamily[i] == NAME_BTREE_FAM_OID &&
+ tupdesc->attrs[i]->atttypid == CSTRINGOID)
+ {
+ tupdesc->attrs[i]->atttypid = NAMEOID;
+
+ /*
+ * We set attlen to match the type OID just in case anything looks
+ * at it. Note that this is safe only because StoreIndexTuple
+ * will insert the data as a virtual tuple, and we don't expect
+ * anything will try to materialize the scan tuple slot.
+ */
+ tupdesc->attrs[i]->attlen = NAMEDATALEN;
+ }
+ }
+
+ return tupdesc;
+}
+
+/*
+ * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
+ *
+ * This can't really happen, since an index can't supply CTID which would
+ * be necessary data for any potential EvalPlanQual target relation. If it
+ * did happen, the EPQ code would pass us the wrong data, namely a heap
+ * tuple not an index tuple. So throw an error.
+ */
+static bool
+IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
+{
+ elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
+ return false; /* keep compiler quiet */
+}
+
+/* ----------------------------------------------------------------
+ * ExecIndexOnlyScan(node)
+ * ----------------------------------------------------------------
+ */
+TupleTableSlot *
+ExecIndexOnlyScan(IndexOnlyScanState *node)
+{
+ /*
+ * If we have runtime keys and they've not already been set up, do it now.
+ */
+ if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
+ ExecReScan((PlanState *) node);
+
+ return ExecScan(&node->ss,
+ (ExecScanAccessMtd) IndexOnlyNext,
+ (ExecScanRecheckMtd) IndexOnlyRecheck);
+}
+
+/* ----------------------------------------------------------------
+ * ExecReScanIndexOnlyScan(node)
+ *
+ * Recalculates the values of any scan keys whose value depends on
+ * information known at runtime, then rescans the indexed relation.
+ *
+ * Updating the scan key was formerly done separately in
+ * ExecUpdateIndexScanKeys. Integrating it into ReScan makes
+ * rescans of indices and relations/general streams more uniform.
+ * ----------------------------------------------------------------
+ */
+void
+ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
+{
+ /*
+ * If we are doing runtime key calculations (ie, any of the index key
+ * values weren't simple Consts), compute the new key values. But first,
+ * reset the context so we don't leak memory as each outer tuple is
+ * scanned. Note this assumes that we will recalculate *all* runtime keys
+ * on each call.
+ */
+ if (node->ioss_NumRuntimeKeys != 0)
+ {
+ ExprContext *econtext = node->ioss_RuntimeContext;
+
+ ResetExprContext(econtext);
+ ExecIndexEvalRuntimeKeys(econtext,
+ node->ioss_RuntimeKeys,
+ node->ioss_NumRuntimeKeys);
+ }
+ node->ioss_RuntimeKeysReady = true;
+
+ /* reset index scan */
+ index_rescan(node->ioss_ScanDesc,
+ node->ioss_ScanKeys, node->ioss_NumScanKeys,
+ node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
+
+ ExecScanReScan(&node->ss);
+}
+
+
+/* ----------------------------------------------------------------
+ * ExecEndIndexOnlyScan
+ * ----------------------------------------------------------------
+ */
+void
+ExecEndIndexOnlyScan(IndexOnlyScanState *node)
+{
+ Relation indexRelationDesc;
+ IndexScanDesc indexScanDesc;
+ Relation relation;
+
+ /*
+ * extract information from the node
+ */
+ indexRelationDesc = node->ioss_RelationDesc;
+ indexScanDesc = node->ioss_ScanDesc;
+ relation = node->ss.ss_currentRelation;
+
+ /* Release VM buffer pin, if any. */
+ if (node->ioss_VMBuffer != InvalidBuffer)
+ {
+ ReleaseBuffer(node->ioss_VMBuffer);
+ node->ioss_VMBuffer = InvalidBuffer;
+ }
+
+ /*
+ * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
+ */
+#ifdef NOT_USED
+ ExecFreeExprContext(&node->ss.ps);
+ if (node->ioss_RuntimeContext)
+ FreeExprContext(node->ioss_RuntimeContext, true);
+#endif
+
+ /*
+ * clear out tuple table slots
+ */
+ ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
+ ExecClearTuple(node->ss.ss_ScanTupleSlot);
+
+ /*
+ * close the index relation (no-op if we didn't open it)
+ */
+ if (indexScanDesc)
+ index_endscan(indexScanDesc);
+ if (indexRelationDesc)
+ index_close(indexRelationDesc, NoLock);
+
+ /*
+ * close the heap relation.
+ */
+ ExecCloseScanRelation(relation);
+}
+
+/* ----------------------------------------------------------------
+ * ExecIndexOnlyMarkPos
+ * ----------------------------------------------------------------
+ */
+void
+ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
+{
+ index_markpos(node->ioss_ScanDesc);
+}
+
+/* ----------------------------------------------------------------
+ * ExecIndexOnlyRestrPos
+ * ----------------------------------------------------------------
+ */
+void
+ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
+{
+ index_restrpos(node->ioss_ScanDesc);
+}
+
+/* ----------------------------------------------------------------
+ * ExecInitIndexOnlyScan
+ *
+ * Initializes the index scan's state information, creates
+ * scan keys, and opens the base and index relations.
+ *
+ * Note: index scans have 2 sets of state information because
+ * we have to keep track of the base relation and the
+ * index relation.
+ * ----------------------------------------------------------------
+ */
+IndexOnlyScanState *
+ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
+{
+ IndexOnlyScanState *indexstate;
+ Relation currentRelation;
+ bool relistarget;
+ TupleDesc tupDesc;
+
+ /*
+ * create state structure
+ */
+ indexstate = makeNode(IndexOnlyScanState);
+ indexstate->ss.ps.plan = (Plan *) node;
+ indexstate->ss.ps.state = estate;
+
+ /*
+ * Miscellaneous initialization
+ *
+ * create expression context for node
+ */
+ ExecAssignExprContext(estate, &indexstate->ss.ps);
+
+ indexstate->ss.ps.ps_TupFromTlist = false;
+
+ /*
+ * initialize child expressions
+ *
+ * Note: we don't initialize all of the indexorderby expression, only the
+ * sub-parts corresponding to runtime keys (see below).
+ */
+ indexstate->ss.ps.targetlist = (List *)
+ ExecInitExpr((Expr *) node->scan.plan.targetlist,
+ (PlanState *) indexstate);
+ indexstate->ss.ps.qual = (List *)
+ ExecInitExpr((Expr *) node->scan.plan.qual,
+ (PlanState *) indexstate);
+ indexstate->indexqual = (List *)
+ ExecInitExpr((Expr *) node->indexqual,
+ (PlanState *) indexstate);
+
+ /*
+ * tuple table initialization
+ */
+ ExecInitResultTupleSlot(estate, &indexstate->ss.ps);
+ ExecInitScanTupleSlot(estate, &indexstate->ss);
+
+ /*
+ * open the base relation and acquire appropriate lock on it.
+ */
+ currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid);
+
+ indexstate->ss.ss_currentRelation = currentRelation;
+ indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
+
+ /*
+ * Initialize result tuple type.
+ */
+ ExecAssignResultTypeFromTL(&indexstate->ss.ps);
+
+ /*
+ * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
+ * here. This allows an index-advisor plugin to EXPLAIN a plan containing
+ * references to nonexistent indexes.
+ */
+ if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
+ return indexstate;
+
+ /*
+ * Open the index relation.
+ *
+ * If the parent table is one of the target relations of the query, then
+ * InitPlan already opened and write-locked the index, so we can avoid
+ * taking another lock here. Otherwise we need a normal reader's lock.
+ */
+ relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
+ indexstate->ioss_RelationDesc = index_open(node->indexid,
+ relistarget ? NoLock : AccessShareLock);
+
+ /*
+ * Now we can get the scan tuple's type (which is the index's rowtype,
+ * not the heap's) and initialize result projection info.
+ */
+ tupDesc = index_descriptor_hack(indexstate->ioss_RelationDesc);
+ ExecAssignScanType(&indexstate->ss, tupDesc);
+ ExecAssignScanProjectionInfo(&indexstate->ss);
+
+ /*
+ * Initialize index-specific scan state
+ */
+ indexstate->ioss_RuntimeKeysReady = false;
+ indexstate->ioss_RuntimeKeys = NULL;
+ indexstate->ioss_NumRuntimeKeys = 0;
+
+ /*
+ * build the index scan keys from the index qualification
+ */
+ ExecIndexBuildScanKeys((PlanState *) indexstate,
+ indexstate->ioss_RelationDesc,
+ node->indexqual,
+ false,
+ &indexstate->ioss_ScanKeys,
+ &indexstate->ioss_NumScanKeys,
+ &indexstate->ioss_RuntimeKeys,
+ &indexstate->ioss_NumRuntimeKeys,
+ NULL, /* no ArrayKeys */
+ NULL);
+
+ /*
+ * any ORDER BY exprs have to be turned into scankeys in the same way
+ */
+ ExecIndexBuildScanKeys((PlanState *) indexstate,
+ indexstate->ioss_RelationDesc,
+ node->indexorderby,
+ true,
+ &indexstate->ioss_OrderByKeys,
+ &indexstate->ioss_NumOrderByKeys,
+ &indexstate->ioss_RuntimeKeys,
+ &indexstate->ioss_NumRuntimeKeys,
+ NULL, /* no ArrayKeys */
+ NULL);
+
+ /*
+ * If we have runtime keys, we need an ExprContext to evaluate them. The
+ * node's standard context won't do because we want to reset that context
+ * for every tuple. So, build another context just like the other one...
+ * -tgl 7/11/00
+ */
+ if (indexstate->ioss_NumRuntimeKeys != 0)
+ {
+ ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
+
+ ExecAssignExprContext(estate, &indexstate->ss.ps);
+ indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
+ indexstate->ss.ps.ps_ExprContext = stdecontext;
+ }
+ else
+ {
+ indexstate->ioss_RuntimeContext = NULL;
+ }
+
+ /*
+ * Initialize scan descriptor.
+ */
+ indexstate->ioss_ScanDesc = index_beginscan(currentRelation,
+ indexstate->ioss_RelationDesc,
+ estate->es_snapshot,
+ indexstate->ioss_NumScanKeys,
+ indexstate->ioss_NumOrderByKeys);
+
+ /* Set it up for index-only scan */
+ indexstate->ioss_ScanDesc->xs_want_itup = true;
+ indexstate->ioss_VMBuffer = InvalidBuffer;
+
+ /*
+ * If no run-time keys to calculate, go ahead and pass the scankeys to the
+ * index AM.
+ */
+ if (indexstate->ioss_NumRuntimeKeys == 0)
+ index_rescan(indexstate->ioss_ScanDesc,
+ indexstate->ioss_ScanKeys,
+ indexstate->ioss_NumScanKeys,
+ indexstate->ioss_OrderByKeys,
+ indexstate->ioss_NumOrderByKeys);
+
+ /*
+ * all done.
+ */
+ return indexstate;
+}
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index 56b9855094a..6d073bf5fdb 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -14,8 +14,8 @@
*/
/*
* INTERFACE ROUTINES
- * ExecIndexScan scans a relation using indices
- * ExecIndexNext using index to retrieve next tuple
+ * ExecIndexScan scans a relation using an index
+ * IndexNext retrieve next tuple using index
* ExecInitIndexScan creates and initializes state info.
* ExecReScanIndexScan rescans the indexed relation.
* ExecEndIndexScan releases all storage.
@@ -26,7 +26,6 @@
#include "access/nbtree.h"
#include "access/relscan.h"
-#include "access/visibilitymap.h"
#include "executor/execdebug.h"
#include "executor/nodeIndexscan.h"
#include "optimizer/clauses.h"
@@ -37,7 +36,6 @@
static TupleTableSlot *IndexNext(IndexScanState *node);
-static void IndexStoreHeapTuple(TupleTableSlot *slot, IndexScanDesc scandesc);
/* ----------------------------------------------------------------
@@ -56,7 +54,6 @@ IndexNext(IndexScanState *node)
IndexScanDesc scandesc;
HeapTuple tuple;
TupleTableSlot *slot;
- ItemPointer tid;
/*
* extract necessary information from index scan node
@@ -76,67 +73,23 @@ IndexNext(IndexScanState *node)
slot = node->ss.ss_ScanTupleSlot;
/*
- * OK, now that we have what we need, fetch the next TID.
+ * ok, now that we have what we need, fetch the next tuple.
*/
- while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
+ while ((tuple = index_getnext(scandesc, direction)) != NULL)
{
/*
- * Attempt index-only scan, if possible. For this, we need to have
- * gotten an index tuple from the AM, and we need the TID to reference
- * a heap page on which all tuples are known visible to everybody.
- * If that's the case, we don't need to visit the heap page for tuple
- * visibility testing, and we don't need any column values that are
- * not available from the index.
- *
- * Note: in the index-only path, we are still holding pin on the
- * scan's xs_cbuf, ie, the previously visited heap page. It's not
- * clear whether it'd be better to release that pin.
+ * Store the scanned tuple in the scan tuple slot of the scan state.
+ * Note: we pass 'false' because tuples returned by index_getnext are
+ * pointers onto disk pages and must not be pfree()'d.
*/
- if (scandesc->xs_want_itup &&
- visibilitymap_test(scandesc->heapRelation,
- ItemPointerGetBlockNumber(tid),
- &node->iss_VMBuffer))
- {
- /*
- * Convert index tuple to look like a heap tuple, and store the
- * results in the scan tuple slot.
- */
- IndexStoreHeapTuple(slot, scandesc);
- }
- else
- {
- /* Index-only approach not possible, so fetch heap tuple. */
- tuple = index_fetch_heap(scandesc);
-
- /* Tuple might not be visible. */
- if (tuple == NULL)
- continue;
-
- /*
- * Only MVCC snapshots are supported here, so there should be no
- * need to keep following the HOT chain once a visible entry has
- * been found. If we did want to allow that, we'd need to keep
- * more state to remember not to call index_getnext_tid next time.
- */
- if (scandesc->xs_continue_hot)
- elog(ERROR, "unsupported use of non-MVCC snapshot in executor");
-
- /*
- * Store the scanned tuple in the scan tuple slot of the scan
- * state.
- *
- * Note: we pass 'false' because tuples returned by amgetnext are
- * pointers onto disk pages and must not be pfree()'d.
- */
- ExecStoreTuple(tuple, /* tuple to store */
- slot, /* slot to store in */
- scandesc->xs_cbuf, /* buffer containing tuple */
- false); /* don't pfree */
- }
+ ExecStoreTuple(tuple, /* tuple to store */
+ slot, /* slot to store in */
+ scandesc->xs_cbuf, /* buffer containing tuple */
+ false); /* don't pfree */
/*
* If the index was lossy, we have to recheck the index quals using
- * the real tuple.
+ * the fetched tuple.
*/
if (scandesc->xs_recheck)
{
@@ -161,53 +114,6 @@ IndexNext(IndexScanState *node)
}
/*
- * IndexStoreHeapTuple
- *
- * When performing an index-only scan, we build a faux heap tuple
- * from the index tuple. Columns not present in the index are set to
- * NULL, which is OK because we know they won't be referenced.
- *
- * The faux tuple is built as a virtual tuple that depends on the
- * scandesc's xs_itup, so that must remain valid for as long as we
- * need the slot contents.
- */
-static void
-IndexStoreHeapTuple(TupleTableSlot *slot, IndexScanDesc scandesc)
-{
- Form_pg_index indexForm = scandesc->indexRelation->rd_index;
- TupleDesc indexDesc = RelationGetDescr(scandesc->indexRelation);
- int nindexatts = indexDesc->natts;
- int nheapatts = slot->tts_tupleDescriptor->natts;
- Datum *values = slot->tts_values;
- bool *isnull = slot->tts_isnull;
- int i;
-
- /* We must first set the slot to empty, and mark all columns as null */
- ExecClearTuple(slot);
-
- memset(isnull, true, nheapatts * sizeof(bool));
-
- /* Transpose index tuple into heap tuple. */
- for (i = 0; i < nindexatts; i++)
- {
- int indexatt = indexForm->indkey.values[i];
-
- /* Ignore expression columns, as well as system attributes */
- if (indexatt <= 0)
- continue;
-
- Assert(indexatt <= nheapatts);
-
- values[indexatt - 1] = index_getattr(scandesc->xs_itup, i + 1,
- indexDesc,
- &isnull[indexatt - 1]);
- }
-
- /* And now we can mark the slot as holding a virtual tuple. */
- ExecStoreVirtualTuple(slot);
-}
-
-/*
* IndexRecheck -- access method routine to recheck a tuple in EvalPlanQual
*/
static bool
@@ -493,13 +399,6 @@ ExecEndIndexScan(IndexScanState *node)
indexScanDesc = node->iss_ScanDesc;
relation = node->ss.ss_currentRelation;
- /* Release VM buffer pin, if any. */
- if (node->iss_VMBuffer != InvalidBuffer)
- {
- ReleaseBuffer(node->iss_VMBuffer);
- node->iss_VMBuffer = InvalidBuffer;
- }
-
/*
* Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
*/
@@ -659,7 +558,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
*/
ExecIndexBuildScanKeys((PlanState *) indexstate,
indexstate->iss_RelationDesc,
- node->scan.scanrelid,
node->indexqual,
false,
&indexstate->iss_ScanKeys,
@@ -674,7 +572,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
*/
ExecIndexBuildScanKeys((PlanState *) indexstate,
indexstate->iss_RelationDesc,
- node->scan.scanrelid,
node->indexorderby,
true,
&indexstate->iss_OrderByKeys,
@@ -712,10 +609,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
indexstate->iss_NumScanKeys,
indexstate->iss_NumOrderByKeys);
- /* Prepare for possible index-only scan */
- indexstate->iss_ScanDesc->xs_want_itup = node->indexonly;
- indexstate->iss_VMBuffer = InvalidBuffer;
-
/*
* If no run-time keys to calculate, go ahead and pass the scankeys to the
* index AM.
@@ -772,7 +665,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
*
* planstate: executor state node we are working for
* index: the index we are building scan keys for
- * scanrelid: varno of the index's relation within current query
* quals: indexquals (or indexorderbys) expressions
* isorderby: true if processing ORDER BY exprs, false if processing quals
* *runtimeKeys: ptr to pre-existing IndexRuntimeKeyInfos, or NULL if none
@@ -791,7 +683,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
* ScalarArrayOpExpr quals are not supported.
*/
void
-ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
+ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
List *quals, bool isorderby,
ScanKey *scanKeys, int *numScanKeys,
IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys,
@@ -865,7 +757,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
Assert(leftop != NULL);
if (!(IsA(leftop, Var) &&
- ((Var *) leftop)->varno == scanrelid))
+ ((Var *) leftop)->varno == INDEX_VAR))
elog(ERROR, "indexqual doesn't have key on left side");
varattno = ((Var *) leftop)->varattno;
@@ -979,7 +871,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
Assert(leftop != NULL);
if (!(IsA(leftop, Var) &&
- ((Var *) leftop)->varno == scanrelid))
+ ((Var *) leftop)->varno == INDEX_VAR))
elog(ERROR, "indexqual doesn't have key on left side");
varattno = ((Var *) leftop)->varattno;
@@ -1107,7 +999,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
Assert(leftop != NULL);
if (!(IsA(leftop, Var) &&
- ((Var *) leftop)->varno == scanrelid))
+ ((Var *) leftop)->varno == INDEX_VAR))
elog(ERROR, "indexqual doesn't have key on left side");
varattno = ((Var *) leftop)->varattno;
@@ -1172,7 +1064,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid,
Assert(leftop != NULL);
if (!(IsA(leftop, Var) &&
- ((Var *) leftop)->varno == scanrelid))
+ ((Var *) leftop)->varno == INDEX_VAR))
elog(ERROR, "NullTest indexqual has wrong key");
varattno = ((Var *) leftop)->varattno;
diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c
index 49b880d0caf..d6433c7f537 100644
--- a/src/backend/executor/nodeNestloop.c
+++ b/src/backend/executor/nodeNestloop.c
@@ -147,8 +147,8 @@ ExecNestLoop(NestLoopState *node)
ParamExecData *prm;
prm = &(econtext->ecxt_param_exec_vals[paramno]);
- /* Param value should be an OUTER var */
- Assert(nlp->paramval->varno == OUTER);
+ /* Param value should be a Var with varno OUTER_VAR */
+ Assert(nlp->paramval->varno == OUTER_VAR);
Assert(nlp->paramval->varattno > 0);
prm->value = slot_getattr(outerTupleSlot,
nlp->paramval->varattno,