diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2011-10-11 14:20:06 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2011-10-11 14:21:30 -0400 |
commit | a0185461dd94c8d31d8d55a7f2839b0d2f172ab9 (patch) | |
tree | 3bd68d4e123336bbdefa8fd92372f0af7fb6d64f /src | |
parent | fa351d5a0db0672b6f586315720302e493116f27 (diff) | |
download | postgresql-a0185461dd94c8d31d8d55a7f2839b0d2f172ab9.tar.gz postgresql-a0185461dd94c8d31d8d55a7f2839b0d2f172ab9.zip |
Rearrange the implementation of index-only scans.
This commit changes index-only scans so that data is read directly from the
index tuple without first generating a faux heap tuple. The only immediate
benefit is that indexes on system columns (such as OID) can be used in
index-only scans, but this is necessary infrastructure if we are ever to
support index-only scans on expression indexes. The executor is now ready
for that, though the planner still needs substantial work to recognize
the possibility.
To do this, Vars in index-only plan nodes have to refer to index columns,
not heap columns. I introduced a new special varno, INDEX_VAR, to mark
such Vars to avoid confusion. (In passing, this commit renames the two
existing special varnos to OUTER_VAR and INNER_VAR.) This allows
ruleutils.c to handle them with logic similar to what we use for subplan
reference Vars.
Since index-only scans are now fundamentally different from regular
index scans so far as their expression subtrees are concerned, I also chose
to change them to have their own plan node type (and hence, their own
executor source file).
Diffstat (limited to 'src')
34 files changed, 1313 insertions, 420 deletions
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index fbcaf6cbe09..e38de5c1534 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -79,6 +79,8 @@ static void show_instrumentation_count(const char *qlabel, int which, PlanState *planstate, ExplainState *es); static void show_foreignscan_info(ForeignScanState *fsstate, ExplainState *es); static const char *explain_get_index_name(Oid indexId); +static void ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, + ExplainState *es); static void ExplainScanTarget(Scan *plan, ExplainState *es); static void ExplainModifyTarget(ModifyTable *plan, ExplainState *es); static void ExplainTargetRel(Plan *plan, Index rti, ExplainState *es); @@ -656,10 +658,10 @@ ExplainNode(PlanState *planstate, List *ancestors, pname = sname = "Seq Scan"; break; case T_IndexScan: - if (((IndexScan *) plan)->indexonly) - pname = sname = "Index Only Scan"; - else - pname = sname = "Index Scan"; + pname = sname = "Index Scan"; + break; + case T_IndexOnlyScan: + pname = sname = "Index Only Scan"; break; case T_BitmapIndexScan: pname = sname = "Bitmap Index Scan"; @@ -793,42 +795,6 @@ ExplainNode(PlanState *planstate, List *ancestors, switch (nodeTag(plan)) { - case T_IndexScan: - { - IndexScan *indexscan = (IndexScan *) plan; - const char *indexname = - explain_get_index_name(indexscan->indexid); - - if (es->format == EXPLAIN_FORMAT_TEXT) - { - if (ScanDirectionIsBackward(indexscan->indexorderdir)) - appendStringInfoString(es->str, " Backward"); - appendStringInfo(es->str, " using %s", indexname); - } - else - { - const char *scandir; - - switch (indexscan->indexorderdir) - { - case BackwardScanDirection: - scandir = "Backward"; - break; - case NoMovementScanDirection: - scandir = "NoMovement"; - break; - case ForwardScanDirection: - scandir = "Forward"; - break; - default: - scandir = "???"; - break; - } - ExplainPropertyText("Scan Direction", scandir, es); - 
ExplainPropertyText("Index Name", indexname, es); - } - } - /* FALL THRU */ case T_SeqScan: case T_BitmapHeapScan: case T_TidScan: @@ -840,6 +806,26 @@ ExplainNode(PlanState *planstate, List *ancestors, case T_ForeignScan: ExplainScanTarget((Scan *) plan, es); break; + case T_IndexScan: + { + IndexScan *indexscan = (IndexScan *) plan; + + ExplainIndexScanDetails(indexscan->indexid, + indexscan->indexorderdir, + es); + ExplainScanTarget((Scan *) indexscan, es); + } + break; + case T_IndexOnlyScan: + { + IndexOnlyScan *indexonlyscan = (IndexOnlyScan *) plan; + + ExplainIndexScanDetails(indexonlyscan->indexid, + indexonlyscan->indexorderdir, + es); + ExplainScanTarget((Scan *) indexonlyscan, es); + } + break; case T_BitmapIndexScan: { BitmapIndexScan *bitmapindexscan = (BitmapIndexScan *) plan; @@ -1014,6 +1000,19 @@ ExplainNode(PlanState *planstate, List *ancestors, show_instrumentation_count("Rows Removed by Filter", 1, planstate, es); break; + case T_IndexOnlyScan: + show_scan_qual(((IndexOnlyScan *) plan)->indexqual, + "Index Cond", planstate, ancestors, es); + if (((IndexOnlyScan *) plan)->indexqual) + show_instrumentation_count("Rows Removed by Index Recheck", 2, + planstate, es); + show_scan_qual(((IndexOnlyScan *) plan)->indexorderby, + "Order By", planstate, ancestors, es); + show_scan_qual(plan->qual, "Filter", planstate, ancestors, es); + if (plan->qual) + show_instrumentation_count("Rows Removed by Filter", 1, + planstate, es); + break; case T_BitmapIndexScan: show_scan_qual(((BitmapIndexScan *) plan)->indexqualorig, "Index Cond", planstate, ancestors, es); @@ -1627,6 +1626,45 @@ explain_get_index_name(Oid indexId) } /* + * Add some additional details about an IndexScan or IndexOnlyScan + */ +static void +ExplainIndexScanDetails(Oid indexid, ScanDirection indexorderdir, + ExplainState *es) +{ + const char *indexname = explain_get_index_name(indexid); + + if (es->format == EXPLAIN_FORMAT_TEXT) + { + if (ScanDirectionIsBackward(indexorderdir)) + 
appendStringInfoString(es->str, " Backward"); + appendStringInfo(es->str, " using %s", indexname); + } + else + { + const char *scandir; + + switch (indexorderdir) + { + case BackwardScanDirection: + scandir = "Backward"; + break; + case NoMovementScanDirection: + scandir = "NoMovement"; + break; + case ForwardScanDirection: + scandir = "Forward"; + break; + default: + scandir = "???"; + break; + } + ExplainPropertyText("Scan Direction", scandir, es); + ExplainPropertyText("Index Name", indexname, es); + } +} + +/* * Show the target of a Scan node */ static void @@ -1670,6 +1708,7 @@ ExplainTargetRel(Plan *plan, Index rti, ExplainState *es) { case T_SeqScan: case T_IndexScan: + case T_IndexOnlyScan: case T_BitmapHeapScan: case T_TidScan: case T_ForeignScan: diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c index 06d368e0773..9fb97548485 100644 --- a/src/backend/commands/trigger.c +++ b/src/backend/commands/trigger.c @@ -2734,9 +2734,9 @@ TriggerEnabled(EState *estate, ResultRelInfo *relinfo, oldContext = MemoryContextSwitchTo(estate->es_query_cxt); tgqual = stringToNode(trigger->tgqual); - /* Change references to OLD and NEW to INNER and OUTER */ - ChangeVarNodes(tgqual, PRS2_OLD_VARNO, INNER, 0); - ChangeVarNodes(tgqual, PRS2_NEW_VARNO, OUTER, 0); + /* Change references to OLD and NEW to INNER_VAR and OUTER_VAR */ + ChangeVarNodes(tgqual, PRS2_OLD_VARNO, INNER_VAR, 0); + ChangeVarNodes(tgqual, PRS2_NEW_VARNO, OUTER_VAR, 0); /* ExecQual wants implicit-AND form */ tgqual = (Node *) make_ands_implicit((Expr *) tgqual); *predicate = (List *) ExecPrepareExpr((Expr *) tgqual, estate); @@ -2783,7 +2783,7 @@ TriggerEnabled(EState *estate, ResultRelInfo *relinfo, /* * Finally evaluate the expression, making the old and/or new tuples - * available as INNER/OUTER respectively. + * available as INNER_VAR/OUTER_VAR respectively. 
*/ econtext->ecxt_innertuple = oldslot; econtext->ecxt_outertuple = newslot; diff --git a/src/backend/executor/Makefile b/src/backend/executor/Makefile index a854c9a5dc6..6081b56c086 100644 --- a/src/backend/executor/Makefile +++ b/src/backend/executor/Makefile @@ -17,7 +17,8 @@ OBJS = execAmi.o execCurrent.o execGrouping.o execJunk.o execMain.o \ execUtils.o functions.o instrument.o nodeAppend.o nodeAgg.o \ nodeBitmapAnd.o nodeBitmapOr.o \ nodeBitmapHeapscan.o nodeBitmapIndexscan.o nodeHash.o \ - nodeHashjoin.o nodeIndexscan.o nodeLimit.o nodeLockRows.o \ + nodeHashjoin.o nodeIndexscan.o nodeIndexonlyscan.o \ + nodeLimit.o nodeLockRows.o \ nodeMaterial.o nodeMergeAppend.o nodeMergejoin.o nodeModifyTable.o \ nodeNestloop.o nodeFunctionscan.o nodeRecursiveunion.o nodeResult.o \ nodeSeqscan.o nodeSetOp.o nodeSort.o nodeUnique.o \ diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 711e8c77866..fa27640fed2 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -26,6 +26,7 @@ #include "executor/nodeGroup.h" #include "executor/nodeHash.h" #include "executor/nodeHashjoin.h" +#include "executor/nodeIndexonlyscan.h" #include "executor/nodeIndexscan.h" #include "executor/nodeLimit.h" #include "executor/nodeLockRows.h" @@ -155,6 +156,10 @@ ExecReScan(PlanState *node) ExecReScanIndexScan((IndexScanState *) node); break; + case T_IndexOnlyScanState: + ExecReScanIndexOnlyScan((IndexOnlyScanState *) node); + break; + case T_BitmapIndexScanState: ExecReScanBitmapIndexScan((BitmapIndexScanState *) node); break; @@ -273,6 +278,10 @@ ExecMarkPos(PlanState *node) ExecIndexMarkPos((IndexScanState *) node); break; + case T_IndexOnlyScanState: + ExecIndexOnlyMarkPos((IndexOnlyScanState *) node); + break; + case T_TidScanState: ExecTidMarkPos((TidScanState *) node); break; @@ -326,6 +335,10 @@ ExecRestrPos(PlanState *node) ExecIndexRestrPos((IndexScanState *) node); break; + case T_IndexOnlyScanState: + 
ExecIndexOnlyRestrPos((IndexOnlyScanState *) node); + break; + case T_TidScanState: ExecTidRestrPos((TidScanState *) node); break; @@ -371,6 +384,7 @@ ExecSupportsMarkRestore(NodeTag plantype) { case T_SeqScan: case T_IndexScan: + case T_IndexOnlyScan: case T_TidScan: case T_ValuesScan: case T_Material: @@ -442,6 +456,10 @@ ExecSupportsBackwardScan(Plan *node) return IndexSupportsBackwardScan(((IndexScan *) node)->indexid) && TargetListSupportsBackwardScan(node->targetlist); + case T_IndexOnlyScan: + return IndexSupportsBackwardScan(((IndexOnlyScan *) node)->indexid) && + TargetListSupportsBackwardScan(node->targetlist); + case T_SubqueryScan: return ExecSupportsBackwardScan(((SubqueryScan *) node)->subplan) && TargetListSupportsBackwardScan(node->targetlist); @@ -474,7 +492,8 @@ TargetListSupportsBackwardScan(List *targetlist) } /* - * An IndexScan node supports backward scan only if the index's AM does. + * An IndexScan or IndexOnlyScan node supports backward scan only if the + * index's AM does. 
*/ static bool IndexSupportsBackwardScan(Oid indexid) diff --git a/src/backend/executor/execCurrent.c b/src/backend/executor/execCurrent.c index 61a5f471124..5d70ad60de2 100644 --- a/src/backend/executor/execCurrent.c +++ b/src/backend/executor/execCurrent.c @@ -262,6 +262,7 @@ search_plan_tree(PlanState *node, Oid table_oid) */ case T_SeqScanState: case T_IndexScanState: + case T_IndexOnlyScanState: case T_BitmapHeapScanState: case T_TidScanState: { diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c index 284fc6a63b7..8ab9892c850 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -89,6 +89,7 @@ #include "executor/nodeGroup.h" #include "executor/nodeHash.h" #include "executor/nodeHashjoin.h" +#include "executor/nodeIndexonlyscan.h" #include "executor/nodeIndexscan.h" #include "executor/nodeLimit.h" #include "executor/nodeLockRows.h" @@ -192,6 +193,11 @@ ExecInitNode(Plan *node, EState *estate, int eflags) estate, eflags); break; + case T_IndexOnlyScan: + result = (PlanState *) ExecInitIndexOnlyScan((IndexOnlyScan *) node, + estate, eflags); + break; + case T_BitmapIndexScan: result = (PlanState *) ExecInitBitmapIndexScan((BitmapIndexScan *) node, estate, eflags); @@ -397,6 +403,10 @@ ExecProcNode(PlanState *node) result = ExecIndexScan((IndexScanState *) node); break; + case T_IndexOnlyScanState: + result = ExecIndexOnlyScan((IndexOnlyScanState *) node); + break; + /* BitmapIndexScanState does not yield tuples */ case T_BitmapHeapScanState: @@ -627,6 +637,10 @@ ExecEndNode(PlanState *node) ExecEndIndexScan((IndexScanState *) node); break; + case T_IndexOnlyScanState: + ExecEndIndexOnlyScan((IndexOnlyScanState *) node); + break; + case T_BitmapIndexScanState: ExecEndBitmapIndexScan((BitmapIndexScanState *) node); break; diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index 80f08d8b92e..887e5ce82a0 100644 --- a/src/backend/executor/execQual.c +++ 
b/src/backend/executor/execQual.c @@ -578,14 +578,16 @@ ExecEvalVar(ExprState *exprstate, ExprContext *econtext, /* Get the input slot and attribute number we want */ switch (variable->varno) { - case INNER: /* get the tuple from the inner node */ + case INNER_VAR: /* get the tuple from the inner node */ slot = econtext->ecxt_innertuple; break; - case OUTER: /* get the tuple from the outer node */ + case OUTER_VAR: /* get the tuple from the outer node */ slot = econtext->ecxt_outertuple; break; + /* INDEX_VAR is handled by default case */ + default: /* get the tuple from the relation being * scanned */ slot = econtext->ecxt_scantuple; @@ -761,14 +763,16 @@ ExecEvalScalarVar(ExprState *exprstate, ExprContext *econtext, /* Get the input slot and attribute number we want */ switch (variable->varno) { - case INNER: /* get the tuple from the inner node */ + case INNER_VAR: /* get the tuple from the inner node */ slot = econtext->ecxt_innertuple; break; - case OUTER: /* get the tuple from the outer node */ + case OUTER_VAR: /* get the tuple from the outer node */ slot = econtext->ecxt_outertuple; break; + /* INDEX_VAR is handled by default case */ + default: /* get the tuple from the relation being * scanned */ slot = econtext->ecxt_scantuple; @@ -804,14 +808,16 @@ ExecEvalWholeRowVar(ExprState *exprstate, ExprContext *econtext, /* Get the input slot we want */ switch (variable->varno) { - case INNER: /* get the tuple from the inner node */ + case INNER_VAR: /* get the tuple from the inner node */ slot = econtext->ecxt_innertuple; break; - case OUTER: /* get the tuple from the outer node */ + case OUTER_VAR: /* get the tuple from the outer node */ slot = econtext->ecxt_outertuple; break; + /* INDEX_VAR is handled by default case */ + default: /* get the tuple from the relation being * scanned */ slot = econtext->ecxt_scantuple; @@ -873,14 +879,16 @@ ExecEvalWholeRowSlow(ExprState *exprstate, ExprContext *econtext, /* Get the input slot we want */ switch (variable->varno) 
{ - case INNER: /* get the tuple from the inner node */ + case INNER_VAR: /* get the tuple from the inner node */ slot = econtext->ecxt_innertuple; break; - case OUTER: /* get the tuple from the outer node */ + case OUTER_VAR: /* get the tuple from the outer node */ slot = econtext->ecxt_outertuple; break; + /* INDEX_VAR is handled by default case */ + default: /* get the tuple from the relation being * scanned */ slot = econtext->ecxt_scantuple; diff --git a/src/backend/executor/execScan.c b/src/backend/executor/execScan.c index d4ed2358564..42acc102c63 100644 --- a/src/backend/executor/execScan.c +++ b/src/backend/executor/execScan.c @@ -246,10 +246,17 @@ void ExecAssignScanProjectionInfo(ScanState *node) { Scan *scan = (Scan *) node->ps.plan; + Index varno; + + /* Vars in an index-only scan's tlist should be INDEX_VAR */ + if (IsA(scan, IndexOnlyScan)) + varno = INDEX_VAR; + else + varno = scan->scanrelid; if (tlist_matches_tupdesc(&node->ps, scan->plan.targetlist, - scan->scanrelid, + varno, node->ss_ScanTupleSlot->tts_tupleDescriptor)) node->ps.ps_ProjInfo = NULL; else diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 4dbf10b8da9..65591e2445d 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -566,20 +566,22 @@ ExecBuildProjectionInfo(List *targetList, switch (variable->varno) { - case INNER: + case INNER_VAR: varSlotOffsets[numSimpleVars] = offsetof(ExprContext, ecxt_innertuple); if (projInfo->pi_lastInnerVar < attnum) projInfo->pi_lastInnerVar = attnum; break; - case OUTER: + case OUTER_VAR: varSlotOffsets[numSimpleVars] = offsetof(ExprContext, ecxt_outertuple); if (projInfo->pi_lastOuterVar < attnum) projInfo->pi_lastOuterVar = attnum; break; + /* INDEX_VAR is handled by default case */ + default: varSlotOffsets[numSimpleVars] = offsetof(ExprContext, ecxt_scantuple); @@ -628,16 +630,18 @@ get_last_attnums(Node *node, ProjectionInfo *projInfo) switch (variable->varno) { - case INNER: + 
case INNER_VAR: if (projInfo->pi_lastInnerVar < attnum) projInfo->pi_lastInnerVar = attnum; break; - case OUTER: + case OUTER_VAR: if (projInfo->pi_lastOuterVar < attnum) projInfo->pi_lastOuterVar = attnum; break; + /* INDEX_VAR is handled by default case */ + default: if (projInfo->pi_lastScanVar < attnum) projInfo->pi_lastScanVar = attnum; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index e769d6d012c..0701da40b1b 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -806,8 +806,8 @@ find_unaggregated_cols_walker(Node *node, Bitmapset **colnos) { Var *var = (Var *) node; - /* setrefs.c should have set the varno to OUTER */ - Assert(var->varno == OUTER); + /* setrefs.c should have set the varno to OUTER_VAR */ + Assert(var->varno == OUTER_VAR); Assert(var->varlevelsup == 0); *colnos = bms_add_member(*colnos, var->varattno); return false; diff --git a/src/backend/executor/nodeBitmapIndexscan.c b/src/backend/executor/nodeBitmapIndexscan.c index 8e1df079b37..8cc8315a457 100644 --- a/src/backend/executor/nodeBitmapIndexscan.c +++ b/src/backend/executor/nodeBitmapIndexscan.c @@ -266,7 +266,6 @@ ExecInitBitmapIndexScan(BitmapIndexScan *node, EState *estate, int eflags) */ ExecIndexBuildScanKeys((PlanState *) indexstate, indexstate->biss_RelationDesc, - node->scan.scanrelid, node->indexqual, false, &indexstate->biss_ScanKeys, diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index e72a71bf51b..091aef90e0a 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -755,8 +755,8 @@ ExecHashTableInsert(HashJoinTable hashtable, * Compute the hash value for a tuple * * The tuple to be tested must be in either econtext->ecxt_outertuple or - * econtext->ecxt_innertuple. Vars in the hashkeys expressions reference - * either OUTER or INNER. + * econtext->ecxt_innertuple. Vars in the hashkeys expressions should have + * varno either OUTER_VAR or INNER_VAR. 
* * A TRUE result means the tuple's hash value has been successfully computed * and stored at *hashvalue. A FALSE result means the tuple cannot match diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c new file mode 100644 index 00000000000..487373b4970 --- /dev/null +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -0,0 +1,542 @@ +/*------------------------------------------------------------------------- + * + * nodeIndexonlyscan.c + * Routines to support index-only scans + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/executor/nodeIndexonlyscan.c + * + *------------------------------------------------------------------------- + */ +/* + * INTERFACE ROUTINES + * ExecIndexOnlyScan scans an index + * IndexOnlyNext retrieve next tuple + * ExecInitIndexOnlyScan creates and initializes state info. + * ExecReScanIndexOnlyScan rescans the indexed relation. + * ExecEndIndexOnlyScan releases all storage. + * ExecIndexOnlyMarkPos marks scan position. + * ExecIndexOnlyRestrPos restores scan position. + */ +#include "postgres.h" + +#include "access/relscan.h" +#include "access/visibilitymap.h" +#include "catalog/pg_opfamily.h" +#include "catalog/pg_type.h" +#include "executor/execdebug.h" +#include "executor/nodeIndexonlyscan.h" +#include "executor/nodeIndexscan.h" +#include "storage/bufmgr.h" +#include "utils/memutils.h" +#include "utils/rel.h" + + +static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node); +static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, + Relation indexRel); + + +/* ---------------------------------------------------------------- + * IndexOnlyNext + * + * Retrieve a tuple from the IndexOnlyScan node's index. 
+ * ---------------------------------------------------------------- + */ +static TupleTableSlot * +IndexOnlyNext(IndexOnlyScanState *node) +{ + EState *estate; + ExprContext *econtext; + ScanDirection direction; + IndexScanDesc scandesc; + HeapTuple tuple; + TupleTableSlot *slot; + ItemPointer tid; + + /* + * extract necessary information from index scan node + */ + estate = node->ss.ps.state; + direction = estate->es_direction; + /* flip direction if this is an overall backward scan */ + if (ScanDirectionIsBackward(((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir)) + { + if (ScanDirectionIsForward(direction)) + direction = BackwardScanDirection; + else if (ScanDirectionIsBackward(direction)) + direction = ForwardScanDirection; + } + scandesc = node->ioss_ScanDesc; + econtext = node->ss.ps.ps_ExprContext; + slot = node->ss.ss_ScanTupleSlot; + + /* + * OK, now that we have what we need, fetch the next tuple. + */ + while ((tid = index_getnext_tid(scandesc, direction)) != NULL) + { + /* + * We can skip the heap fetch if the TID references a heap page on + * which all tuples are known visible to everybody. In any case, + * we'll use the index tuple not the heap tuple as the data source. + */ + if (!visibilitymap_test(scandesc->heapRelation, + ItemPointerGetBlockNumber(tid), + &node->ioss_VMBuffer)) + { + /* + * Rats, we have to visit the heap to check visibility. + */ + tuple = index_fetch_heap(scandesc); + if (tuple == NULL) + continue; /* no visible tuple, try next index entry */ + + /* + * Only MVCC snapshots are supported here, so there should be no + * need to keep following the HOT chain once a visible entry has + * been found. If we did want to allow that, we'd need to keep + * more state to remember not to call index_getnext_tid next time. + */ + if (scandesc->xs_continue_hot) + elog(ERROR, "non-MVCC snapshots are not supported in index-only scans"); + + /* + * Note: at this point we are holding a pin on the heap page, as + * recorded in scandesc->xs_cbuf. 
We could release that pin now, + * but it's not clear whether it's a win to do so. The next index + * entry might require a visit to the same heap page. + */ + } + + /* + * Fill the scan tuple slot with data from the index. + */ + StoreIndexTuple(slot, scandesc->xs_itup, scandesc->indexRelation); + + /* + * If the index was lossy, we have to recheck the index quals. + * (Currently, this can never happen, but we should support the case + * for possible future use, eg with GiST indexes.) + */ + if (scandesc->xs_recheck) + { + econtext->ecxt_scantuple = slot; + ResetExprContext(econtext); + if (!ExecQual(node->indexqual, econtext, false)) + { + /* Fails recheck, so drop it and loop back for another */ + InstrCountFiltered2(node, 1); + continue; + } + } + + return slot; + } + + /* + * if we get here it means the index scan failed so we are at the end of + * the scan.. + */ + return ExecClearTuple(slot); +} + +/* + * StoreIndexTuple + * Fill the slot with data from the index tuple. + * + * At some point this might be generally-useful functionality, but + * right now we don't need it elsewhere. + */ +static void +StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, Relation indexRel) +{ + TupleDesc indexDesc = RelationGetDescr(indexRel); + int nindexatts = indexDesc->natts; + Datum *values = slot->tts_values; + bool *isnull = slot->tts_isnull; + int i; + + /* + * Note: we must use the index relation's tupdesc in index_getattr, + * not the slot's tupdesc, because of index_descriptor_hack(). + */ + Assert(slot->tts_tupleDescriptor->natts == nindexatts); + + ExecClearTuple(slot); + for (i = 0; i < nindexatts; i++) + values[i] = index_getattr(itup, i + 1, indexDesc, &isnull[i]); + ExecStoreVirtualTuple(slot); +} + +/* + * index_descriptor_hack -- ugly kluge to make index's tupdesc OK for slot + * + * This is necessary because, alone among btree opclasses, name_ops uses + * a storage type (cstring) different from its input type. 
The index + * tuple descriptor will show "cstring", which is correct, but we have to + * expose "name" as the slot datatype or ExecEvalVar will whine. If we + * ever want to have any other cases with a different storage type, we ought + * to think of a cleaner solution than this. + */ +static TupleDesc +index_descriptor_hack(Relation indexRel) +{ + TupleDesc tupdesc = RelationGetDescr(indexRel); + int i; + + /* copy so we can scribble on it safely */ + tupdesc = CreateTupleDescCopy(tupdesc); + + for (i = 0; i < tupdesc->natts; i++) + { + if (indexRel->rd_opfamily[i] == NAME_BTREE_FAM_OID && + tupdesc->attrs[i]->atttypid == CSTRINGOID) + { + tupdesc->attrs[i]->atttypid = NAMEOID; + + /* + * We set attlen to match the type OID just in case anything looks + * at it. Note that this is safe only because StoreIndexTuple + * will insert the data as a virtual tuple, and we don't expect + * anything will try to materialize the scan tuple slot. + */ + tupdesc->attrs[i]->attlen = NAMEDATALEN; + } + } + + return tupdesc; +} + +/* + * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual + * + * This can't really happen, since an index can't supply CTID which would + * be necessary data for any potential EvalPlanQual target relation. If it + * did happen, the EPQ code would pass us the wrong data, namely a heap + * tuple not an index tuple. So throw an error. + */ +static bool +IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot) +{ + elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans"); + return false; /* keep compiler quiet */ +} + +/* ---------------------------------------------------------------- + * ExecIndexOnlyScan(node) + * ---------------------------------------------------------------- + */ +TupleTableSlot * +ExecIndexOnlyScan(IndexOnlyScanState *node) +{ + /* + * If we have runtime keys and they've not already been set up, do it now. 
+ */ + if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady) + ExecReScan((PlanState *) node); + + return ExecScan(&node->ss, + (ExecScanAccessMtd) IndexOnlyNext, + (ExecScanRecheckMtd) IndexOnlyRecheck); +} + +/* ---------------------------------------------------------------- + * ExecReScanIndexOnlyScan(node) + * + * Recalculates the values of any scan keys whose value depends on + * information known at runtime, then rescans the indexed relation. + * + * Updating the scan key was formerly done separately in + * ExecUpdateIndexScanKeys. Integrating it into ReScan makes + * rescans of indices and relations/general streams more uniform. + * ---------------------------------------------------------------- + */ +void +ExecReScanIndexOnlyScan(IndexOnlyScanState *node) +{ + /* + * If we are doing runtime key calculations (ie, any of the index key + * values weren't simple Consts), compute the new key values. But first, + * reset the context so we don't leak memory as each outer tuple is + * scanned. Note this assumes that we will recalculate *all* runtime keys + * on each call. 
+ */ + if (node->ioss_NumRuntimeKeys != 0) + { + ExprContext *econtext = node->ioss_RuntimeContext; + + ResetExprContext(econtext); + ExecIndexEvalRuntimeKeys(econtext, + node->ioss_RuntimeKeys, + node->ioss_NumRuntimeKeys); + } + node->ioss_RuntimeKeysReady = true; + + /* reset index scan */ + index_rescan(node->ioss_ScanDesc, + node->ioss_ScanKeys, node->ioss_NumScanKeys, + node->ioss_OrderByKeys, node->ioss_NumOrderByKeys); + + ExecScanReScan(&node->ss); +} + + +/* ---------------------------------------------------------------- + * ExecEndIndexOnlyScan + * ---------------------------------------------------------------- + */ +void +ExecEndIndexOnlyScan(IndexOnlyScanState *node) +{ + Relation indexRelationDesc; + IndexScanDesc indexScanDesc; + Relation relation; + + /* + * extract information from the node + */ + indexRelationDesc = node->ioss_RelationDesc; + indexScanDesc = node->ioss_ScanDesc; + relation = node->ss.ss_currentRelation; + + /* Release VM buffer pin, if any. */ + if (node->ioss_VMBuffer != InvalidBuffer) + { + ReleaseBuffer(node->ioss_VMBuffer); + node->ioss_VMBuffer = InvalidBuffer; + } + + /* + * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext + */ +#ifdef NOT_USED + ExecFreeExprContext(&node->ss.ps); + if (node->ioss_RuntimeContext) + FreeExprContext(node->ioss_RuntimeContext, true); +#endif + + /* + * clear out tuple table slots + */ + ExecClearTuple(node->ss.ps.ps_ResultTupleSlot); + ExecClearTuple(node->ss.ss_ScanTupleSlot); + + /* + * close the index relation (no-op if we didn't open it) + */ + if (indexScanDesc) + index_endscan(indexScanDesc); + if (indexRelationDesc) + index_close(indexRelationDesc, NoLock); + + /* + * close the heap relation. 
+ */ + ExecCloseScanRelation(relation); +} + +/* ---------------------------------------------------------------- + * ExecIndexOnlyMarkPos + * ---------------------------------------------------------------- + */ +void +ExecIndexOnlyMarkPos(IndexOnlyScanState *node) +{ + index_markpos(node->ioss_ScanDesc); +} + +/* ---------------------------------------------------------------- + * ExecIndexOnlyRestrPos + * ---------------------------------------------------------------- + */ +void +ExecIndexOnlyRestrPos(IndexOnlyScanState *node) +{ + index_restrpos(node->ioss_ScanDesc); +} + +/* ---------------------------------------------------------------- + * ExecInitIndexOnlyScan + * + * Initializes the index scan's state information, creates + * scan keys, and opens the base and index relations. + * + * Note: index scans have 2 sets of state information because + * we have to keep track of the base relation and the + * index relation. + * ---------------------------------------------------------------- + */ +IndexOnlyScanState * +ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags) +{ + IndexOnlyScanState *indexstate; + Relation currentRelation; + bool relistarget; + TupleDesc tupDesc; + + /* + * create state structure + */ + indexstate = makeNode(IndexOnlyScanState); + indexstate->ss.ps.plan = (Plan *) node; + indexstate->ss.ps.state = estate; + + /* + * Miscellaneous initialization + * + * create expression context for node + */ + ExecAssignExprContext(estate, &indexstate->ss.ps); + + indexstate->ss.ps.ps_TupFromTlist = false; + + /* + * initialize child expressions + * + * Note: we don't initialize all of the indexorderby expression, only the + * sub-parts corresponding to runtime keys (see below). 
+ */ + indexstate->ss.ps.targetlist = (List *) + ExecInitExpr((Expr *) node->scan.plan.targetlist, + (PlanState *) indexstate); + indexstate->ss.ps.qual = (List *) + ExecInitExpr((Expr *) node->scan.plan.qual, + (PlanState *) indexstate); + indexstate->indexqual = (List *) + ExecInitExpr((Expr *) node->indexqual, + (PlanState *) indexstate); + + /* + * tuple table initialization + */ + ExecInitResultTupleSlot(estate, &indexstate->ss.ps); + ExecInitScanTupleSlot(estate, &indexstate->ss); + + /* + * open the base relation and acquire appropriate lock on it. + */ + currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid); + + indexstate->ss.ss_currentRelation = currentRelation; + indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */ + + /* + * Initialize result tuple type. + */ + ExecAssignResultTypeFromTL(&indexstate->ss.ps); + + /* + * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop + * here. This allows an index-advisor plugin to EXPLAIN a plan containing + * references to nonexistent indexes. + */ + if (eflags & EXEC_FLAG_EXPLAIN_ONLY) + return indexstate; + + /* + * Open the index relation. + * + * If the parent table is one of the target relations of the query, then + * InitPlan already opened and write-locked the index, so we can avoid + * taking another lock here. Otherwise we need a normal reader's lock. + */ + relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid); + indexstate->ioss_RelationDesc = index_open(node->indexid, + relistarget ? NoLock : AccessShareLock); + + /* + * Now we can get the scan tuple's type (which is the index's rowtype, + * not the heap's) and initialize result projection info. 
+ */ + tupDesc = index_descriptor_hack(indexstate->ioss_RelationDesc); + ExecAssignScanType(&indexstate->ss, tupDesc); + ExecAssignScanProjectionInfo(&indexstate->ss); + + /* + * Initialize index-specific scan state + */ + indexstate->ioss_RuntimeKeysReady = false; + indexstate->ioss_RuntimeKeys = NULL; + indexstate->ioss_NumRuntimeKeys = 0; + + /* + * build the index scan keys from the index qualification + */ + ExecIndexBuildScanKeys((PlanState *) indexstate, + indexstate->ioss_RelationDesc, + node->indexqual, + false, + &indexstate->ioss_ScanKeys, + &indexstate->ioss_NumScanKeys, + &indexstate->ioss_RuntimeKeys, + &indexstate->ioss_NumRuntimeKeys, + NULL, /* no ArrayKeys */ + NULL); + + /* + * any ORDER BY exprs have to be turned into scankeys in the same way + */ + ExecIndexBuildScanKeys((PlanState *) indexstate, + indexstate->ioss_RelationDesc, + node->indexorderby, + true, + &indexstate->ioss_OrderByKeys, + &indexstate->ioss_NumOrderByKeys, + &indexstate->ioss_RuntimeKeys, + &indexstate->ioss_NumRuntimeKeys, + NULL, /* no ArrayKeys */ + NULL); + + /* + * If we have runtime keys, we need an ExprContext to evaluate them. The + * node's standard context won't do because we want to reset that context + * for every tuple. So, build another context just like the other one... + * -tgl 7/11/00 + */ + if (indexstate->ioss_NumRuntimeKeys != 0) + { + ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext; + + ExecAssignExprContext(estate, &indexstate->ss.ps); + indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext; + indexstate->ss.ps.ps_ExprContext = stdecontext; + } + else + { + indexstate->ioss_RuntimeContext = NULL; + } + + /* + * Initialize scan descriptor. 
+ */ + indexstate->ioss_ScanDesc = index_beginscan(currentRelation, + indexstate->ioss_RelationDesc, + estate->es_snapshot, + indexstate->ioss_NumScanKeys, + indexstate->ioss_NumOrderByKeys); + + /* Set it up for index-only scan */ + indexstate->ioss_ScanDesc->xs_want_itup = true; + indexstate->ioss_VMBuffer = InvalidBuffer; + + /* + * If no run-time keys to calculate, go ahead and pass the scankeys to the + * index AM. + */ + if (indexstate->ioss_NumRuntimeKeys == 0) + index_rescan(indexstate->ioss_ScanDesc, + indexstate->ioss_ScanKeys, + indexstate->ioss_NumScanKeys, + indexstate->ioss_OrderByKeys, + indexstate->ioss_NumOrderByKeys); + + /* + * all done. + */ + return indexstate; +} diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 56b9855094a..6d073bf5fdb 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -14,8 +14,8 @@ */ /* * INTERFACE ROUTINES - * ExecIndexScan scans a relation using indices - * ExecIndexNext using index to retrieve next tuple + * ExecIndexScan scans a relation using an index + * IndexNext retrieve next tuple using index * ExecInitIndexScan creates and initializes state info. * ExecReScanIndexScan rescans the indexed relation. * ExecEndIndexScan releases all storage. 
@@ -26,7 +26,6 @@ #include "access/nbtree.h" #include "access/relscan.h" -#include "access/visibilitymap.h" #include "executor/execdebug.h" #include "executor/nodeIndexscan.h" #include "optimizer/clauses.h" @@ -37,7 +36,6 @@ static TupleTableSlot *IndexNext(IndexScanState *node); -static void IndexStoreHeapTuple(TupleTableSlot *slot, IndexScanDesc scandesc); /* ---------------------------------------------------------------- @@ -56,7 +54,6 @@ IndexNext(IndexScanState *node) IndexScanDesc scandesc; HeapTuple tuple; TupleTableSlot *slot; - ItemPointer tid; /* * extract necessary information from index scan node @@ -76,67 +73,23 @@ IndexNext(IndexScanState *node) slot = node->ss.ss_ScanTupleSlot; /* - * OK, now that we have what we need, fetch the next TID. + * ok, now that we have what we need, fetch the next tuple. */ - while ((tid = index_getnext_tid(scandesc, direction)) != NULL) + while ((tuple = index_getnext(scandesc, direction)) != NULL) { /* - * Attempt index-only scan, if possible. For this, we need to have - * gotten an index tuple from the AM, and we need the TID to reference - * a heap page on which all tuples are known visible to everybody. - * If that's the case, we don't need to visit the heap page for tuple - * visibility testing, and we don't need any column values that are - * not available from the index. - * - * Note: in the index-only path, we are still holding pin on the - * scan's xs_cbuf, ie, the previously visited heap page. It's not - * clear whether it'd be better to release that pin. + * Store the scanned tuple in the scan tuple slot of the scan state. + * Note: we pass 'false' because tuples returned by amgetnext are + * pointers onto disk pages and must not be pfree()'d. */ - if (scandesc->xs_want_itup && - visibilitymap_test(scandesc->heapRelation, - ItemPointerGetBlockNumber(tid), - &node->iss_VMBuffer)) - { - /* - * Convert index tuple to look like a heap tuple, and store the - * results in the scan tuple slot. 
- */ - IndexStoreHeapTuple(slot, scandesc); - } - else - { - /* Index-only approach not possible, so fetch heap tuple. */ - tuple = index_fetch_heap(scandesc); - - /* Tuple might not be visible. */ - if (tuple == NULL) - continue; - - /* - * Only MVCC snapshots are supported here, so there should be no - * need to keep following the HOT chain once a visible entry has - * been found. If we did want to allow that, we'd need to keep - * more state to remember not to call index_getnext_tid next time. - */ - if (scandesc->xs_continue_hot) - elog(ERROR, "unsupported use of non-MVCC snapshot in executor"); - - /* - * Store the scanned tuple in the scan tuple slot of the scan - * state. - * - * Note: we pass 'false' because tuples returned by amgetnext are - * pointers onto disk pages and must not be pfree()'d. - */ - ExecStoreTuple(tuple, /* tuple to store */ - slot, /* slot to store in */ - scandesc->xs_cbuf, /* buffer containing tuple */ - false); /* don't pfree */ - } + ExecStoreTuple(tuple, /* tuple to store */ + slot, /* slot to store in */ + scandesc->xs_cbuf, /* buffer containing tuple */ + false); /* don't pfree */ /* * If the index was lossy, we have to recheck the index quals using - * the real tuple. + * the fetched tuple. */ if (scandesc->xs_recheck) { @@ -161,53 +114,6 @@ IndexNext(IndexScanState *node) } /* - * IndexStoreHeapTuple - * - * When performing an index-only scan, we build a faux heap tuple - * from the index tuple. Columns not present in the index are set to - * NULL, which is OK because we know they won't be referenced. - * - * The faux tuple is built as a virtual tuple that depends on the - * scandesc's xs_itup, so that must remain valid for as long as we - * need the slot contents. 
- */ -static void -IndexStoreHeapTuple(TupleTableSlot *slot, IndexScanDesc scandesc) -{ - Form_pg_index indexForm = scandesc->indexRelation->rd_index; - TupleDesc indexDesc = RelationGetDescr(scandesc->indexRelation); - int nindexatts = indexDesc->natts; - int nheapatts = slot->tts_tupleDescriptor->natts; - Datum *values = slot->tts_values; - bool *isnull = slot->tts_isnull; - int i; - - /* We must first set the slot to empty, and mark all columns as null */ - ExecClearTuple(slot); - - memset(isnull, true, nheapatts * sizeof(bool)); - - /* Transpose index tuple into heap tuple. */ - for (i = 0; i < nindexatts; i++) - { - int indexatt = indexForm->indkey.values[i]; - - /* Ignore expression columns, as well as system attributes */ - if (indexatt <= 0) - continue; - - Assert(indexatt <= nheapatts); - - values[indexatt - 1] = index_getattr(scandesc->xs_itup, i + 1, - indexDesc, - &isnull[indexatt - 1]); - } - - /* And now we can mark the slot as holding a virtual tuple. */ - ExecStoreVirtualTuple(slot); -} - -/* * IndexRecheck -- access method routine to recheck a tuple in EvalPlanQual */ static bool @@ -493,13 +399,6 @@ ExecEndIndexScan(IndexScanState *node) indexScanDesc = node->iss_ScanDesc; relation = node->ss.ss_currentRelation; - /* Release VM buffer pin, if any. */ - if (node->iss_VMBuffer != InvalidBuffer) - { - ReleaseBuffer(node->iss_VMBuffer); - node->iss_VMBuffer = InvalidBuffer; - } - /* * Free the exprcontext(s) ... 
now dead code, see ExecFreeExprContext */ @@ -659,7 +558,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) */ ExecIndexBuildScanKeys((PlanState *) indexstate, indexstate->iss_RelationDesc, - node->scan.scanrelid, node->indexqual, false, &indexstate->iss_ScanKeys, @@ -674,7 +572,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) */ ExecIndexBuildScanKeys((PlanState *) indexstate, indexstate->iss_RelationDesc, - node->scan.scanrelid, node->indexorderby, true, &indexstate->iss_OrderByKeys, @@ -712,10 +609,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) indexstate->iss_NumScanKeys, indexstate->iss_NumOrderByKeys); - /* Prepare for possible index-only scan */ - indexstate->iss_ScanDesc->xs_want_itup = node->indexonly; - indexstate->iss_VMBuffer = InvalidBuffer; - /* * If no run-time keys to calculate, go ahead and pass the scankeys to the * index AM. @@ -772,7 +665,6 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) * * planstate: executor state node we are working for * index: the index we are building scan keys for - * scanrelid: varno of the index's relation within current query * quals: indexquals (or indexorderbys) expressions * isorderby: true if processing ORDER BY exprs, false if processing quals * *runtimeKeys: ptr to pre-existing IndexRuntimeKeyInfos, or NULL if none @@ -791,7 +683,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) * ScalarArrayOpExpr quals are not supported. 
*/ void -ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, +ExecIndexBuildScanKeys(PlanState *planstate, Relation index, List *quals, bool isorderby, ScanKey *scanKeys, int *numScanKeys, IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys, @@ -865,7 +757,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, Assert(leftop != NULL); if (!(IsA(leftop, Var) && - ((Var *) leftop)->varno == scanrelid)) + ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "indexqual doesn't have key on left side"); varattno = ((Var *) leftop)->varattno; @@ -979,7 +871,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, Assert(leftop != NULL); if (!(IsA(leftop, Var) && - ((Var *) leftop)->varno == scanrelid)) + ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "indexqual doesn't have key on left side"); varattno = ((Var *) leftop)->varattno; @@ -1107,7 +999,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, Assert(leftop != NULL); if (!(IsA(leftop, Var) && - ((Var *) leftop)->varno == scanrelid)) + ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "indexqual doesn't have key on left side"); varattno = ((Var *) leftop)->varattno; @@ -1172,7 +1064,7 @@ ExecIndexBuildScanKeys(PlanState *planstate, Relation index, Index scanrelid, Assert(leftop != NULL); if (!(IsA(leftop, Var) && - ((Var *) leftop)->varno == scanrelid)) + ((Var *) leftop)->varno == INDEX_VAR)) elog(ERROR, "NullTest indexqual has wrong key"); varattno = ((Var *) leftop)->varattno; diff --git a/src/backend/executor/nodeNestloop.c b/src/backend/executor/nodeNestloop.c index 49b880d0caf..d6433c7f537 100644 --- a/src/backend/executor/nodeNestloop.c +++ b/src/backend/executor/nodeNestloop.c @@ -147,8 +147,8 @@ ExecNestLoop(NestLoopState *node) ParamExecData *prm; prm = &(econtext->ecxt_param_exec_vals[paramno]); - /* Param value should be an OUTER var */ - Assert(nlp->paramval->varno == OUTER); + /* Param 
value should be an OUTER_VAR var */ + Assert(nlp->paramval->varno == OUTER_VAR); Assert(nlp->paramval->varattno > 0); prm->value = slot_getattr(outerTupleSlot, nlp->paramval->varattno, diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 5100642dd63..24ac5295f60 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -370,7 +370,31 @@ _copyIndexScan(IndexScan *from) COPY_NODE_FIELD(indexorderby); COPY_NODE_FIELD(indexorderbyorig); COPY_SCALAR_FIELD(indexorderdir); - COPY_SCALAR_FIELD(indexonly); + + return newnode; +} + +/* + * _copyIndexOnlyScan + */ +static IndexOnlyScan * +_copyIndexOnlyScan(IndexOnlyScan *from) +{ + IndexOnlyScan *newnode = makeNode(IndexOnlyScan); + + /* + * copy node superclass fields + */ + CopyScanFields((Scan *) from, (Scan *) newnode); + + /* + * copy remainder of node + */ + COPY_SCALAR_FIELD(indexid); + COPY_NODE_FIELD(indexqual); + COPY_NODE_FIELD(indexorderby); + COPY_NODE_FIELD(indextlist); + COPY_SCALAR_FIELD(indexorderdir); return newnode; } @@ -3871,6 +3895,9 @@ copyObject(void *from) case T_IndexScan: retval = _copyIndexScan(from); break; + case T_IndexOnlyScan: + retval = _copyIndexOnlyScan(from); + break; case T_BitmapIndexScan: retval = _copyBitmapIndexScan(from); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 9f564277747..eba3d6d5797 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -447,7 +447,20 @@ _outIndexScan(StringInfo str, IndexScan *node) WRITE_NODE_FIELD(indexorderby); WRITE_NODE_FIELD(indexorderbyorig); WRITE_ENUM_FIELD(indexorderdir, ScanDirection); - WRITE_BOOL_FIELD(indexonly); +} + +static void +_outIndexOnlyScan(StringInfo str, IndexOnlyScan *node) +{ + WRITE_NODE_TYPE("INDEXONLYSCAN"); + + _outScanInfo(str, (Scan *) node); + + WRITE_OID_FIELD(indexid); + WRITE_NODE_FIELD(indexqual); + WRITE_NODE_FIELD(indexorderby); + WRITE_NODE_FIELD(indextlist); + WRITE_ENUM_FIELD(indexorderdir, 
ScanDirection); } static void @@ -1501,7 +1514,6 @@ _outIndexPath(StringInfo str, IndexPath *node) WRITE_NODE_FIELD(indexorderbys); WRITE_BOOL_FIELD(isjoininner); WRITE_ENUM_FIELD(indexscandir, ScanDirection); - WRITE_BOOL_FIELD(indexonly); WRITE_FLOAT_FIELD(indextotalcost, "%.2f"); WRITE_FLOAT_FIELD(indexselectivity, "%.4f"); WRITE_FLOAT_FIELD(rows, "%.0f"); @@ -1752,8 +1764,9 @@ _outIndexOptInfo(StringInfo str, IndexOptInfo *node) WRITE_FLOAT_FIELD(tuples, "%.0f"); WRITE_INT_FIELD(ncolumns); WRITE_OID_FIELD(relam); - WRITE_NODE_FIELD(indexprs); + /* indexprs is redundant since we print indextlist */ WRITE_NODE_FIELD(indpred); + WRITE_NODE_FIELD(indextlist); WRITE_BOOL_FIELD(predOK); WRITE_BOOL_FIELD(unique); WRITE_BOOL_FIELD(hypothetical); @@ -2707,6 +2720,9 @@ _outNode(StringInfo str, void *obj) case T_IndexScan: _outIndexScan(str, obj); break; + case T_IndexOnlyScan: + _outIndexOnlyScan(str, obj); + break; case T_BitmapIndexScan: _outBitmapIndexScan(str, obj); break; diff --git a/src/backend/nodes/print.c b/src/backend/nodes/print.c index 0b0cf384829..5fe4fd5520c 100644 --- a/src/backend/nodes/print.c +++ b/src/backend/nodes/print.c @@ -320,14 +320,18 @@ print_expr(Node *expr, List *rtable) switch (var->varno) { - case INNER: + case INNER_VAR: relname = "INNER"; attname = "?"; break; - case OUTER: + case OUTER_VAR: relname = "OUTER"; attname = "?"; break; + case INDEX_VAR: + relname = "INDEX"; + attname = "?"; + break; default: { RangeTblEntry *rte; diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 7090a7e0c0d..9ab146a1f74 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -199,14 +199,15 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel) true, NULL, SAOP_FORBID, ST_ANYSCAN); /* - * Submit all the ones that can form plain IndexScan plans to add_path. 
(A - * plain IndexPath always represents a plain IndexScan plan; however some - * of the indexes might support only bitmap scans, and those we mustn't - * submit to add_path here.) Also, pick out the ones that might be useful - * as bitmap scans. For that, we must discard indexes that don't support - * bitmap scans, and we also are only interested in paths that have some - * selectivity; we should discard anything that was generated solely for - * ordering purposes. + * Submit all the ones that can form plain IndexScan plans to add_path. + * (A plain IndexPath might represent either a plain IndexScan or an + * IndexOnlyScan, but for our purposes here the distinction does not + * matter. However, some of the indexes might support only bitmap scans, + * and those we mustn't submit to add_path here.) Also, pick out the ones + * that might be useful as bitmap scans. For that, we must discard + * indexes that don't support bitmap scans, and we also are only + * interested in paths that have some selectivity; we should discard + * anything that was generated solely for ordering purposes. */ bitindexpaths = NIL; foreach(l, indexpaths) @@ -1107,11 +1108,9 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index) /* * For the moment, we just ignore index expressions. It might be nice - * to do something with them, later. We also ignore index columns - * that are system columns (such as OID), because the virtual-tuple - * coding used by IndexStoreHeapTuple() can't deal with them. + * to do something with them, later. 
*/ - if (attno <= 0) + if (attno == 0) continue; index_attrs = diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index e5228a81c63..d32fbba237c 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -25,7 +25,6 @@ #include "optimizer/pathnode.h" #include "optimizer/paths.h" #include "optimizer/tlist.h" -#include "parser/parsetree.h" #include "utils/lsyscache.h" @@ -35,8 +34,6 @@ static PathKey *make_canonical_pathkey(PlannerInfo *root, EquivalenceClass *eclass, Oid opfamily, int strategy, bool nulls_first); static bool pathkey_is_redundant(PathKey *new_pathkey, List *pathkeys); -static Var *find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, - AttrNumber varattno); static bool right_merge_direction(PlannerInfo *root, PathKey *pathkey); @@ -504,21 +501,24 @@ build_index_pathkeys(PlannerInfo *root, ScanDirection scandir) { List *retval = NIL; - ListCell *indexprs_item; + ListCell *lc; int i; if (index->sortopfamily == NULL) return NIL; /* non-orderable index */ - indexprs_item = list_head(index->indexprs); - for (i = 0; i < index->ncolumns; i++) + i = 0; + foreach(lc, index->indextlist) { + TargetEntry *indextle = (TargetEntry *) lfirst(lc); + Expr *indexkey; bool reverse_sort; bool nulls_first; - int ikey; - Expr *indexkey; PathKey *cpathkey; + /* We assume we don't need to make a copy of the tlist item */ + indexkey = indextle->expr; + if (ScanDirectionIsBackward(scandir)) { reverse_sort = !index->reverse_sort[i]; @@ -530,21 +530,6 @@ build_index_pathkeys(PlannerInfo *root, nulls_first = index->nulls_first[i]; } - ikey = index->indexkeys[i]; - if (ikey != 0) - { - /* simple index column */ - indexkey = (Expr *) find_indexkey_var(root, index->rel, ikey); - } - else - { - /* expression --- assume we need not copy it */ - if (indexprs_item == NULL) - elog(ERROR, "wrong number of index expressions"); - indexkey = (Expr *) lfirst(indexprs_item); - indexprs_item = lnext(indexprs_item); 
- } - /* OK, try to make a canonical pathkey for this sort key */ cpathkey = make_pathkey_from_sortinfo(root, indexkey, @@ -568,44 +553,11 @@ build_index_pathkeys(PlannerInfo *root, /* Add to list unless redundant */ if (!pathkey_is_redundant(cpathkey, retval)) retval = lappend(retval, cpathkey); - } - return retval; -} - -/* - * Find or make a Var node for the specified attribute of the rel. - * - * We first look for the var in the rel's target list, because that's - * easy and fast. But the var might not be there (this should normally - * only happen for vars that are used in WHERE restriction clauses, - * but not in join clauses or in the SELECT target list). In that case, - * gin up a Var node the hard way. - */ -static Var * -find_indexkey_var(PlannerInfo *root, RelOptInfo *rel, AttrNumber varattno) -{ - ListCell *temp; - Index relid; - Oid reloid, - vartypeid, - varcollid; - int32 type_mod; - - foreach(temp, rel->reltargetlist) - { - Var *var = (Var *) lfirst(temp); - - if (IsA(var, Var) && - var->varattno == varattno) - return var; + i++; } - relid = rel->relid; - reloid = getrelid(relid, root->parse->rtable); - get_atttypetypmodcoll(reloid, varattno, &vartypeid, &type_mod, &varcollid); - - return makeVar(relid, varattno, vartypeid, type_mod, varcollid, 0); + return retval; } /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 36ee7c5648a..a76f2c603cd 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -53,8 +53,8 @@ static Material *create_material_plan(PlannerInfo *root, MaterialPath *best_path static Plan *create_unique_plan(PlannerInfo *root, UniquePath *best_path); static SeqScan *create_seqscan_plan(PlannerInfo *root, Path *best_path, List *tlist, List *scan_clauses); -static IndexScan *create_indexscan_plan(PlannerInfo *root, IndexPath *best_path, - List *tlist, List *scan_clauses); +static Scan *create_indexscan_plan(PlannerInfo *root, IndexPath 
*best_path, + List *tlist, List *scan_clauses, bool indexonly); static BitmapHeapScan *create_bitmap_scan_plan(PlannerInfo *root, BitmapHeapPath *best_path, List *tlist, List *scan_clauses); @@ -95,7 +95,12 @@ static SeqScan *make_seqscan(List *qptlist, List *qpqual, Index scanrelid); static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, Oid indexid, List *indexqual, List *indexqualorig, List *indexorderby, List *indexorderbyorig, - ScanDirection indexscandir, bool indexonly); + ScanDirection indexscandir); +static IndexOnlyScan *make_indexonlyscan(List *qptlist, List *qpqual, + Index scanrelid, Oid indexid, + List *indexqual, List *indexorderby, + List *indextlist, + ScanDirection indexscandir); static BitmapIndexScan *make_bitmap_indexscan(Index scanrelid, Oid indexid, List *indexqual, List *indexqualorig); @@ -206,6 +211,7 @@ create_plan_recurse(PlannerInfo *root, Path *best_path) { case T_SeqScan: case T_IndexScan: + case T_IndexOnlyScan: case T_BitmapHeapScan: case T_TidScan: case T_SubqueryScan: @@ -274,10 +280,18 @@ create_scan_plan(PlannerInfo *root, Path *best_path) */ if (use_physical_tlist(root, rel)) { - tlist = build_physical_tlist(root, rel); - /* if fail because of dropped cols, use regular method */ - if (tlist == NIL) - tlist = build_relation_tlist(rel); + if (best_path->pathtype == T_IndexOnlyScan) + { + /* For index-only scan, the preferred tlist is the index's */ + tlist = copyObject(((IndexPath *) best_path)->indexinfo->indextlist); + } + else + { + tlist = build_physical_tlist(root, rel); + /* if fail because of dropped cols, use regular method */ + if (tlist == NIL) + tlist = build_relation_tlist(rel); + } } else tlist = build_relation_tlist(rel); @@ -302,7 +316,16 @@ create_scan_plan(PlannerInfo *root, Path *best_path) plan = (Plan *) create_indexscan_plan(root, (IndexPath *) best_path, tlist, - scan_clauses); + scan_clauses, + false); + break; + + case T_IndexOnlyScan: + plan = (Plan *) create_indexscan_plan(root, + 
(IndexPath *) best_path, + tlist, + scan_clauses, + true); break; case T_BitmapHeapScan: @@ -476,6 +499,7 @@ disuse_physical_tlist(Plan *plan, Path *path) { case T_SeqScan: case T_IndexScan: + case T_IndexOnlyScan: case T_BitmapHeapScan: case T_TidScan: case T_SubqueryScan: @@ -1044,16 +1068,23 @@ create_seqscan_plan(PlannerInfo *root, Path *best_path, * Returns an indexscan plan for the base relation scanned by 'best_path' * with restriction clauses 'scan_clauses' and targetlist 'tlist'. * + * We use this for both plain IndexScans and IndexOnlyScans, because the + * qual preprocessing work is the same for both. Note that the caller tells + * us which to build --- we don't look at best_path->path.pathtype, because + * create_bitmap_subplan needs to be able to override the prior decision. + * * The indexquals list of the path contains implicitly-ANDed qual conditions. * The list can be empty --- then no index restrictions will be applied during * the scan. */ -static IndexScan * +static Scan * create_indexscan_plan(PlannerInfo *root, IndexPath *best_path, List *tlist, - List *scan_clauses) + List *scan_clauses, + bool indexonly) { + Scan *scan_plan; List *indexquals = best_path->indexquals; List *indexorderbys = best_path->indexorderbys; Index baserelid = best_path->path.parent->relid; @@ -1063,7 +1094,6 @@ create_indexscan_plan(PlannerInfo *root, List *fixed_indexquals; List *fixed_indexorderbys; ListCell *l; - IndexScan *scan_plan; /* it should be a base rel... */ Assert(baserelid > 0); @@ -1077,7 +1107,7 @@ create_indexscan_plan(PlannerInfo *root, /* * The executor needs a copy with the indexkey on the left of each clause - * and with index attr numbers substituted for table ones. + * and with index Vars substituted for table ones. 
*/ fixed_indexquals = fix_indexqual_references(root, best_path, indexquals); @@ -1175,20 +1205,29 @@ create_indexscan_plan(PlannerInfo *root, } /* Finally ready to build the plan node */ - scan_plan = make_indexscan(tlist, - qpqual, - baserelid, - indexoid, - fixed_indexquals, - stripped_indexquals, - fixed_indexorderbys, - indexorderbys, - best_path->indexscandir, - best_path->indexonly); - - copy_path_costsize(&scan_plan->scan.plan, &best_path->path); + if (indexonly) + scan_plan = (Scan *) make_indexonlyscan(tlist, + qpqual, + baserelid, + indexoid, + fixed_indexquals, + fixed_indexorderbys, + best_path->indexinfo->indextlist, + best_path->indexscandir); + else + scan_plan = (Scan *) make_indexscan(tlist, + qpqual, + baserelid, + indexoid, + fixed_indexquals, + stripped_indexquals, + fixed_indexorderbys, + indexorderbys, + best_path->indexscandir); + + copy_path_costsize(&scan_plan->plan, &best_path->path); /* use the indexscan-specific rows estimate, not the parent rel's */ - scan_plan->scan.plan.plan_rows = best_path->rows; + scan_plan->plan.plan_rows = best_path->rows; return scan_plan; } @@ -1440,7 +1479,9 @@ create_bitmap_subplan(PlannerInfo *root, Path *bitmapqual, ListCell *l; /* Use the regular indexscan plan build machinery... */ - iscan = create_indexscan_plan(root, ipath, NIL, NIL); + iscan = (IndexScan *) create_indexscan_plan(root, ipath, + NIL, NIL, false); + Assert(IsA(iscan, IndexScan)); /* then convert to a bitmap indexscan */ plan = (Plan *) make_bitmap_indexscan(iscan->scan.scanrelid, iscan->indexid, @@ -2549,17 +2590,13 @@ fix_indexorderby_references(PlannerInfo *root, IndexPath *index_path, /* * fix_indexqual_operand * Convert an indexqual expression to a Var referencing the index column. + * + * We represent index keys by Var nodes having varno == INDEX_VAR and varattno + * equal to the index's attribute number (index column position). 
*/ static Node * fix_indexqual_operand(Node *node, IndexOptInfo *index) { - /* - * We represent index keys by Var nodes having the varno of the base table - * but varattno equal to the index's attribute number (index column - * position). This is a bit hokey ... would be cleaner to use a - * special-purpose node type that could not be mistaken for a regular Var. - * But it will do for now. - */ Var *result; int pos; ListCell *indexpr_item; @@ -2583,6 +2620,7 @@ fix_indexqual_operand(Node *node, IndexOptInfo *index) if (index->indexkeys[pos] == varatt) { result = (Var *) copyObject(node); + result->varno = INDEX_VAR; result->varattno = pos + 1; return (Node *) result; } @@ -2606,7 +2644,7 @@ fix_indexqual_operand(Node *node, IndexOptInfo *index) if (equal(node, indexkey)) { /* Found a match */ - result = makeVar(index->rel->relid, pos + 1, + result = makeVar(INDEX_VAR, pos + 1, exprType(lfirst(indexpr_item)), -1, exprCollation(lfirst(indexpr_item)), 0); @@ -2842,8 +2880,7 @@ make_indexscan(List *qptlist, List *indexqualorig, List *indexorderby, List *indexorderbyorig, - ScanDirection indexscandir, - bool indexonly) + ScanDirection indexscandir) { IndexScan *node = makeNode(IndexScan); Plan *plan = &node->scan.plan; @@ -2860,7 +2897,34 @@ make_indexscan(List *qptlist, node->indexorderby = indexorderby; node->indexorderbyorig = indexorderbyorig; node->indexorderdir = indexscandir; - node->indexonly = indexonly; + + return node; +} + +static IndexOnlyScan * +make_indexonlyscan(List *qptlist, + List *qpqual, + Index scanrelid, + Oid indexid, + List *indexqual, + List *indexorderby, + List *indextlist, + ScanDirection indexscandir) +{ + IndexOnlyScan *node = makeNode(IndexOnlyScan); + Plan *plan = &node->scan.plan; + + /* cost should be inserted by caller */ + plan->targetlist = qptlist; + plan->qual = qpqual; + plan->lefttree = NULL; + plan->righttree = NULL; + node->scan.scanrelid = scanrelid; + node->indexid = indexid; + node->indexqual = indexqual; + 
node->indexorderby = indexorderby; + node->indextlist = indextlist; + node->indexorderdir = indexscandir; return node; } diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index d60163379b2..493103a1dbd 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -63,6 +63,7 @@ typedef struct { PlannerInfo *root; indexed_tlist *subplan_itlist; + Index newvarno; int rtoffset; } fix_upper_expr_context; @@ -81,6 +82,9 @@ typedef struct ((List *) fix_scan_expr(root, (Node *) (lst), rtoffset)) static Plan *set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset); +static Plan *set_indexonlyscan_references(PlannerInfo *root, + IndexOnlyScan *plan, + int rtoffset); static Plan *set_subqueryscan_references(PlannerInfo *root, SubqueryScan *plan, int rtoffset); @@ -113,6 +117,7 @@ static Node *fix_join_expr_mutator(Node *node, static Node *fix_upper_expr(PlannerInfo *root, Node *node, indexed_tlist *subplan_itlist, + Index newvarno, int rtoffset); static Node *fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context); @@ -235,6 +240,16 @@ set_plan_references(PlannerInfo *root, Plan *plan) } /* + * Check for RT index overflow; it's very unlikely, but if it did happen, + * the executor would get confused by varnos that match the special varno + * values. 
+ */ + if (IS_SPECIAL_VARNO(list_length(glob->finalrtable))) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("too many range table entries"))); + + /* * Adjust RT indexes of PlanRowMarks and add to final rowmarks list */ foreach(lc, root->rowMarks) @@ -305,6 +320,13 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) fix_scan_list(root, splan->indexorderbyorig, rtoffset); } break; + case T_IndexOnlyScan: + { + IndexOnlyScan *splan = (IndexOnlyScan *) plan; + + return set_indexonlyscan_references(root, splan, rtoffset); + } + break; case T_BitmapIndexScan: { BitmapIndexScan *splan = (BitmapIndexScan *) plan; @@ -653,6 +675,49 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) } /* + * set_indexonlyscan_references + * Do set_plan_references processing on an IndexOnlyScan + * + * This is unlike the handling of a plain IndexScan because we have to + * convert Vars referencing the heap into Vars referencing the index. + * We can use the fix_upper_expr machinery for that, by working from a + * targetlist describing the index columns. 
+ */ +static Plan * +set_indexonlyscan_references(PlannerInfo *root, + IndexOnlyScan *plan, + int rtoffset) +{ + indexed_tlist *index_itlist; + + index_itlist = build_tlist_index(plan->indextlist); + + plan->scan.scanrelid += rtoffset; + plan->scan.plan.targetlist = (List *) + fix_upper_expr(root, + (Node *) plan->scan.plan.targetlist, + index_itlist, + INDEX_VAR, + rtoffset); + plan->scan.plan.qual = (List *) + fix_upper_expr(root, + (Node *) plan->scan.plan.qual, + index_itlist, + INDEX_VAR, + rtoffset); + /* indexqual is already transformed to reference index columns */ + plan->indexqual = fix_scan_list(root, plan->indexqual, rtoffset); + /* indexorderby is already transformed to reference index columns */ + plan->indexorderby = fix_scan_list(root, plan->indexorderby, rtoffset); + /* indextlist must NOT be transformed to reference index columns */ + plan->indextlist = fix_scan_list(root, plan->indextlist, rtoffset); + + pfree(index_itlist); + + return (Plan *) plan; +} + +/* * set_subqueryscan_references * Do set_plan_references processing on a SubqueryScan * @@ -919,11 +984,13 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) Assert(var->varlevelsup == 0); /* - * We should not see any Vars marked INNER or OUTER. + * We should not see any Vars marked INNER_VAR or OUTER_VAR. But an + * indexqual expression could contain INDEX_VAR Vars. 
*/ - Assert(var->varno != INNER); - Assert(var->varno != OUTER); - var->varno += context->rtoffset; + Assert(var->varno != INNER_VAR); + Assert(var->varno != OUTER_VAR); + if (!IS_SPECIAL_VARNO(var->varno)) + var->varno += context->rtoffset; if (var->varnoold > 0) var->varnoold += context->rtoffset; return (Node *) var; @@ -932,9 +999,10 @@ fix_scan_expr_mutator(Node *node, fix_scan_expr_context *context) { CurrentOfExpr *cexpr = (CurrentOfExpr *) copyObject(node); - Assert(cexpr->cvarno != INNER); - Assert(cexpr->cvarno != OUTER); - cexpr->cvarno += context->rtoffset; + Assert(cexpr->cvarno != INNER_VAR); + Assert(cexpr->cvarno != OUTER_VAR); + if (!IS_SPECIAL_VARNO(cexpr->cvarno)) + cexpr->cvarno += context->rtoffset; return (Node *) cexpr; } if (IsA(node, PlaceHolderVar)) @@ -963,9 +1031,9 @@ fix_scan_expr_walker(Node *node, fix_scan_expr_context *context) /* * set_join_references * Modify the target list and quals of a join node to reference its - * subplans, by setting the varnos to OUTER or INNER and setting attno - * values to the result domain number of either the corresponding outer - * or inner join tuple item. Also perform opcode lookup for these + * subplans, by setting the varnos to OUTER_VAR or INNER_VAR and setting + * attno values to the result domain number of either the corresponding + * outer or inner join tuple item. Also perform opcode lookup for these * expressions. and add regclass OIDs to root->glob->relationOids. 
*/ static void @@ -1012,6 +1080,7 @@ set_join_references(PlannerInfo *root, Join *join, int rtoffset) nlp->paramval = (Var *) fix_upper_expr(root, (Node *) nlp->paramval, outer_itlist, + OUTER_VAR, rtoffset); } } @@ -1083,17 +1152,19 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) search_indexed_tlist_for_sortgroupref((Node *) tle->expr, tle->ressortgroupref, subplan_itlist, - OUTER); + OUTER_VAR); if (!newexpr) newexpr = fix_upper_expr(root, (Node *) tle->expr, subplan_itlist, + OUTER_VAR, rtoffset); } else newexpr = fix_upper_expr(root, (Node *) tle->expr, subplan_itlist, + OUTER_VAR, rtoffset); tle = flatCopyTargetEntry(tle); tle->expr = (Expr *) newexpr; @@ -1105,6 +1176,7 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) fix_upper_expr(root, (Node *) plan->qual, subplan_itlist, + OUTER_VAR, rtoffset); pfree(subplan_itlist); @@ -1113,7 +1185,7 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset) /* * set_dummy_tlist_references * Replace the targetlist of an upper-level plan node with a simple - * list of OUTER references to its child. + * list of OUTER_VAR references to its child. * * This is used for plan types like Sort and Append that don't evaluate * their targetlists. Although the executor doesn't care at all what's in @@ -1136,7 +1208,7 @@ set_dummy_tlist_references(Plan *plan, int rtoffset) Var *oldvar = (Var *) tle->expr; Var *newvar; - newvar = makeVar(OUTER, + newvar = makeVar(OUTER_VAR, tle->resno, exprType((Node *) oldvar), exprTypmod((Node *) oldvar), @@ -1382,11 +1454,12 @@ search_indexed_tlist_for_sortgroupref(Node *node, * relation target lists. Also perform opcode lookup and add * regclass OIDs to root->glob->relationOids. * - * This is used in two different scenarios: a normal join clause, where - * all the Vars in the clause *must* be replaced by OUTER or INNER references; - * and a RETURNING clause, which may contain both Vars of the target relation - * and Vars of other relations. 
In the latter case we want to replace the - * other-relation Vars by OUTER references, while leaving target Vars alone. + * This is used in two different scenarios: a normal join clause, where all + * the Vars in the clause *must* be replaced by OUTER_VAR or INNER_VAR + * references; and a RETURNING clause, which may contain both Vars of the + * target relation and Vars of other relations. In the latter case we want + * to replace the other-relation Vars by OUTER_VAR references, while leaving + * target Vars alone. * * For a normal join, acceptable_rel should be zero so that any failure to * match a Var will be reported as an error. For the RETURNING case, pass @@ -1435,7 +1508,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) /* First look for the var in the input tlists */ newvar = search_indexed_tlist_for_var(var, context->outer_itlist, - OUTER, + OUTER_VAR, context->rtoffset); if (newvar) return (Node *) newvar; @@ -1443,7 +1516,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) { newvar = search_indexed_tlist_for_var(var, context->inner_itlist, - INNER, + INNER_VAR, context->rtoffset); if (newvar) return (Node *) newvar; @@ -1470,7 +1543,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) { newvar = search_indexed_tlist_for_non_var((Node *) phv, context->outer_itlist, - OUTER); + OUTER_VAR); if (newvar) return (Node *) newvar; } @@ -1478,7 +1551,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) { newvar = search_indexed_tlist_for_non_var((Node *) phv, context->inner_itlist, - INNER); + INNER_VAR); if (newvar) return (Node *) newvar; } @@ -1491,7 +1564,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) { newvar = search_indexed_tlist_for_non_var(node, context->outer_itlist, - OUTER); + OUTER_VAR); if (newvar) return (Node *) newvar; } @@ -1499,7 +1572,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) { newvar = 
search_indexed_tlist_for_non_var(node, context->inner_itlist, - INNER); + INNER_VAR); if (newvar) return (Node *) newvar; } @@ -1516,7 +1589,7 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) * root->glob->relationOids. * * This is used to fix up target and qual expressions of non-join upper-level - * plan nodes. + * plan nodes, as well as index-only scan nodes. * * An error is raised if no matching var can be found in the subplan tlist * --- so this routine should only be applied to nodes whose subplans' @@ -1529,23 +1602,26 @@ fix_join_expr_mutator(Node *node, fix_join_expr_context *context) * subplan tlist is just a flattened list of Vars.) * * 'node': the tree to be fixed (a target item or qual) - * 'subplan_itlist': indexed target list for subplan + * 'subplan_itlist': indexed target list for subplan (or index) + * 'newvarno': varno to use for Vars referencing tlist elements * 'rtoffset': how much to increment varnoold by * * The resulting tree is a copy of the original in which all Var nodes have - * varno = OUTER, varattno = resno of corresponding subplan target. + * varno = newvarno, varattno = resno of corresponding targetlist element. * The original tree is not modified. 
*/ static Node * fix_upper_expr(PlannerInfo *root, Node *node, indexed_tlist *subplan_itlist, + Index newvarno, int rtoffset) { fix_upper_expr_context context; context.root = root; context.subplan_itlist = subplan_itlist; + context.newvarno = newvarno; context.rtoffset = rtoffset; return fix_upper_expr_mutator(node, &context); } @@ -1563,7 +1639,7 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) newvar = search_indexed_tlist_for_var(var, context->subplan_itlist, - OUTER, + context->newvarno, context->rtoffset); if (!newvar) elog(ERROR, "variable not found in subplan target list"); @@ -1578,7 +1654,7 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) { newvar = search_indexed_tlist_for_non_var((Node *) phv, context->subplan_itlist, - OUTER); + context->newvarno); if (newvar) return (Node *) newvar; } @@ -1590,7 +1666,7 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) { newvar = search_indexed_tlist_for_non_var(node, context->subplan_itlist, - OUTER); + context->newvarno); if (newvar) return (Node *) newvar; } @@ -1610,7 +1686,7 @@ fix_upper_expr_mutator(Node *node, fix_upper_expr_context *context) * table should be left alone, however (the executor will evaluate them * using the actual heap tuple, after firing triggers if any). In the * adjusted RETURNING list, result-table Vars will still have their - * original varno, but Vars for other rels will have varno OUTER. + * original varno, but Vars for other rels will have varno OUTER_VAR. * * We also must perform opcode lookup and add regclass OIDs to * root->glob->relationOids. 
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 2e308c625ad..c4046ca5344 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1974,6 +1974,18 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, context.paramids = bms_add_members(context.paramids, scan_params); break; + case T_IndexOnlyScan: + finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexqual, + &context); + finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexorderby, + &context); + + /* + * we need not look at indextlist, since it cannot contain Params. + */ + context.paramids = bms_add_members(context.paramids, scan_params); + break; + case T_BitmapIndexScan: finalize_primnode((Node *) ((BitmapIndexScan *) plan)->indexqual, &context); diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 8ed55a3d0e2..6aa34412def 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -452,7 +452,7 @@ create_index_path(PlannerInfo *root, indexscandir = NoMovementScanDirection; } - pathnode->path.pathtype = T_IndexScan; + pathnode->path.pathtype = indexonly ? 
T_IndexOnlyScan : T_IndexScan; pathnode->path.parent = rel; pathnode->path.pathkeys = pathkeys; @@ -470,7 +470,6 @@ create_index_path(PlannerInfo *root, pathnode->isjoininner = (outer_rel != NULL); pathnode->indexscandir = indexscandir; - pathnode->indexonly = indexonly; if (outer_rel != NULL) { diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index 742e7a880ad..0b3675f1461 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -22,6 +22,7 @@ #include "access/sysattr.h" #include "access/transam.h" #include "catalog/catalog.h" +#include "catalog/heap.h" #include "miscadmin.h" #include "nodes/makefuncs.h" #include "optimizer/clauses.h" @@ -49,6 +50,8 @@ static int32 get_rel_data_width(Relation rel, int32 *attr_widths); static List *get_relation_constraints(PlannerInfo *root, Oid relationObjectId, RelOptInfo *rel, bool include_notnull); +static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index, + Relation heapRelation); /* @@ -314,6 +317,10 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, ChangeVarNodes((Node *) info->indexprs, 1, varno, 0); if (info->indpred && varno != 1) ChangeVarNodes((Node *) info->indpred, 1, varno, 0); + + /* Build targetlist using the completed indexprs data */ + info->indextlist = build_index_tlist(root, info, relation); + info->predOK = false; /* set later in indxpath.c */ info->unique = index->indisunique; info->hypothetical = false; @@ -901,6 +908,70 @@ build_physical_tlist(PlannerInfo *root, RelOptInfo *rel) } /* + * build_index_tlist + * + * Build a targetlist representing the columns of the specified index. + * Each column is represented by a Var for the corresponding base-relation + * column, or an expression in base-relation Vars, as appropriate. + * + * There are never any dropped columns in indexes, so unlike + * build_physical_tlist, we need no failure case. 
+ */ +static List * +build_index_tlist(PlannerInfo *root, IndexOptInfo *index, + Relation heapRelation) +{ + List *tlist = NIL; + Index varno = index->rel->relid; + ListCell *indexpr_item; + int i; + + indexpr_item = list_head(index->indexprs); + for (i = 0; i < index->ncolumns; i++) + { + int indexkey = index->indexkeys[i]; + Expr *indexvar; + + if (indexkey != 0) + { + /* simple column */ + Form_pg_attribute att_tup; + + if (indexkey < 0) + att_tup = SystemAttributeDefinition(indexkey, + heapRelation->rd_rel->relhasoids); + else + att_tup = heapRelation->rd_att->attrs[indexkey - 1]; + + indexvar = (Expr *) makeVar(varno, + indexkey, + att_tup->atttypid, + att_tup->atttypmod, + att_tup->attcollation, + 0); + } + else + { + /* expression column */ + if (indexpr_item == NULL) + elog(ERROR, "wrong number of index expressions"); + indexvar = (Expr *) lfirst(indexpr_item); + indexpr_item = lnext(indexpr_item); + } + + tlist = lappend(tlist, + makeTargetEntry(indexvar, + i + 1, + NULL, + false)); + } + if (indexpr_item != NULL) + elog(ERROR, "wrong number of index expressions"); + + return tlist; +} + +/* * restriction_selectivity * * Returns the selectivity of a specified restriction operator clause. diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index c112a9cc163..75923a6f2ea 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -107,9 +107,11 @@ typedef struct * deparse_namespace list (since a plan tree never contains Vars with * varlevelsup > 0). We store the PlanState node that is the immediate * parent of the expression to be deparsed, as well as a list of that - * PlanState's ancestors. In addition, we store the outer and inner - * subplan nodes, whose targetlists are used to resolve OUTER and INNER Vars. - * (Note: these could be derived on-the-fly from the planstate instead.) + * PlanState's ancestors. 
In addition, we store its outer and inner subplan + * state nodes, as well as their plan nodes' targetlists, and the indextlist + * if the current PlanState is an IndexOnlyScanState. (These fields could + * be derived on-the-fly from the current PlanState, but it seems notationally + * clearer to set them up as separate fields.) */ typedef struct { @@ -118,10 +120,11 @@ typedef struct /* Remaining fields are used only when deparsing a Plan tree: */ PlanState *planstate; /* immediate parent of current expression */ List *ancestors; /* ancestors of planstate */ - PlanState *outer_planstate; /* OUTER subplan state, or NULL if none */ - PlanState *inner_planstate; /* INNER subplan state, or NULL if none */ - Plan *outer_plan; /* OUTER subplan, or NULL if none */ - Plan *inner_plan; /* INNER subplan, or NULL if none */ + PlanState *outer_planstate; /* outer subplan state, or NULL if none */ + PlanState *inner_planstate; /* inner subplan state, or NULL if none */ + List *outer_tlist; /* referent for OUTER_VAR Vars */ + List *inner_tlist; /* referent for INNER_VAR Vars */ + List *index_tlist; /* referent for INDEX_VAR Vars */ } deparse_namespace; @@ -2162,9 +2165,14 @@ deparse_context_for(const char *aliasname, Oid relid) * deparse_context_for_planstate - Build deparse context for a plan * * When deparsing an expression in a Plan tree, we might have to resolve - * OUTER or INNER references. To do this, the caller must provide the - * parent PlanState node. Then OUTER and INNER references can be resolved - * by drilling down into the left and right child plans. + * OUTER_VAR, INNER_VAR, or INDEX_VAR references. To do this, the caller must + * provide the parent PlanState node. Then OUTER_VAR and INNER_VAR references + * can be resolved by drilling down into the left and right child plans. + * Similarly, INDEX_VAR references can be resolved by reference to the + * indextlist given in the parent IndexOnlyScan node. 
(Note that we don't + * currently support deparsing of indexquals in regular IndexScan or + * BitmapIndexScan nodes; for those, we can only deparse the indexqualorig + * fields, which won't contain INDEX_VAR Vars.) * * Note: planstate really ought to be declared as "PlanState *", but we use * "Node *" to avoid having to include execnodes.h in builtins.h. @@ -2175,7 +2183,7 @@ deparse_context_for(const char *aliasname, Oid relid) * * The plan's rangetable list must also be passed. We actually prefer to use * the rangetable to resolve simple Vars, but the plan inputs are necessary - * for Vars that reference expressions computed in subplan target lists. + * for Vars with special varnos. */ List * deparse_context_for_planstate(Node *planstate, List *ancestors, @@ -2201,10 +2209,11 @@ deparse_context_for_planstate(Node *planstate, List *ancestors, * set_deparse_planstate: set up deparse_namespace to parse subexpressions * of a given PlanState node * - * This sets the planstate, outer_planstate, inner_planstate, outer_plan, and - * inner_plan fields. Caller is responsible for adjusting the ancestors list - * if necessary. Note that the rtable and ctes fields do not need to change - * when shifting attention to different plan nodes in a single plan tree. + * This sets the planstate, outer_planstate, inner_planstate, outer_tlist, + * inner_tlist, and index_tlist fields. Caller is responsible for adjusting + * the ancestors list if necessary. Note that the rtable and ctes fields do + * not need to change when shifting attention to different plan nodes in a + * single plan tree. 
*/ static void set_deparse_planstate(deparse_namespace *dpns, PlanState *ps) @@ -2229,9 +2238,9 @@ set_deparse_planstate(deparse_namespace *dpns, PlanState *ps) dpns->outer_planstate = outerPlanState(ps); if (dpns->outer_planstate) - dpns->outer_plan = dpns->outer_planstate->plan; + dpns->outer_tlist = dpns->outer_planstate->plan->targetlist; else - dpns->outer_plan = NULL; + dpns->outer_tlist = NIL; /* * For a SubqueryScan, pretend the subplan is INNER referent. (We don't @@ -2246,18 +2255,25 @@ set_deparse_planstate(deparse_namespace *dpns, PlanState *ps) dpns->inner_planstate = innerPlanState(ps); if (dpns->inner_planstate) - dpns->inner_plan = dpns->inner_planstate->plan; + dpns->inner_tlist = dpns->inner_planstate->plan->targetlist; else - dpns->inner_plan = NULL; + dpns->inner_tlist = NIL; + + /* index_tlist is set only if it's an IndexOnlyScan */ + if (IsA(ps->plan, IndexOnlyScan)) + dpns->index_tlist = ((IndexOnlyScan *) ps->plan)->indextlist; + else + dpns->index_tlist = NIL; } /* * push_child_plan: temporarily transfer deparsing attention to a child plan * - * When expanding an OUTER or INNER reference, we must adjust the deparse - * context in case the referenced expression itself uses OUTER/INNER. We - * modify the top stack entry in-place to avoid affecting levelsup issues - * (although in a Plan tree there really shouldn't be any). + * When expanding an OUTER_VAR or INNER_VAR reference, we must adjust the + * deparse context in case the referenced expression itself uses + * OUTER_VAR/INNER_VAR. We modify the top stack entry in-place to avoid + * affecting levelsup issues (although in a Plan tree there really shouldn't + * be any). * * Caller must provide a local deparse_namespace variable to save the * previous state for pop_child_plan. 
@@ -2271,10 +2287,11 @@ push_child_plan(deparse_namespace *dpns, PlanState *ps, /* * Currently we don't bother to adjust the ancestors list, because an - * OUTER or INNER reference really shouldn't contain any Params that would - * be set by the parent node itself. If we did want to adjust it, - * lcons'ing dpns->planstate onto dpns->ancestors would be the appropriate - * thing --- and pop_child_plan would need to undo the change to the list. + * OUTER_VAR or INNER_VAR reference really shouldn't contain any Params + * that would be set by the parent node itself. If we did want to adjust + * the list, lcons'ing dpns->planstate onto dpns->ancestors would be the + * appropriate thing --- and pop_child_plan would need to undo the change + * to the list. */ /* Set attention on selected child */ @@ -2298,7 +2315,7 @@ pop_child_plan(deparse_namespace *dpns, deparse_namespace *save_dpns) * When expanding a Param reference, we must adjust the deparse context * to match the plan node that contains the expression being printed; * otherwise we'd fail if that expression itself contains a Param or - * OUTER/INNER variables. + * OUTER_VAR/INNER_VAR/INDEX_VAR variable. * * The target ancestor is conveniently identified by the ListCell holding it * in dpns->ancestors. @@ -3716,22 +3733,22 @@ get_variable(Var *var, int levelsup, bool showstar, deparse_context *context) /* * Try to find the relevant RTE in this rtable. In a plan tree, it's - * likely that varno is OUTER or INNER, in which case we must dig down - * into the subplans. + * likely that varno is OUTER_VAR or INNER_VAR, in which case we must dig + * down into the subplans, or INDEX_VAR, which is resolved similarly. 
*/ if (var->varno >= 1 && var->varno <= list_length(dpns->rtable)) { rte = rt_fetch(var->varno, dpns->rtable); attnum = var->varattno; } - else if (var->varno == OUTER && dpns->outer_plan) + else if (var->varno == OUTER_VAR && dpns->outer_tlist) { TargetEntry *tle; deparse_namespace save_dpns; - tle = get_tle_by_resno(dpns->outer_plan->targetlist, var->varattno); + tle = get_tle_by_resno(dpns->outer_tlist, var->varattno); if (!tle) - elog(ERROR, "bogus varattno for OUTER var: %d", var->varattno); + elog(ERROR, "bogus varattno for OUTER_VAR var: %d", var->varattno); Assert(netlevelsup == 0); push_child_plan(dpns, dpns->outer_planstate, &save_dpns); @@ -3749,14 +3766,14 @@ get_variable(Var *var, int levelsup, bool showstar, deparse_context *context) pop_child_plan(dpns, &save_dpns); return NULL; } - else if (var->varno == INNER && dpns->inner_plan) + else if (var->varno == INNER_VAR && dpns->inner_tlist) { TargetEntry *tle; deparse_namespace save_dpns; - tle = get_tle_by_resno(dpns->inner_plan->targetlist, var->varattno); + tle = get_tle_by_resno(dpns->inner_tlist, var->varattno); if (!tle) - elog(ERROR, "bogus varattno for INNER var: %d", var->varattno); + elog(ERROR, "bogus varattno for INNER_VAR var: %d", var->varattno); Assert(netlevelsup == 0); push_child_plan(dpns, dpns->inner_planstate, &save_dpns); @@ -3774,6 +3791,28 @@ get_variable(Var *var, int levelsup, bool showstar, deparse_context *context) pop_child_plan(dpns, &save_dpns); return NULL; } + else if (var->varno == INDEX_VAR && dpns->index_tlist) + { + TargetEntry *tle; + + tle = get_tle_by_resno(dpns->index_tlist, var->varattno); + if (!tle) + elog(ERROR, "bogus varattno for INDEX_VAR var: %d", var->varattno); + + Assert(netlevelsup == 0); + + /* + * Force parentheses because our caller probably assumed a Var is a + * simple expression. 
+ */ + if (!IsA(tle->expr, Var)) + appendStringInfoChar(buf, '('); + get_rule_expr((Node *) tle->expr, context, true); + if (!IsA(tle->expr, Var)) + appendStringInfoChar(buf, ')'); + + return NULL; + } else { elog(ERROR, "bogus varno: %d", var->varno); @@ -3789,16 +3828,16 @@ get_variable(Var *var, int levelsup, bool showstar, deparse_context *context) * no alias. So in that case, drill down to the subplan and print the * contents of the referenced tlist item. This works because in a plan * tree, such Vars can only occur in a SubqueryScan or CteScan node, and - * we'll have set dpns->inner_plan to reference the child plan node. + * we'll have set dpns->inner_planstate to reference the child plan node. */ if ((rte->rtekind == RTE_SUBQUERY || rte->rtekind == RTE_CTE) && attnum > list_length(rte->eref->colnames) && - dpns->inner_plan) + dpns->inner_planstate) { TargetEntry *tle; deparse_namespace save_dpns; - tle = get_tle_by_resno(dpns->inner_plan->targetlist, var->varattno); + tle = get_tle_by_resno(dpns->inner_tlist, var->varattno); if (!tle) elog(ERROR, "bogus varattno for subquery var: %d", var->varattno); @@ -3984,23 +4023,23 @@ get_name_for_var_field(Var *var, int fieldno, /* * Try to find the relevant RTE in this rtable. In a plan tree, it's - * likely that varno is OUTER or INNER, in which case we must dig down - * into the subplans. + * likely that varno is OUTER_VAR or INNER_VAR, in which case we must dig + * down into the subplans, or INDEX_VAR, which is resolved similarly. 
*/ if (var->varno >= 1 && var->varno <= list_length(dpns->rtable)) { rte = rt_fetch(var->varno, dpns->rtable); attnum = var->varattno; } - else if (var->varno == OUTER && dpns->outer_plan) + else if (var->varno == OUTER_VAR && dpns->outer_tlist) { TargetEntry *tle; deparse_namespace save_dpns; const char *result; - tle = get_tle_by_resno(dpns->outer_plan->targetlist, var->varattno); + tle = get_tle_by_resno(dpns->outer_tlist, var->varattno); if (!tle) - elog(ERROR, "bogus varattno for OUTER var: %d", var->varattno); + elog(ERROR, "bogus varattno for OUTER_VAR var: %d", var->varattno); Assert(netlevelsup == 0); push_child_plan(dpns, dpns->outer_planstate, &save_dpns); @@ -4011,15 +4050,15 @@ get_name_for_var_field(Var *var, int fieldno, pop_child_plan(dpns, &save_dpns); return result; } - else if (var->varno == INNER && dpns->inner_plan) + else if (var->varno == INNER_VAR && dpns->inner_tlist) { TargetEntry *tle; deparse_namespace save_dpns; const char *result; - tle = get_tle_by_resno(dpns->inner_plan->targetlist, var->varattno); + tle = get_tle_by_resno(dpns->inner_tlist, var->varattno); if (!tle) - elog(ERROR, "bogus varattno for INNER var: %d", var->varattno); + elog(ERROR, "bogus varattno for INNER_VAR var: %d", var->varattno); Assert(netlevelsup == 0); push_child_plan(dpns, dpns->inner_planstate, &save_dpns); @@ -4030,6 +4069,22 @@ get_name_for_var_field(Var *var, int fieldno, pop_child_plan(dpns, &save_dpns); return result; } + else if (var->varno == INDEX_VAR && dpns->index_tlist) + { + TargetEntry *tle; + const char *result; + + tle = get_tle_by_resno(dpns->index_tlist, var->varattno); + if (!tle) + elog(ERROR, "bogus varattno for INDEX_VAR var: %d", var->varattno); + + Assert(netlevelsup == 0); + + result = get_name_for_var_field((Var *) tle->expr, fieldno, + levelsup, context); + + return result; + } else { elog(ERROR, "bogus varno: %d", var->varno); @@ -4115,11 +4170,10 @@ get_name_for_var_field(Var *var, int fieldno, deparse_namespace save_dpns; const 
char *result; - if (!dpns->inner_plan) + if (!dpns->inner_planstate) elog(ERROR, "failed to find plan for subquery %s", rte->eref->aliasname); - tle = get_tle_by_resno(dpns->inner_plan->targetlist, - attnum); + tle = get_tle_by_resno(dpns->inner_tlist, attnum); if (!tle) elog(ERROR, "bogus varattno for subquery var: %d", attnum); @@ -4232,11 +4286,10 @@ get_name_for_var_field(Var *var, int fieldno, deparse_namespace save_dpns; const char *result; - if (!dpns->inner_plan) + if (!dpns->inner_planstate) elog(ERROR, "failed to find plan for CTE %s", rte->eref->aliasname); - tle = get_tle_by_resno(dpns->inner_plan->targetlist, - attnum); + tle = get_tle_by_resno(dpns->inner_tlist, attnum); if (!tle) elog(ERROR, "bogus varattno for subquery var: %d", attnum); diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c index 69e89b82c9e..b4ac9357fb4 100644 --- a/src/backend/utils/adt/tid.c +++ b/src/backend/utils/adt/tid.c @@ -306,7 +306,7 @@ currtid_for_view(Relation viewrel, ItemPointer tid) Var *var = (Var *) tle->expr; RangeTblEntry *rte; - if (var->varno > 0 && var->varno < INNER && + if (!IS_SPECIAL_VARNO(var->varno) && var->varattno == SelfItemPointerAttributeNumber) { rte = rt_fetch(var->varno, query->rtable); diff --git a/src/include/executor/nodeIndexonlyscan.h b/src/include/executor/nodeIndexonlyscan.h new file mode 100644 index 00000000000..1c59cee5a73 --- /dev/null +++ b/src/include/executor/nodeIndexonlyscan.h @@ -0,0 +1,26 @@ +/*------------------------------------------------------------------------- + * + * nodeIndexonlyscan.h + * + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/executor/nodeIndexonlyscan.h + * + *------------------------------------------------------------------------- + */ +#ifndef NODEINDEXONLYSCAN_H +#define NODEINDEXONLYSCAN_H + +#include "nodes/execnodes.h" + +extern IndexOnlyScanState 
*ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags); +extern TupleTableSlot *ExecIndexOnlyScan(IndexOnlyScanState *node); +extern void ExecEndIndexOnlyScan(IndexOnlyScanState *node); +extern void ExecIndexOnlyMarkPos(IndexOnlyScanState *node); +extern void ExecIndexOnlyRestrPos(IndexOnlyScanState *node); +extern void ExecReScanIndexOnlyScan(IndexOnlyScanState *node); + +#endif /* NODEINDEXONLYSCAN_H */ diff --git a/src/include/executor/nodeIndexscan.h b/src/include/executor/nodeIndexscan.h index 481a7df70fe..4094031c185 100644 --- a/src/include/executor/nodeIndexscan.h +++ b/src/include/executor/nodeIndexscan.h @@ -23,9 +23,12 @@ extern void ExecIndexMarkPos(IndexScanState *node); extern void ExecIndexRestrPos(IndexScanState *node); extern void ExecReScanIndexScan(IndexScanState *node); -/* routines exported to share code with nodeBitmapIndexscan.c */ +/* + * These routines are exported to share code with nodeIndexonlyscan.c and + * nodeBitmapIndexscan.c + */ extern void ExecIndexBuildScanKeys(PlanState *planstate, Relation index, - Index scanrelid, List *quals, bool isorderby, + List *quals, bool isorderby, ScanKey *scanKeys, int *numScanKeys, IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys, IndexArrayKeyInfo **arrayKeys, int *numArrayKeys); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 3885fa0099d..0a89f189d7c 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -1226,7 +1226,6 @@ typedef struct * RuntimeContext expr context for evaling runtime Skeys * RelationDesc index relation descriptor * ScanDesc index scan descriptor - * VMBuffer buffer in use for visibility map testing, if any * ---------------- */ typedef struct IndexScanState @@ -1243,10 +1242,43 @@ typedef struct IndexScanState ExprContext *iss_RuntimeContext; Relation iss_RelationDesc; IndexScanDesc iss_ScanDesc; - Buffer iss_VMBuffer; } IndexScanState; /* ---------------- + * IndexOnlyScanState information + * + 
* indexqual execution state for indexqual expressions + * ScanKeys Skey structures for index quals + * NumScanKeys number of ScanKeys + * OrderByKeys Skey structures for index ordering operators + * NumOrderByKeys number of OrderByKeys + * RuntimeKeys info about Skeys that must be evaluated at runtime + * NumRuntimeKeys number of RuntimeKeys + * RuntimeKeysReady true if runtime Skeys have been computed + * RuntimeContext expr context for evaling runtime Skeys + * RelationDesc index relation descriptor + * ScanDesc index scan descriptor + * VMBuffer buffer in use for visibility map testing, if any + * ---------------- + */ +typedef struct IndexOnlyScanState +{ + ScanState ss; /* its first field is NodeTag */ + List *indexqual; + ScanKey ioss_ScanKeys; + int ioss_NumScanKeys; + ScanKey ioss_OrderByKeys; + int ioss_NumOrderByKeys; + IndexRuntimeKeyInfo *ioss_RuntimeKeys; + int ioss_NumRuntimeKeys; + bool ioss_RuntimeKeysReady; + ExprContext *ioss_RuntimeContext; + Relation ioss_RelationDesc; + IndexScanDesc ioss_ScanDesc; + Buffer ioss_VMBuffer; +} IndexOnlyScanState; + +/* ---------------- * BitmapIndexScanState information * * result bitmap to return output into, or NULL diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index ecf62b335b6..7aa299485fc 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -52,6 +52,7 @@ typedef enum NodeTag T_Scan, T_SeqScan, T_IndexScan, + T_IndexOnlyScan, T_BitmapIndexScan, T_BitmapHeapScan, T_TidScan, @@ -97,6 +98,7 @@ typedef enum NodeTag T_ScanState, T_SeqScanState, T_IndexScanState, + T_IndexOnlyScanState, T_BitmapIndexScanState, T_BitmapHeapScanState, T_TidScanState, diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 60467f52769..ababded845a 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -285,11 +285,8 @@ typedef Scan SeqScan; * * indexqual has the same form, but the expressions have been commuted if * necessary to put the 
indexkeys on the left, and the indexkeys are replaced - * by Var nodes identifying the index columns (varattno is the index column - * position, not the base table's column, even though varno is for the base - * table). This is a bit hokey ... would be cleaner to use a special-purpose - * node type that could not be mistaken for a regular Var. But it will do - * for now. + * by Var nodes identifying the index columns (their varno is INDEX_VAR and + * their varattno is the index column number). * * indexorderbyorig is similarly the original form of any ORDER BY expressions * that are being implemented by the index, while indexorderby is modified to @@ -302,8 +299,7 @@ typedef Scan SeqScan; * (Note these fields are used for amcanorderbyop cases, not amcanorder cases.) * * indexorderdir specifies the scan ordering, for indexscans on amcanorder - * indexes (for other indexes it should be "don't care"). indexonly specifies - * an index-only scan, for indexscans on amcanreturn indexes. + * indexes (for other indexes it should be "don't care"). * ---------------- */ typedef struct IndexScan @@ -315,10 +311,36 @@ typedef struct IndexScan List *indexorderby; /* list of index ORDER BY exprs */ List *indexorderbyorig; /* the same in original form */ ScanDirection indexorderdir; /* forward or backward or don't care */ - bool indexonly; /* attempt to skip heap fetches? */ } IndexScan; /* ---------------- + * index-only scan node + * + * IndexOnlyScan is very similar to IndexScan, but it specifies an + * index-only scan, in which the data comes from the index not the heap. + * Because of this, *all* Vars in the plan node's targetlist, qual, and + * index expressions reference index columns and have varno = INDEX_VAR. + * Hence we do not need separate indexqualorig and indexorderbyorig lists, + * since their contents would be equivalent to indexqual and indexorderby. 
+ * + * To help EXPLAIN interpret the index Vars for display, we provide + * indextlist, which represents the contents of the index as a targetlist + * with one TLE per index column. Vars appearing in this list reference + * the base table, and this is the only field in the plan node that may + * contain such Vars. + * ---------------- + */ +typedef struct IndexOnlyScan +{ + Scan scan; + Oid indexid; /* OID of index to scan */ + List *indexqual; /* list of index quals (usually OpExprs) */ + List *indexorderby; /* list of index ORDER BY exprs */ + List *indextlist; /* TargetEntry list describing index's cols */ + ScanDirection indexorderdir; /* forward or backward or don't care */ +} IndexOnlyScan; + +/* ---------------- * bitmap index scan node * * BitmapIndexScan delivers a bitmap of potential tuple locations; diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index f1e20ef937c..cedf022e174 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -118,15 +118,19 @@ typedef struct Expr * Note: during parsing/planning, varnoold/varoattno are always just copies * of varno/varattno. At the tail end of planning, Var nodes appearing in * upper-level plan nodes are reassigned to point to the outputs of their - * subplans; for example, in a join node varno becomes INNER or OUTER and - * varattno becomes the index of the proper element of that subplan's target - * list. But varnoold/varoattno continue to hold the original values. + * subplans; for example, in a join node varno becomes INNER_VAR or OUTER_VAR + * and varattno becomes the index of the proper element of that subplan's + * target list. But varnoold/varoattno continue to hold the original values. * The code doesn't really need varnoold/varoattno, but they are very useful * for debugging and interpreting completed plans, so we keep them around. 
*/ -#define INNER 65000 -#define OUTER 65001 +#define INNER_VAR 65000 /* reference to inner subplan */ +#define OUTER_VAR 65001 /* reference to outer subplan */ +#define INDEX_VAR 65002 /* reference to index column */ +#define IS_SPECIAL_VARNO(varno) ((varno) >= INNER_VAR) + +/* Symbols for the indexes of the special RTE entries in rules */ #define PRS2_OLD_VARNO 1 #define PRS2_NEW_VARNO 2 @@ -134,7 +138,7 @@ typedef struct Var { Expr xpr; Index varno; /* index of this var's relation in the range - * table (could also be INNER or OUTER) */ + * table, or INNER_VAR/OUTER_VAR/INDEX_VAR */ AttrNumber varattno; /* attribute number of this var, or zero for * all */ Oid vartype; /* pg_type OID for the type of this var */ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index cf48ba433c8..45ca52e516e 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -449,6 +449,10 @@ typedef struct RelOptInfo * The indexprs and indpred expressions have been run through * prepqual.c and eval_const_expressions() for ease of matching to * WHERE clauses. indpred is in implicit-AND form. + * + * indextlist is a TargetEntry list representing the index columns. + * It provides an equivalent base-relation Var for each simple column, + * and links to the matching indexprs element for each expression column. */ typedef struct IndexOptInfo { @@ -478,6 +482,8 @@ typedef struct IndexOptInfo List *indexprs; /* expressions for non-simple index columns */ List *indpred; /* predicate if a partial index, else NIL */ + List *indextlist; /* targetlist representing index columns */ + bool predOK; /* true if predicate matches query */ bool unique; /* true if a unique index */ bool hypothetical; /* true if index doesn't really exist */ @@ -640,6 +646,9 @@ typedef struct Path /*---------- * IndexPath represents an index scan over a single index. 
* + * This struct is used for both regular indexscans and index-only scans; + * path.pathtype is T_IndexScan or T_IndexOnlyScan to show which is meant. + * * 'indexinfo' is the index to be scanned. * * 'indexclauses' is a list of index qualification clauses, with implicit @@ -673,14 +682,10 @@ typedef struct Path * NoMovementScanDirection for an indexscan, but the planner wants to * distinguish ordered from unordered indexes for building pathkeys.) * - * 'indexonly' is TRUE for an index-only scan, that is, the index's access - * method has amcanreturn = TRUE and we only need columns available from the - * index. - * * 'indextotalcost' and 'indexselectivity' are saved in the IndexPath so that * we need not recompute them when considering using the same index in a * bitmap index/heap scan (see BitmapHeapPath). The costs of the IndexPath - * itself represent the costs of an IndexScan plan type. + * itself represent the costs of an IndexScan or IndexOnlyScan plan type. * * 'rows' is the estimated result tuple count for the indexscan. This * is the same as path.parent->rows for a simple indexscan, but it is @@ -698,7 +703,6 @@ typedef struct IndexPath List *indexorderbys; bool isjoininner; ScanDirection indexscandir; - bool indexonly; Cost indextotalcost; Selectivity indexselectivity; double rows; /* estimated number of result tuples */ @@ -714,11 +718,12 @@ typedef struct IndexPath * The individual indexscans are represented by IndexPath nodes, and any * logic on top of them is represented by a tree of BitmapAndPath and * BitmapOrPath nodes. Notice that we can use the same IndexPath node both - * to represent a regular IndexScan plan, and as the child of a BitmapHeapPath - * that represents scanning the same index using a BitmapIndexScan. The - * startup_cost and total_cost figures of an IndexPath always represent the - * costs to use it as a regular IndexScan. The costs of a BitmapIndexScan - * can be computed using the IndexPath's indextotalcost and indexselectivity. 
+ * to represent a regular (or index-only) index scan plan, and as the child + * of a BitmapHeapPath that represents scanning the same index using a + * BitmapIndexScan. The startup_cost and total_cost figures of an IndexPath + * always represent the costs to use it as a regular (or index-only) + * IndexScan. The costs of a BitmapIndexScan can be computed using the + * IndexPath's indextotalcost and indexselectivity. * * BitmapHeapPaths can be nestloop inner indexscans. The isjoininner and * rows fields serve the same purpose as for plain IndexPaths. |