diff options
author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2015-05-15 14:26:51 +0300 |
---|---|---|
committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2015-05-15 14:26:51 +0300 |
commit | 35fcb1b3d038a501f3f4c87c05630095abaaadab (patch) | |
tree | d67f36684fb18b8523e78f13c0a358b376f50d4b /src/backend/executor/nodeIndexscan.c | |
parent | ecd222e770d352121590363ffdf981147a43e976 (diff) | |
download | postgresql-35fcb1b3d038a501f3f4c87c05630095abaaadab.tar.gz postgresql-35fcb1b3d038a501f3f4c87c05630095abaaadab.zip |
Allow GiST distance function to return merely a lower-bound.
The distance function can now set *recheck = false, like index quals. The
executor will then re-check the ORDER BY expressions, and use a queue to
reorder the results on the fly.
This makes it possible to do kNN-searches on polygons and circles, which
don't store the exact value in the index, but just a bounding box.
Alexander Korotkov and me
Diffstat (limited to 'src/backend/executor/nodeIndexscan.c')
-rw-r--r-- | src/backend/executor/nodeIndexscan.c | 379 |
1 files changed, 376 insertions, 3 deletions
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 48fa91981ff..2164da0c847 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -16,6 +16,7 @@ * INTERFACE ROUTINES * ExecIndexScan scans a relation using an index * IndexNext retrieve next tuple using index + * IndexNextWithReorder same, but recheck ORDER BY expressions * ExecInitIndexScan creates and initializes state info. * ExecReScanIndexScan rescans the indexed relation. * ExecEndIndexScan releases all storage. @@ -28,14 +29,38 @@ #include "access/relscan.h" #include "executor/execdebug.h" #include "executor/nodeIndexscan.h" +#include "lib/pairingheap.h" #include "optimizer/clauses.h" #include "utils/array.h" +#include "utils/datum.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" +/* + * When an ordering operator is used, tuples fetched from the index that + * need to be reordered are queued in a pairing heap, as ReorderTuples. + */ +typedef struct +{ + pairingheap_node ph_node; + HeapTuple htup; + Datum *orderbyvals; + bool *orderbynulls; +} ReorderTuple; static TupleTableSlot *IndexNext(IndexScanState *node); +static TupleTableSlot *IndexNextWithReorder(IndexScanState *node); +static void EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext); +static bool IndexRecheck(IndexScanState *node, TupleTableSlot *slot); +static int cmp_orderbyvals(const Datum *adist, const bool *anulls, + const Datum *bdist, const bool *bnulls, + IndexScanState *node); +static int reorderqueue_cmp(const pairingheap_node *a, + const pairingheap_node *b, void *arg); +static void reorderqueue_push(IndexScanState *node, HeapTuple tuple, + Datum *orderbyvals, bool *orderbynulls); +static HeapTuple reorderqueue_pop(IndexScanState *node); /* ---------------------------------------------------------------- @@ -110,10 +135,200 @@ IndexNext(IndexScanState *node) * if we get here it means the index scan failed so we are at the end of * the scan.. */ + node->iss_ReachedEnd = true; + return ExecClearTuple(slot); +} + +/* ---------------------------------------------------------------- + * IndexNextWithReorder + * + * Like IndexNext, but his version can also re-check any + * ORDER BY expressions, and reorder the tuples as necessary. + * ---------------------------------------------------------------- + */ +static TupleTableSlot * +IndexNextWithReorder(IndexScanState *node) +{ + ExprContext *econtext; + IndexScanDesc scandesc; + HeapTuple tuple; + TupleTableSlot *slot; + ReorderTuple *topmost = NULL; + bool was_exact; + Datum *lastfetched_vals; + bool *lastfetched_nulls; + int cmp; + + /* only forward scan is supported with reordering. */ + Assert(!ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir)); + Assert(ScanDirectionIsForward(node->ss.ps.state->es_direction)); + scandesc = node->iss_ScanDesc; + econtext = node->ss.ps.ps_ExprContext; + slot = node->ss.ss_ScanTupleSlot; + + for (;;) + { + /* + * Check the reorder queue first. If the topmost tuple in the queue + * has an ORDER BY value smaller than (or equal to) the value last + * returned by the index, we can return it now. + */ + if (!pairingheap_is_empty(node->iss_ReorderQueue)) + { + topmost = (ReorderTuple *) pairingheap_first(node->iss_ReorderQueue); + + if (node->iss_ReachedEnd || + cmp_orderbyvals(topmost->orderbyvals, + topmost->orderbynulls, + scandesc->xs_orderbyvals, + scandesc->xs_orderbynulls, + node) <= 0) + { + tuple = reorderqueue_pop(node); + + /* Pass 'true', as the tuple in the queue is a palloc'd copy */ + ExecStoreTuple(tuple, slot, InvalidBuffer, true); + return slot; + } + } + else if (node->iss_ReachedEnd) + { + /* Queue is empty, and no more tuples from index. We're done. */ + return ExecClearTuple(slot); + } + + /* + * Fetch next tuple from the index. + */ +next_indextuple: + tuple = index_getnext(scandesc, ForwardScanDirection); + if (!tuple) + { + /* + * No more tuples from the index. But we still need to drain any + * remaining tuples from the queue before we're done. + */ + node->iss_ReachedEnd = true; + continue; + } + + /* + * Store the scanned tuple in the scan tuple slot of the scan state. + * Note: we pass 'false' because tuples returned by amgetnext are + * pointers onto disk pages and must not be pfree()'d. + */ + ExecStoreTuple(tuple, /* tuple to store */ + slot, /* slot to store in */ + scandesc->xs_cbuf, /* buffer containing tuple */ + false); /* don't pfree */ + + /* + * If the index was lossy, we have to recheck the index quals and + * ORDER BY expressions using the fetched tuple. + */ + if (scandesc->xs_recheck) + { + econtext->ecxt_scantuple = slot; + ResetExprContext(econtext); + if (!ExecQual(node->indexqualorig, econtext, false)) + { + /* Fails recheck, so drop it and loop back for another */ + InstrCountFiltered2(node, 1); + goto next_indextuple; + } + + EvalOrderByExpressions(node, econtext); + + /* + * Was the ORDER BY value returned by the index accurate? The + * recheck flag means that the index can return inaccurate values, + * but then again, the value returned for any particular tuple + * could also be exactly correct. Compare the value returned by + * the index with the recalculated value. (If the value returned + * by the index happened to be exact right, we can often avoid + * pushing the tuple to the queue, just to pop it back out again.) + */ + cmp = cmp_orderbyvals(node->iss_OrderByValues, + node->iss_OrderByNulls, + scandesc->xs_orderbyvals, + scandesc->xs_orderbynulls, + node); + if (cmp < 0) + elog(ERROR, "index returned tuples in wrong order"); + else if (cmp == 0) + was_exact = true; + else + was_exact = false; + lastfetched_vals = node->iss_OrderByValues; + lastfetched_nulls = node->iss_OrderByNulls; + } + else + { + was_exact = true; + lastfetched_vals = scandesc->xs_orderbyvals; + lastfetched_nulls = scandesc->xs_orderbynulls; + } + + /* + * Can we return this tuple immediately, or does it need to be pushed + * to the reorder queue? If the ORDER BY expression values returned + * by the index were inaccurate, we can't return it yet, because the + * next tuple from the index might need to come before this one. + * Also, we can't return it yet if there are any smaller tuples in the + * queue already. + */ + if (!was_exact || (topmost && cmp_orderbyvals(lastfetched_vals, + lastfetched_nulls, + topmost->orderbyvals, + topmost->orderbynulls, + node) > 0)) + { + /* Put this tuple to the queue */ + reorderqueue_push(node, tuple, lastfetched_vals, lastfetched_nulls); + continue; + } + else + { + /* Can return this tuple immediately. */ + return slot; + } + } + + /* + * if we get here it means the index scan failed so we are at the end of + * the scan.. + */ return ExecClearTuple(slot); } /* + * Calculate the expressions in the ORDER BY clause, based on the heap tuple. + */ +static void +EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext) +{ + int i; + ListCell *l; + MemoryContext oldContext; + + oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); + + i = 0; + foreach(l, node->indexorderbyorig) + { + ExprState *orderby = (ExprState *) lfirst(l); + + node->iss_OrderByValues[i] = ExecEvalExpr(orderby, + econtext, + &node->iss_OrderByNulls[i], + NULL); + i++; + } + + MemoryContextSwitchTo(oldContext); +} + +/* * IndexRecheck -- access method routine to recheck a tuple in EvalPlanQual */ static bool @@ -134,6 +349,109 @@ IndexRecheck(IndexScanState *node, TupleTableSlot *slot) return ExecQual(node->indexqualorig, econtext, false); } + +/* + * Compare ORDER BY expression values. + */ +static int +cmp_orderbyvals(const Datum *adist, const bool *anulls, + const Datum *bdist, const bool *bnulls, + IndexScanState *node) +{ + int i; + int result; + + for (i = 0; i < node->iss_NumOrderByKeys; i++) + { + SortSupport ssup = &node->iss_SortSupport[i]; + + /* Handle nulls. We only support NULLS LAST. */ + if (anulls[i] && !bnulls[i]) + return 1; + else if (!anulls[i] && bnulls[i]) + return -1; + else if (anulls[i] && bnulls[i]) + return 0; + + result = ssup->comparator(adist[i], bdist[i], ssup); + if (result != 0) + return result; + } + + return 0; +} + +/* + * Pairing heap provides getting topmost (greatest) element while KNN provides + * ascending sort. That's why we inverse the sort order. + */ +static int +reorderqueue_cmp(const pairingheap_node *a, const pairingheap_node *b, + void *arg) +{ + ReorderTuple *rta = (ReorderTuple *) a; + ReorderTuple *rtb = (ReorderTuple *) b; + IndexScanState *node = (IndexScanState *) arg; + + return -cmp_orderbyvals(rta->orderbyvals, rta->orderbynulls, + rtb->orderbyvals, rtb->orderbynulls, + node); +} + +/* + * Helper function to push a tuple to the reorder queue. + */ +static void +reorderqueue_push(IndexScanState *node, HeapTuple tuple, + Datum *orderbyvals, bool *orderbynulls) +{ + IndexScanDesc scandesc = node->iss_ScanDesc; + EState *estate = node->ss.ps.state; + MemoryContext oldContext = MemoryContextSwitchTo(estate->es_query_cxt); + ReorderTuple *rt; + int i; + + rt = (ReorderTuple *) palloc(sizeof(ReorderTuple)); + rt->htup = heap_copytuple(tuple); + rt->orderbyvals = + (Datum *) palloc(sizeof(Datum) * scandesc->numberOfOrderBys); + rt->orderbynulls = + (bool *) palloc(sizeof(bool) * scandesc->numberOfOrderBys); + for (i = 0; i < node->iss_NumOrderByKeys; i++) + { + if (!orderbynulls[i]) + rt->orderbyvals[i] = datumCopy(orderbyvals[i], + node->iss_OrderByTypByVals[i], + node->iss_OrderByTypLens[i]); + else + rt->orderbyvals[i] = (Datum) 0; + rt->orderbynulls[i] = orderbynulls[i]; + } + pairingheap_add(node->iss_ReorderQueue, &rt->ph_node); + + MemoryContextSwitchTo(oldContext); +} + +/* + * Helper function to pop the next tuple from the reorder queue. + */ +static HeapTuple +reorderqueue_pop(IndexScanState *node) +{ + HeapTuple result; + ReorderTuple *topmost; + + topmost = (ReorderTuple *) pairingheap_remove_first(node->iss_ReorderQueue); + + result = topmost->htup; + pfree(topmost->orderbyvals); + pfree(topmost->orderbynulls); + pfree(topmost); + + return result; +} + + /* ---------------------------------------------------------------- * ExecIndexScan(node) * ---------------------------------------------------------------- @@ -147,9 +465,14 @@ ExecIndexScan(IndexScanState *node) if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady) ExecReScan((PlanState *) node); - return ExecScan(&node->ss, - (ExecScanAccessMtd) IndexNext, - (ExecScanRecheckMtd) IndexRecheck); + if (node->iss_NumOrderByKeys > 0) + return ExecScan(&node->ss, + (ExecScanAccessMtd) IndexNextWithReorder, + (ExecScanRecheckMtd) IndexRecheck); + else + return ExecScan(&node->ss, + (ExecScanAccessMtd) IndexNext, + (ExecScanRecheckMtd) IndexRecheck); } /* ---------------------------------------------------------------- @@ -465,6 +788,7 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) IndexScanState *indexstate; Relation currentRelation; bool relistarget; + int i; /* * create state structure @@ -501,6 +825,9 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) indexstate->indexqualorig = (List *) ExecInitExpr((Expr *) node->indexqualorig, (PlanState *) indexstate); + indexstate->indexorderbyorig = (List *) + ExecInitExpr((Expr *) node->indexorderbyorig, + (PlanState *) indexstate); /* * tuple table initialization @@ -581,6 +908,52 @@ ExecInitIndexScan(IndexScan *node, EState *estate, int eflags) NULL, /* no ArrayKeys */ NULL); + /* Initialize sort support, if we need to re-check ORDER BY exprs */ + if (indexstate->iss_NumOrderByKeys > 0) + { + int numOrderByKeys = indexstate->iss_NumOrderByKeys; + + /* + * Prepare sort support, and look up the distance type for each ORDER + * BY expression. + */ + indexstate->iss_SortSupport = + palloc0(numOrderByKeys * sizeof(SortSupportData)); + indexstate->iss_OrderByTypByVals = + palloc(numOrderByKeys * sizeof(bool)); + indexstate->iss_OrderByTypLens = + palloc(numOrderByKeys * sizeof(int16)); + for (i = 0; i < indexstate->iss_NumOrderByKeys; i++) + { + Oid orderbyType; + Oid opfamily; + int16 strategy; + + PrepareSortSupportFromOrderingOp(node->indexorderbyops[i], + &indexstate->iss_SortSupport[i]); + + if (!get_ordering_op_properties(node->indexorderbyops[i], + &opfamily, &orderbyType, &strategy)) + { + elog(LOG, "operator %u is not a valid ordering operator", + node->indexorderbyops[i]); + } + get_typlenbyval(orderbyType, + &indexstate->iss_OrderByTypLens[i], + &indexstate->iss_OrderByTypByVals[i]); + } + + /* allocate arrays to hold the re-calculated distances */ + indexstate->iss_OrderByValues = + palloc(indexstate->iss_NumOrderByKeys * sizeof(Datum)); + indexstate->iss_OrderByNulls = + palloc(indexstate->iss_NumOrderByKeys * sizeof(bool)); + + /* and initialize the reorder queue */ + indexstate->iss_ReorderQueue = pairingheap_allocate(reorderqueue_cmp, + indexstate); + } + /* * If we have runtime keys, we need an ExprContext to evaluate them. The * node's standard context won't do because we want to reset that context |