about summary refs log tree commit diff
path: root/src/backend/access/gist/gistutil.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2012-08-30 23:47:54 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2012-08-30 23:47:54 -0400
commit6707dd48cdfcc95934a89df5ad4c4c2ecf0e5225 (patch)
tree1a060de315f5a2e567448783d57170e3a27adceb /src/backend/access/gist/gistutil.c
parentf6956eb74edcdf21339f9d200ef63c4015a5f4cb (diff)
downloadpostgresql-6707dd48cdfcc95934a89df5ad4c4c2ecf0e5225.tar.gz
postgresql-6707dd48cdfcc95934a89df5ad4c4c2ecf0e5225.zip
Back-patch recent fixes for gistchoose and gistRelocateBuildBuffersOnSplit.
This back-ports commits c8ba697a4bdb934f0c51424c654e8db6133ea255 and e5db11c5582b469c04a11f217a0f32c827da5dd7, which fix one definite and one speculative bug in gistchoose, and make the code a lot more intelligible as well. In 9.2 only, this also affects the largely-copied-and-pasted logic in gistRelocateBuildBuffersOnSplit. The impact of the bugs was that the functions might make poor decisions as to which index tree branch to push a new entry down into, resulting in GiST index bloat and poor performance. The fixes rectify these decisions for future insertions, but a REINDEX would be needed to clean up any existing index bloat. Alexander Korotkov, Robert Haas, Tom Lane
Diffstat (limited to 'src/backend/access/gist/gistutil.c')
-rw-r--r--src/backend/access/gist/gistutil.c98
1 file changed, 73 insertions(+), 25 deletions(-)
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 1754a103699..0ae4176132e 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -365,72 +365,120 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
}
/*
- * find entry with lowest penalty
+ * Search an upper index page for the entry with lowest penalty for insertion
+ * of the new index key contained in "it".
+ *
+ * Returns the index of the page entry to insert into.
*/
OffsetNumber
gistchoose(Relation r, Page p, IndexTuple it, /* it has compressed entry */
GISTSTATE *giststate)
{
+ OffsetNumber result;
OffsetNumber maxoff;
OffsetNumber i;
- OffsetNumber which;
- float sum_grow,
- which_grow[INDEX_MAX_KEYS];
+ float best_penalty[INDEX_MAX_KEYS];
GISTENTRY entry,
identry[INDEX_MAX_KEYS];
bool isnull[INDEX_MAX_KEYS];
- maxoff = PageGetMaxOffsetNumber(p);
- *which_grow = -1.0;
- which = InvalidOffsetNumber;
- sum_grow = 1;
+ Assert(!GistPageIsLeaf(p));
+
gistDeCompressAtt(giststate, r,
it, NULL, (OffsetNumber) 0,
identry, isnull);
+ /* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
+ result = FirstOffsetNumber;
+
+ /*
+ * The index may have multiple columns, and there's a penalty value for
+ * each column. The penalty associated with a column that appears earlier
+ * in the index definition is strictly more important than the penalty of
+ * a column that appears later in the index definition.
+ *
+ * best_penalty[j] is the best penalty we have seen so far for column j,
+ * or -1 when we haven't yet examined column j. Array entries to the
+ * right of the first -1 are undefined.
+ */
+ best_penalty[0] = -1;
+
+ /*
+ * Loop over tuples on page.
+ */
+ maxoff = PageGetMaxOffsetNumber(p);
Assert(maxoff >= FirstOffsetNumber);
- Assert(!GistPageIsLeaf(p));
- for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
- int j;
IndexTuple itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
+ bool zero_penalty;
+ int j;
- sum_grow = 0;
+ zero_penalty = true;
+
+ /* Loop over index attributes. */
for (j = 0; j < r->rd_att->natts; j++)
{
Datum datum;
float usize;
bool IsNull;
+ /* Compute penalty for this column. */
datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
gistdentryinit(giststate, j, &entry, datum, r, p, i,
FALSE, IsNull);
usize = gistpenalty(giststate, j, &entry, IsNull,
&identry[j], isnull[j]);
+ if (usize > 0)
+ zero_penalty = false;
- if (which_grow[j] < 0 || usize < which_grow[j])
+ if (best_penalty[j] < 0 || usize < best_penalty[j])
+ {
+ /*
+ * New best penalty for column. Tentatively select this tuple
+ * as the target, and record the best penalty. Then reset the
+ * next column's penalty to "unknown" (and indirectly, the
+ * same for all the ones to its right). This will force us to
+ * adopt this tuple's penalty values as the best for all the
+ * remaining columns during subsequent loop iterations.
+ */
+ result = i;
+ best_penalty[j] = usize;
+
+ if (j < r->rd_att->natts - 1)
+ best_penalty[j + 1] = -1;
+ }
+ else if (best_penalty[j] == usize)
{
- which = i;
- which_grow[j] = usize;
- if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber)
- which_grow[j + 1] = -1;
- sum_grow += which_grow[j];
+ /*
+ * The current tuple is exactly as good for this column as the
+ * best tuple seen so far. The next iteration of this loop
+ * will compare the next column.
+ */
}
- else if (which_grow[j] == usize)
- sum_grow += usize;
else
{
- sum_grow = 1;
+ /*
+ * The current tuple is worse for this column than the best
+ * tuple seen so far. Skip the remaining columns and move on
+ * to the next tuple, if any.
+ */
+ zero_penalty = false; /* so outer loop won't exit */
break;
}
}
- }
- if (which == InvalidOffsetNumber)
- which = FirstOffsetNumber;
+ /*
+ * If we find a tuple with zero penalty for all columns, there's no
+ * need to examine remaining tuples; just break out of the loop and
+ * return it.
+ */
+ if (zero_penalty)
+ break;
+ }
- return which;
+ return result;
}
/*