| author | Tom Lane <tgl@sss.pgh.pa.us> | 2012-08-30 23:47:54 -0400 |
|---|---|---|
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2012-08-30 23:47:54 -0400 |
| commit | 6707dd48cdfcc95934a89df5ad4c4c2ecf0e5225 (patch) | |
| tree | 1a060de315f5a2e567448783d57170e3a27adceb /src/backend/access/gist/gistutil.c | |
| parent | f6956eb74edcdf21339f9d200ef63c4015a5f4cb (diff) | |
| download | postgresql-6707dd48cdfcc95934a89df5ad4c4c2ecf0e5225.tar.gz, postgresql-6707dd48cdfcc95934a89df5ad4c4c2ecf0e5225.zip | |
Back-patch recent fixes for gistchoose and gistRelocateBuildBuffersOnSplit.

This back-ports commits c8ba697a4bdb934f0c51424c654e8db6133ea255 and
e5db11c5582b469c04a11f217a0f32c827da5dd7, which fix one definite and one
speculative bug in gistchoose, and make the code a lot more intelligible as
well. In 9.2 only, this also affects the largely-copied-and-pasted logic
in gistRelocateBuildBuffersOnSplit.

The impact of the bugs was that the functions might make poor decisions
as to which index tree branch to push a new entry down into, resulting in
GiST index bloat and poor performance. The fixes rectify these decisions
for future insertions, but a REINDEX would be needed to clean up any
existing index bloat.

Alexander Korotkov, Robert Haas, Tom Lane
Diffstat (limited to 'src/backend/access/gist/gistutil.c')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | src/backend/access/gist/gistutil.c | 98 |

1 file changed, 73 insertions, 25 deletions
```diff
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 1754a103699..0ae4176132e 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -365,72 +365,120 @@ gistgetadjusted(Relation r, IndexTuple oldtup, IndexTuple addtup, GISTSTATE *gis
 }
 
 /*
- * find entry with lowest penalty
+ * Search an upper index page for the entry with lowest penalty for insertion
+ * of the new index key contained in "it".
+ *
+ * Returns the index of the page entry to insert into.
  */
 OffsetNumber
 gistchoose(Relation r, Page p, IndexTuple it,	/* it has compressed entry */
 		   GISTSTATE *giststate)
 {
+	OffsetNumber result;
 	OffsetNumber maxoff;
 	OffsetNumber i;
-	OffsetNumber which;
-	float		sum_grow,
-				which_grow[INDEX_MAX_KEYS];
+	float		best_penalty[INDEX_MAX_KEYS];
 	GISTENTRY	entry,
 				identry[INDEX_MAX_KEYS];
 	bool		isnull[INDEX_MAX_KEYS];
 
-	maxoff = PageGetMaxOffsetNumber(p);
-	*which_grow = -1.0;
-	which = InvalidOffsetNumber;
-	sum_grow = 1;
+	Assert(!GistPageIsLeaf(p));
+
 	gistDeCompressAtt(giststate, r,
 					  it, NULL, (OffsetNumber) 0,
 					  identry, isnull);
 
+	/* we'll return FirstOffsetNumber if page is empty (shouldn't happen) */
+	result = FirstOffsetNumber;
+
+	/*
+	 * The index may have multiple columns, and there's a penalty value for
+	 * each column.  The penalty associated with a column that appears earlier
+	 * in the index definition is strictly more important than the penalty of
+	 * a column that appears later in the index definition.
+	 *
+	 * best_penalty[j] is the best penalty we have seen so far for column j,
+	 * or -1 when we haven't yet examined column j.  Array entries to the
+	 * right of the first -1 are undefined.
+	 */
+	best_penalty[0] = -1;
+
+	/*
+	 * Loop over tuples on page.
+	 */
+	maxoff = PageGetMaxOffsetNumber(p);
 	Assert(maxoff >= FirstOffsetNumber);
-	Assert(!GistPageIsLeaf(p));
 
-	for (i = FirstOffsetNumber; i <= maxoff && sum_grow; i = OffsetNumberNext(i))
+	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
 	{
-		int			j;
 		IndexTuple	itup = (IndexTuple) PageGetItem(p, PageGetItemId(p, i));
+		bool		zero_penalty;
+		int			j;
 
-		sum_grow = 0;
+		zero_penalty = true;
+
+		/* Loop over index attributes. */
 		for (j = 0; j < r->rd_att->natts; j++)
 		{
 			Datum		datum;
 			float		usize;
 			bool		IsNull;
 
+			/* Compute penalty for this column. */
 			datum = index_getattr(itup, j + 1, giststate->tupdesc, &IsNull);
 			gistdentryinit(giststate, j, &entry, datum, r, p, i,
 						   FALSE, IsNull);
 			usize = gistpenalty(giststate, j, &entry, IsNull,
 								&identry[j], isnull[j]);
+			if (usize > 0)
+				zero_penalty = false;
 
-			if (which_grow[j] < 0 || usize < which_grow[j])
+			if (best_penalty[j] < 0 || usize < best_penalty[j])
+			{
+				/*
+				 * New best penalty for column.  Tentatively select this tuple
+				 * as the target, and record the best penalty.  Then reset the
+				 * next column's penalty to "unknown" (and indirectly, the
+				 * same for all the ones to its right).  This will force us to
+				 * adopt this tuple's penalty values as the best for all the
+				 * remaining columns during subsequent loop iterations.
+				 */
+				result = i;
+				best_penalty[j] = usize;
+
+				if (j < r->rd_att->natts - 1)
+					best_penalty[j + 1] = -1;
+			}
+			else if (best_penalty[j] == usize)
 			{
-				which = i;
-				which_grow[j] = usize;
-				if (j < r->rd_att->natts - 1 && i == FirstOffsetNumber)
-					which_grow[j + 1] = -1;
-				sum_grow += which_grow[j];
+				/*
+				 * The current tuple is exactly as good for this column as the
+				 * best tuple seen so far.  The next iteration of this loop
+				 * will compare the next column.
+				 */
 			}
-			else if (which_grow[j] == usize)
-				sum_grow += usize;
 			else
 			{
-				sum_grow = 1;
+				/*
+				 * The current tuple is worse for this column than the best
+				 * tuple seen so far.  Skip the remaining columns and move on
+				 * to the next tuple, if any.
+				 */
+				zero_penalty = false;	/* so outer loop won't exit */
 				break;
 			}
 		}
-	}
 
-	if (which == InvalidOffsetNumber)
-		which = FirstOffsetNumber;
+		/*
+		 * If we find a tuple with zero penalty for all columns, there's no
+		 * need to examine remaining tuples; just break out of the loop and
+		 * return it.
+		 */
+		if (zero_penalty)
+			break;
+	}
 
-	return which;
+	return result;
 }
 
 /*
```
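For readers who want to see the decision rule in isolation: below is a small, self-contained sketch of the selection logic the patched gistchoose applies — earlier index columns strictly dominate later ones, a tie on a column defers the decision to the next column, and a candidate with zero penalty in every column ends the search early. This is illustration only, not PostgreSQL code: the `choose_candidate` function, the fixed-size penalty matrix, and the sample values are made up for the example and stand in for `gistpenalty()` and the real page/tuple machinery.

```c
/*
 * Simplified illustration of the column-by-column penalty comparison used
 * by the fixed gistchoose.  Candidates are rows of precomputed per-column
 * penalties; best_penalty[] entries to the right of the first -1 are
 * considered "not yet examined".
 */
#include <stdio.h>

#define NCOLS  2
#define NCANDS 4

static int
choose_candidate(const float penalties[NCANDS][NCOLS])
{
    float   best_penalty[NCOLS];
    int     result = 0;
    int     i;

    best_penalty[0] = -1;       /* no column examined yet */

    for (i = 0; i < NCANDS; i++)
    {
        int     zero_penalty = 1;
        int     j;

        for (j = 0; j < NCOLS; j++)
        {
            float   usize = penalties[i][j];

            if (usize > 0)
                zero_penalty = 0;

            if (best_penalty[j] < 0 || usize < best_penalty[j])
            {
                /* new best for this column: adopt candidate, forget later columns */
                result = i;
                best_penalty[j] = usize;
                if (j < NCOLS - 1)
                    best_penalty[j + 1] = -1;
            }
            else if (best_penalty[j] == usize)
            {
                /* tie on this column: let the next column decide */
            }
            else
            {
                /* worse on this column: reject candidate, keep scanning */
                zero_penalty = 0;
                break;
            }
        }

        if (zero_penalty)
            break;              /* can't do better than zero everywhere */
    }
    return result;
}

int
main(void)
{
    /* candidate 2 ties candidate 0 on column 0 but beats it on column 1 */
    const float penalties[NCANDS][NCOLS] = {
        {1.0f, 5.0f},
        {2.0f, 0.0f},
        {1.0f, 2.0f},
        {3.0f, 0.0f},
    };

    printf("chosen candidate: %d\n", choose_candidate(penalties));
    return 0;
}
```

Running the sketch should print "chosen candidate: 2": candidate 2 ties candidate 0 on the first column and improves on the second, while candidates 1 and 3 lose on the first column outright, mirroring how the patched code lets an earlier column veto a tuple regardless of later columns.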