diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2009-03-24 22:06:03 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2009-03-24 22:06:03 +0000 |
commit | e5efda442cfcfa20a0cf6166952c671a0f39689b (patch) | |
tree | 5b2463555c3b06d69511b0747c4bcb59c4309bb5 | |
parent | fc022d72c79b83dfe17aad33b6de2cf0a4f6f3ba (diff) | |
download | postgresql-e5efda442cfcfa20a0cf6166952c671a0f39689b.tar.gz postgresql-e5efda442cfcfa20a0cf6166952c671a0f39689b.zip |
Install a search tree depth limit in GIN bulk-insert operations, to prevent
them from degrading badly when the input is sorted or nearly so. In this
scenario the tree is unbalanced to the point of becoming a mere linked list,
so insertions become O(N^2). The easiest and most safely back-patchable
solution is to stop growing the tree sooner, i.e., limit the growth of N. We
might later consider a rebalancing tree algorithm, but it's not clear that
the benefit would be worth the cost and complexity. Per report from Sergey
Burladyan and an earlier complaint from Heikki.
Back-patch to 8.2; older versions didn't have GIN indexes.
-rw-r--r-- | src/backend/access/gin/ginfast.c | 9 | ||||
-rw-r--r-- | src/backend/access/gin/gininsert.c | 6 | ||||
-rw-r--r-- | src/include/access/gin.h | 17 |
3 files changed, 20 insertions, 12 deletions
diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index d8624237ec1..f474ad6598e 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.1 2009/03/24 20:17:10 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.2 2009/03/24 22:06:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -749,9 +749,10 @@ ginInsertCleanup(Relation index, GinState *ginstate, * XXX using up maintenance_work_mem here is probably unreasonably * much, since vacuum might already be using that much. */ - if ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber || - ( GinPageHasFullRow(page) && - accum.allocatedMemory > maintenance_work_mem * 1024L ) ) + if (GinPageGetOpaque(page)->rightlink == InvalidBlockNumber || + (GinPageHasFullRow(page) && + (accum.allocatedMemory >= maintenance_work_mem * 1024L || + accum.maxdepth > GIN_MAX_TREE_DEPTH))) { ItemPointerData *list; uint32 nlist; diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index d05882cdb94..f6a348eb85d 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.19 2009/03/24 20:17:11 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.20 2009/03/24 22:06:03 tgl Exp $ *------------------------------------------------------------------------- */ @@ -245,7 +245,9 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values, &htup->t_self); /* If we've maxed out our available memory, dump everything to the index */ - if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L) + /* Also 
dump if the tree seems to be getting too unbalanced */ + if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L || + buildstate->accum.maxdepth > GIN_MAX_TREE_DEPTH) { ItemPointerData *list; Datum entry; diff --git a/src/include/access/gin.h b/src/include/access/gin.h index f0f45bc5e8a..c591c53638c 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -4,7 +4,7 @@ * * Copyright (c) 2006-2009, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.29 2009/03/24 20:17:14 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.30 2009/03/24 22:06:03 tgl Exp $ *-------------------------------------------------------------------------- */ #ifndef GIN_H @@ -27,6 +27,14 @@ #define GINNProcs 5 /* + * Max depth allowed in search tree during bulk inserts. This is to keep from + * degenerating to O(N^2) behavior when the tree is unbalanced due to sorted + * or nearly-sorted input. (Perhaps it would be better to use a balanced-tree + * algorithm, but in common cases that would only add useless overhead.) + */ +#define GIN_MAX_TREE_DEPTH 100 + +/* * Page opaque data in a inverted index page. * * Note: GIN does not include a page ID word as do the other index types. @@ -434,12 +442,9 @@ extern IndexTuple ginPageGetLinkItup(Buffer buf); /* gindatapage.c */ extern int compareItemPointers(ItemPointer a, ItemPointer b); -extern void -MergeItemPointers( - ItemPointerData *dst, +extern void MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, - ItemPointerData *b, uint32 nb -); + ItemPointerData *b, uint32 nb); extern void GinDataPageAddItem(Page page, void *data, OffsetNumber offset); extern void PageDeletePostingItem(Page page, OffsetNumber offset); |