diff options
author | Alvaro Herrera <alvherre@alvh.no-ip.org> | 2013-01-17 15:55:10 -0300 |
---|---|---|
committer | Alvaro Herrera <alvherre@alvh.no-ip.org> | 2013-01-17 16:13:17 -0300 |
commit | 279628a0a7cf582f7dfb68e25b7b76183dd8ff2f (patch) | |
tree | 7a9264f50a99eb6d52fcf41d1c6d78e23a07f6bf /src/backend/storage/buffer/bufmgr.c | |
parent | 0b6329130e8e4576e97ff763f0e773347e1a88af (diff) | |
download | postgresql-279628a0a7cf582f7dfb68e25b7b76183dd8ff2f.tar.gz postgresql-279628a0a7cf582f7dfb68e25b7b76183dd8ff2f.zip |
Accelerate end-of-transaction dropping of relations
When relations are dropped, at end of transaction we need to remove the
files and clean the buffer pool of buffers containing pages of those
relations. Previously we would scan the buffer pool once per relation
to clean up buffers. When there are many relations to drop, the
repeated scans make this process slow; so we now instead pass a list of
relations to drop and scan the pool once, checking each buffer against
the passed list. When the number of relations is larger than a
threshold (which as of this patch is being set to 20 relations) we sort
the array before starting, and bsearch the array; when it's smaller, we
simply scan the array linearly each time, because that's faster. The
exact optimal threshold value depends on many factors, but the
difference is not likely to be significant enough to justify making it
user-settable.
This has been measured to be a significant win (a 15x win when dropping
100,000 relations; an extreme case, but reportedly a real one).
Author: Tomas Vondra, some tweaks by me
Reviewed by: Robert Haas, Shigeru Hanada, Andres Freund, Álvaro Herrera
Diffstat (limited to 'src/backend/storage/buffer/bufmgr.c')
-rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 109 |
1 files changed, 99 insertions, 10 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 03ed41dc152..13b80aefc5b 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -62,6 +62,7 @@ #define BUF_WRITTEN 0x01 #define BUF_REUSABLE 0x02 +#define DROP_RELS_BSEARCH_THRESHOLD 20 /* GUC variables */ bool zero_damaged_pages = false; @@ -107,6 +108,7 @@ static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, bool *foundPtr); static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln); static void AtProcExit_Buffers(int code, Datum arg); +static int rnode_comparator(const void *p1, const void *p2); /* @@ -2086,43 +2088,103 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber forkNum, } /* --------------------------------------------------------------------- - * DropRelFileNodeAllBuffers + * DropRelFileNodesAllBuffers * * This function removes from the buffer pool all the pages of all - * forks of the specified relation. It's equivalent to calling - * DropRelFileNodeBuffers once per fork with firstDelBlock = 0. + * forks of the specified relations. It's equivalent to calling + * DropRelFileNodeBuffers once per fork per relation with + * firstDelBlock = 0. * -------------------------------------------------------------------- */ void -DropRelFileNodeAllBuffers(RelFileNodeBackend rnode) +DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes) { - int i; + int i, + n = 0; + RelFileNode *nodes; + bool use_bsearch; + + if (nnodes == 0) + return; + + nodes = palloc(sizeof(RelFileNode) * nnodes); /* non-local relations */ /* If it's a local relation, it's localbuf.c's problem. */ - if (RelFileNodeBackendIsTemp(rnode)) + for (i = 0; i < nnodes; i++) { - if (rnode.backend == MyBackendId) - DropRelFileNodeAllLocalBuffers(rnode.node); + if (RelFileNodeBackendIsTemp(rnodes[i])) + { + if (rnodes[i].backend == MyBackendId) + DropRelFileNodeAllLocalBuffers(rnodes[i].node); + } + else + nodes[n++] = rnodes[i].node; + } + + /* + * If there are no non-local relations, then we're done. Release the memory + * and return. + */ + if (n == 0) + { + pfree(nodes); return; } + /* + * For low number of relations to drop just use a simple walk through, to + * save the bsearch overhead. The threshold to use is rather a guess than a + * exactly determined value, as it depends on many factors (CPU and RAM + * speeds, amount of shared buffers etc.). + */ + use_bsearch = n > DROP_RELS_BSEARCH_THRESHOLD; + + /* sort the list of rnodes if necessary */ + if (use_bsearch) + pg_qsort(nodes, n, sizeof(RelFileNode), rnode_comparator); + for (i = 0; i < NBuffers; i++) { + RelFileNode *rnode = NULL; volatile BufferDesc *bufHdr = &BufferDescriptors[i]; /* * As in DropRelFileNodeBuffers, an unlocked precheck should be safe * and saves some cycles. */ - if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node)) + + if (!use_bsearch) + { + int j; + + for (j = 0; j < n; j++) + { + if (RelFileNodeEquals(bufHdr->tag.rnode, nodes[j])) + { + rnode = &nodes[j]; + break; + } + } + } + else + { + rnode = bsearch((const void *) &(bufHdr->tag.rnode), + nodes, n, sizeof(RelFileNode), + rnode_comparator); + } + + /* buffer doesn't belong to any of the given relfilenodes; skip it */ + if (rnode == NULL) continue; LockBufHdr(bufHdr); - if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node)) + if (RelFileNodeEquals(bufHdr->tag.rnode, (*rnode))) InvalidateBuffer(bufHdr); /* releases spinlock */ else UnlockBufHdr(bufHdr); } + + pfree(nodes); } /* --------------------------------------------------------------------- @@ -2953,3 +3015,30 @@ local_buffer_write_error_callback(void *arg) pfree(path); } } + +/* + * RelFileNode qsort/bsearch comparator; see RelFileNodeEquals. + */ +static int +rnode_comparator(const void *p1, const void *p2) +{ + RelFileNode n1 = *(RelFileNode *) p1; + RelFileNode n2 = *(RelFileNode *) p2; + + if (n1.relNode < n2.relNode) + return -1; + else if (n1.relNode > n2.relNode) + return 1; + + if (n1.dbNode < n2.dbNode) + return -1; + else if (n1.dbNode > n2.dbNode) + return 1; + + if (n1.spcNode < n2.spcNode) + return -1; + else if (n1.spcNode > n2.spcNode) + return 1; + else + return 0; +} |