aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/executor/nodeBitmapHeapscan.c96
-rw-r--r--src/backend/storage/buffer/bufmgr.c59
-rw-r--r--src/backend/storage/buffer/localbuf.c39
-rw-r--r--src/backend/storage/file/fd.c38
-rw-r--r--src/backend/storage/smgr/md.c22
-rw-r--r--src/backend/storage/smgr/smgr.c15
-rw-r--r--src/backend/utils/misc/guc.c76
-rw-r--r--src/backend/utils/misc/postgresql.conf.sample4
-rw-r--r--src/include/nodes/execnodes.h8
-rw-r--r--src/include/pg_config_manual.h11
-rw-r--r--src/include/storage/buf_internals.h6
-rw-r--r--src/include/storage/bufmgr.h5
-rw-r--r--src/include/storage/fd.h3
-rw-r--r--src/include/storage/smgr.h6
14 files changed, 372 insertions, 16 deletions
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 880b9c9590e..2ba8b89ee35 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -21,7 +21,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.32 2009/01/10 21:08:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.33 2009/01/12 05:10:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -67,6 +67,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
TIDBitmap *tbm;
TBMIterator *tbmiterator;
TBMIterateResult *tbmres;
+ TBMIterator *prefetch_iterator;
OffsetNumber targoffset;
TupleTableSlot *slot;
@@ -81,6 +82,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
tbm = node->tbm;
tbmiterator = node->tbmiterator;
tbmres = node->tbmres;
+ prefetch_iterator = node->prefetch_iterator;
/*
* Check if we are evaluating PlanQual for tuple of this relation.
@@ -114,6 +116,15 @@ BitmapHeapNext(BitmapHeapScanState *node)
/*
* If we haven't yet performed the underlying index scan, do it, and
* begin the iteration over the bitmap.
+ *
+ * For prefetching, we use *two* iterators, one for the pages we are
+ * actually scanning and another that runs ahead of the first for
+ * prefetching. node->prefetch_pages tracks exactly how many pages
+ * ahead the prefetch iterator is. Also, node->prefetch_target tracks
+ * the desired prefetch distance, which starts small and increases up
+ * to the GUC-controlled maximum, target_prefetch_pages. This is to
+ * avoid doing a lot of prefetching in a scan that stops after a few
+ * tuples because of a LIMIT.
*/
if (tbm == NULL)
{
@@ -125,6 +136,15 @@ BitmapHeapNext(BitmapHeapScanState *node)
node->tbm = tbm;
node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
node->tbmres = tbmres = NULL;
+
+#ifdef USE_PREFETCH
+ if (target_prefetch_pages > 0)
+ {
+ node->prefetch_iterator = prefetch_iterator = tbm_begin_iterate(tbm);
+ node->prefetch_pages = 0;
+ node->prefetch_target = -1;
+ }
+#endif /* USE_PREFETCH */
}
for (;;)
@@ -144,6 +164,22 @@ BitmapHeapNext(BitmapHeapScanState *node)
break;
}
+#ifdef USE_PREFETCH
+ if (node->prefetch_pages > 0)
+ {
+ /* The main iterator has closed the distance by one page */
+ node->prefetch_pages--;
+ }
+ else if (prefetch_iterator)
+ {
+ /* Do not let the prefetch iterator get behind the main one */
+ TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
+
+ if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
+ elog(ERROR, "prefetch and main iterators are out of sync");
+ }
+#endif /* USE_PREFETCH */
+
/*
* Ignore any claimed entries past what we think is the end of the
* relation. (This is probably not necessary given that we got at
@@ -165,6 +201,23 @@ BitmapHeapNext(BitmapHeapScanState *node)
* Set rs_cindex to first slot to examine
*/
scan->rs_cindex = 0;
+
+#ifdef USE_PREFETCH
+ /*
+ * Increase prefetch target if it's not yet at the max. Note
+ * that we will increase it to zero after fetching the very
+ * first page/tuple, then to one after the second tuple is
+ * fetched, then it doubles as later pages are fetched.
+ */
+ if (node->prefetch_target >= target_prefetch_pages)
+ /* don't increase any further */ ;
+ else if (node->prefetch_target >= target_prefetch_pages / 2)
+ node->prefetch_target = target_prefetch_pages;
+ else if (node->prefetch_target > 0)
+ node->prefetch_target *= 2;
+ else
+ node->prefetch_target++;
+#endif /* USE_PREFETCH */
}
else
{
@@ -172,7 +225,40 @@ BitmapHeapNext(BitmapHeapScanState *node)
* Continuing in previously obtained page; advance rs_cindex
*/
scan->rs_cindex++;
+
+#ifdef USE_PREFETCH
+ /*
+ * Try to prefetch at least a few pages even before we get to the
+ * second page if we don't stop reading after the first tuple.
+ */
+ if (node->prefetch_target < target_prefetch_pages)
+ node->prefetch_target++;
+#endif /* USE_PREFETCH */
+ }
+
+#ifdef USE_PREFETCH
+ /*
+ * We issue prefetch requests *after* fetching the current page
+ * to try to avoid having prefetching interfere with the main I/O.
+ */
+ if (prefetch_iterator)
+ {
+ while (node->prefetch_pages < node->prefetch_target)
+ {
+ TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
+
+ if (tbmpre == NULL)
+ {
+ /* No more pages to prefetch */
+ tbm_end_iterate(prefetch_iterator);
+ node->prefetch_iterator = prefetch_iterator = NULL;
+ break;
+ }
+ node->prefetch_pages++;
+ PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
+ }
}
+#endif /* USE_PREFETCH */
/*
* Out of range? If so, nothing more to look at on this page
@@ -379,11 +465,14 @@ ExecBitmapHeapReScan(BitmapHeapScanState *node, ExprContext *exprCtxt)
if (node->tbmiterator)
tbm_end_iterate(node->tbmiterator);
+ if (node->prefetch_iterator)
+ tbm_end_iterate(node->prefetch_iterator);
if (node->tbm)
tbm_free(node->tbm);
node->tbm = NULL;
node->tbmiterator = NULL;
node->tbmres = NULL;
+ node->prefetch_iterator = NULL;
/*
* Always rescan the input immediately, to ensure we can pass down any
@@ -429,6 +518,8 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
*/
if (node->tbmiterator)
tbm_end_iterate(node->tbmiterator);
+ if (node->prefetch_iterator)
+ tbm_end_iterate(node->prefetch_iterator);
if (node->tbm)
tbm_free(node->tbm);
@@ -474,6 +565,9 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
scanstate->tbm = NULL;
scanstate->tbmiterator = NULL;
scanstate->tbmres = NULL;
+ scanstate->prefetch_iterator = NULL;
+ scanstate->prefetch_pages = 0;
+ scanstate->prefetch_target = 0;
/*
* Miscellaneous initialization
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 6046f6ef6aa..534c7516f78 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.244 2009/01/01 17:23:47 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.245 2009/01/12 05:10:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -65,6 +65,13 @@ bool zero_damaged_pages = false;
int bgwriter_lru_maxpages = 100;
double bgwriter_lru_multiplier = 2.0;
+/*
+ * How many buffers PrefetchBuffer callers should try to stay ahead of their
+ * ReadBuffer calls by. This is maintained by the assign hook for
+ * effective_io_concurrency. Zero means "never prefetch".
+ */
+int target_prefetch_pages = 0;
+
/* local state for StartBufferIO and related functions */
static volatile BufferDesc *InProgressBuf = NULL;
static bool IsForInput;
@@ -96,6 +103,56 @@ static void AtProcExit_Buffers(int code, Datum arg);
/*
+ * PrefetchBuffer -- initiate asynchronous read of a block of a relation
+ *
+ * This is named by analogy to ReadBuffer but doesn't actually allocate a
+ * buffer. Instead it tries to ensure that a future ReadBuffer for the given
+ * block will not be delayed by the I/O. Prefetching is optional.
+ * No-op if prefetching isn't compiled in.
+ */
+void
+PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
+{
+#ifdef USE_PREFETCH
+ Assert(RelationIsValid(reln));
+ Assert(BlockNumberIsValid(blockNum));
+
+ /* Open it at the smgr level if not already done */
+ RelationOpenSmgr(reln);
+
+ if (reln->rd_istemp)
+ {
+ /* pass it off to localbuf.c */
+ LocalPrefetchBuffer(reln->rd_smgr, forkNum, blockNum);
+ }
+ else
+ {
+ BufferTag newTag; /* identity of requested block */
+ uint32 newHash; /* hash value for newTag */
+ LWLockId newPartitionLock; /* buffer partition lock for it */
+ int buf_id;
+
+ /* create a tag so we can lookup the buffer */
+ INIT_BUFFERTAG(newTag, reln->rd_smgr->smgr_rnode, forkNum, blockNum);
+
+ /* determine its hash code and partition lock ID */
+ newHash = BufTableHashCode(&newTag);
+ newPartitionLock = BufMappingPartitionLock(newHash);
+
+ /* see if the block is in the buffer pool already */
+ LWLockAcquire(newPartitionLock, LW_SHARED);
+ buf_id = BufTableLookup(&newTag, newHash);
+ LWLockRelease(newPartitionLock);
+
+ /* If not in buffers, initiate prefetch */
+ if (buf_id < 0)
+ smgrprefetch(reln->rd_smgr, forkNum, blockNum);
+ }
+#endif /* USE_PREFETCH */
+}
+
+
+/*
* ReadBuffer -- a shorthand for ReadBufferExtended, for reading from main
* fork with RBM_NORMAL mode and default strategy.
*/
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 4dd5619f39f..5431419cfe6 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.85 2009/01/01 17:23:47 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.86 2009/01/12 05:10:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,6 +53,43 @@ static Block GetLocalBufferStorage(void);
/*
+ * LocalPrefetchBuffer -
+ * initiate asynchronous read of a block of a relation
+ *
+ * Do PrefetchBuffer's work for temporary relations.
+ * No-op if prefetching isn't compiled in.
+ */
+void
+LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
+ BlockNumber blockNum)
+{
+#ifdef USE_PREFETCH
+ BufferTag newTag; /* identity of requested block */
+ LocalBufferLookupEnt *hresult;
+
+ INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum);
+
+ /* Initialize local buffers if first request in this session */
+ if (LocalBufHash == NULL)
+ InitLocalBuffers();
+
+ /* See if the desired buffer already exists */
+ hresult = (LocalBufferLookupEnt *)
+ hash_search(LocalBufHash, (void *) &newTag, HASH_FIND, NULL);
+
+ if (hresult)
+ {
+ /* Yes, so nothing to do */
+ return;
+ }
+
+ /* Not in buffers, so initiate prefetch */
+ smgrprefetch(smgr, forkNum, blockNum);
+#endif /* USE_PREFETCH */
+}
+
+
+/*
* LocalBufferAlloc -
* Find or create a local buffer for the given page of the given relation.
*
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index f67ab94fd52..b91946a0350 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.146 2009/01/01 17:23:47 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.147 2009/01/12 05:10:44 tgl Exp $
*
* NOTES:
*
@@ -1029,6 +1029,42 @@ FileClose(File file)
FreeVfd(file);
}
+/*
+ * FilePrefetch - initiate asynchronous read of a given range of the file.
+ * The logical seek position is unaffected.
+ *
+ * Currently the only implementation of this function is using posix_fadvise
+ * which is the simplest standardized interface that accomplishes this.
+ * We could add an implementation using libaio in the future; but note that
+ * this API is inappropriate for libaio, which wants to have a buffer provided
+ * to read into.
+ */
+int
+FilePrefetch(File file, off_t offset, int amount)
+{
+#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_WILLNEED)
+ int returnCode;
+
+ Assert(FileIsValid(file));
+
+ DO_DB(elog(LOG, "FilePrefetch: %d (%s) " INT64_FORMAT " %d",
+ file, VfdCache[file].fileName,
+ (int64) offset, amount));
+
+ returnCode = FileAccess(file);
+ if (returnCode < 0)
+ return returnCode;
+
+ returnCode = posix_fadvise(VfdCache[file].fd, offset, amount,
+ POSIX_FADV_WILLNEED);
+
+ return returnCode;
+#else
+ Assert(FileIsValid(file));
+ return 0;
+#endif
+}
+
int
FileRead(File file, char *buffer, int amount)
{
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index b9c1273702f..643c75e538b 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.143 2009/01/01 17:23:48 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.144 2009/01/12 05:10:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -551,6 +551,26 @@ mdclose(SMgrRelation reln, ForkNumber forknum)
}
/*
+ * mdprefetch() -- Initiate asynchronous read of the specified block of a relation
+ */
+void
+mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
+{
+#ifdef USE_PREFETCH
+ off_t seekpos;
+ MdfdVec *v;
+
+ v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
+
+ seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+ Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+
+ (void) FilePrefetch(v->mdfd_vfd, seekpos, BLCKSZ);
+#endif /* USE_PREFETCH */
+}
+
+
+/*
* mdread() -- Read the specified block from a relation.
*/
void
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 6ed91bd96ff..f2cc449f175 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.115 2009/01/01 17:23:48 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.116 2009/01/12 05:10:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -48,6 +48,8 @@ typedef struct f_smgr
bool isRedo);
void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool isTemp);
+ void (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
+ BlockNumber blocknum);
void (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer);
void (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
@@ -65,7 +67,7 @@ typedef struct f_smgr
static const f_smgr smgrsw[] = {
/* magnetic disk */
{mdinit, NULL, mdclose, mdcreate, mdexists, mdunlink, mdextend,
- mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync,
+ mdprefetch, mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync,
mdpreckpt, mdsync, mdpostckpt
}
};
@@ -376,6 +378,15 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
}
/*
+ * smgrprefetch() -- Initiate asynchronous read of the specified block of a relation.
+ */
+void
+smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
+{
+ (*(smgrsw[reln->smgr_which].smgr_prefetch)) (reln, forknum, blocknum);
+}
+
+/*
* smgrread() -- read a particular block from a relation into the supplied
* buffer.
*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 8d927ae1387..63e9628a5dc 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -10,7 +10,7 @@
* Written by Peter Eisentraut <peter_e@gmx.net>.
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.492 2009/01/09 10:13:18 mha Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.493 2009/01/12 05:10:44 tgl Exp $
*
*--------------------------------------------------------------------
*/
@@ -18,6 +18,7 @@
#include <ctype.h>
#include <float.h>
+#include <math.h>
#include <limits.h>
#include <unistd.h>
#include <sys/stat.h>
@@ -163,8 +164,9 @@ static bool assign_tcp_keepalives_count(int newval, bool doit, GucSource source)
static const char *show_tcp_keepalives_idle(void);
static const char *show_tcp_keepalives_interval(void);
static const char *show_tcp_keepalives_count(void);
-static bool assign_autovacuum_max_workers(int newval, bool doit, GucSource source);
static bool assign_maxconnections(int newval, bool doit, GucSource source);
+static bool assign_autovacuum_max_workers(int newval, bool doit, GucSource source);
+static bool assign_effective_io_concurrency(int newval, bool doit, GucSource source);
static const char *assign_pgstat_temp_directory(const char *newval, bool doit, GucSource source);
static char *config_enum_get_options(struct config_enum *record,
@@ -413,6 +415,7 @@ static int segment_size;
static int wal_block_size;
static int wal_segment_size;
static bool integer_datetimes;
+static int effective_io_concurrency;
/* should be static, but commands/variable.c needs to get at these */
char *role_string;
@@ -1701,6 +1704,20 @@ static struct config_int ConfigureNamesInt[] =
},
{
+ {"effective_io_concurrency", PGC_USERSET, RESOURCES,
+ gettext_noop("Number of simultaneous requests that can be handled efficiently by the disk subsystem."),
+ gettext_noop("For RAID arrays, this should be approximately the number of drive spindles in the array.")
+ },
+ &effective_io_concurrency,
+#ifdef USE_PREFETCH
+ 1, 0, 1000,
+#else
+ 0, 0, 0,
+#endif
+ assign_effective_io_concurrency, NULL
+ },
+
+ {
{"log_rotation_age", PGC_SIGHUP, LOGGING_WHERE,
gettext_noop("Automatic log file rotation will occur after N minutes."),
NULL,
@@ -7587,6 +7604,61 @@ assign_autovacuum_max_workers(int newval, bool doit, GucSource source)
return true;
}
+static bool
+assign_effective_io_concurrency(int newval, bool doit, GucSource source)
+{
+#ifdef USE_PREFETCH
+ double new_prefetch_pages = 0.0;
+ int i;
+
+ /*----------
+ * The user-visible GUC parameter is the number of drives (spindles),
+ * which we need to translate to a number-of-pages-to-prefetch target.
+ *
+ * The expected number of prefetch pages needed to keep N drives busy is:
+ *
+ * drives | I/O requests
+ * -------+----------------
+ * 1 | 1
+ * 2 | 2/1 + 2/2 = 3
+ * 3 | 3/1 + 3/2 + 3/3 = 5 1/2
+ * 4 | 4/1 + 4/2 + 4/3 + 4/4 = 8 1/3
+ * n | n * H(n)
+ *
+ * This is called the "coupon collector problem" and H(n) is called the
+ * harmonic series. This could be approximated by n * ln(n), but for
+ * reasonable numbers of drives we might as well just compute the series.
+ *
+ * Alternatively we could set the target to the number of pages necessary
+ * so that the expected number of active spindles is some arbitrary
+ * percentage of the total. This sounds the same but is actually slightly
+ * different. The result ends up being ln(1-P)/ln((n-1)/n) where P is
+ * that desired fraction.
+ *
+ * Experimental results show that both of these formulas aren't aggressive
+ * enough, but we don't really have any better proposals.
+ *
+ * Note that if newval = 0 (disabled), we must set target = 0.
+ *----------
+ */
+
+ for (i = 1; i <= newval; i++)
+ new_prefetch_pages += (double) newval / (double) i;
+
+ /* This range check shouldn't fail, but let's be paranoid */
+ if (new_prefetch_pages >= 0.0 && new_prefetch_pages < (double) INT_MAX)
+ {
+ if (doit)
+ target_prefetch_pages = (int) rint(new_prefetch_pages);
+ return true;
+ }
+ else
+ return false;
+#else
+ return true;
+#endif /* USE_PREFETCH */
+}
+
static const char *
assign_pgstat_temp_directory(const char *newval, bool doit, GucSource source)
{
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index ffa5055b768..977e13e0aff 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -131,6 +131,10 @@
#bgwriter_lru_maxpages = 100 # 0-1000 max buffers written/round
#bgwriter_lru_multiplier = 2.0 # 0-10.0 multipler on buffers scanned/round
+# - Asynchronous Behavior -
+
+#effective_io_concurrency = 1 # 1-1000, or 0 to disable prefetching
+
#------------------------------------------------------------------------------
# WRITE AHEAD LOG
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 506605df001..8d87ec19e1d 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.200 2009/01/10 21:08:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.201 2009/01/12 05:10:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1154,6 +1154,9 @@ typedef struct BitmapIndexScanState
* tbm bitmap obtained from child index scan(s)
* tbmiterator iterator for scanning current pages
* tbmres current-page data
+ * prefetch_iterator iterator for prefetching ahead of current page
+ * prefetch_pages # pages prefetch iterator is ahead of current
+ * prefetch_target target prefetch distance
* ----------------
*/
typedef struct BitmapHeapScanState
@@ -1163,6 +1166,9 @@ typedef struct BitmapHeapScanState
TIDBitmap *tbm;
TBMIterator *tbmiterator;
TBMIterateResult *tbmres;
+ TBMIterator *prefetch_iterator;
+ int prefetch_pages;
+ int prefetch_target;
} BitmapHeapScanState;
/* ----------------
diff --git a/src/include/pg_config_manual.h b/src/include/pg_config_manual.h
index ff9d6ce45de..bc66df2eb34 100644
--- a/src/include/pg_config_manual.h
+++ b/src/include/pg_config_manual.h
@@ -6,7 +6,7 @@
* for developers. If you edit any of these, be sure to do a *full*
* rebuild (and an initdb if noted).
*
- * $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.36 2009/01/11 18:02:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/pg_config_manual.h,v 1.37 2009/01/12 05:10:45 tgl Exp $
*------------------------------------------------------------------------
*/
@@ -136,6 +136,15 @@
#endif
/*
+ * USE_PREFETCH code should be compiled only if we have a way to implement
+ * prefetching. (This is decoupled from USE_POSIX_FADVISE because there
+ * might in future be support for alternative low-level prefetch APIs.)
+ */
+#ifdef USE_POSIX_FADVISE
+#define USE_PREFETCH
+#endif
+
+/*
* This is the default directory in which AF_UNIX socket files are
* placed. Caution: changing this risks breaking your existing client
* applications, which are likely to continue to look in the old
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 9ec9fcb98a5..12512d7428b 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.100 2009/01/01 17:24:01 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.101 2009/01/12 05:10:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -208,7 +208,9 @@ extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
/* localbuf.c */
-extern BufferDesc *LocalBufferAlloc(SMgrRelation reln, ForkNumber forkNum,
+extern void LocalPrefetchBuffer(SMgrRelation smgr, ForkNumber forkNum,
+ BlockNumber blockNum);
+extern BufferDesc *LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
BlockNumber blockNum, bool *foundPtr);
extern void MarkLocalBufferDirty(Buffer buffer);
extern void DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 9d1f47d58a2..0ee09ced6d2 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.119 2009/01/01 17:24:01 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.120 2009/01/12 05:10:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -47,6 +47,7 @@ extern PGDLLIMPORT int NBuffers;
extern bool zero_damaged_pages;
extern int bgwriter_lru_maxpages;
extern double bgwriter_lru_multiplier;
+extern int target_prefetch_pages;
/* in buf_init.c */
extern PGDLLIMPORT char *BufferBlocks;
@@ -152,6 +153,8 @@ extern PGDLLIMPORT int32 *LocalRefCount;
/*
* prototypes for functions in bufmgr.c
*/
+extern void PrefetchBuffer(Relation reln, ForkNumber forkNum,
+ BlockNumber blockNum);
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
BlockNumber blockNum, ReadBufferMode mode,
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 17aa150aa03..98d091c9787 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.63 2009/01/01 17:24:01 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.64 2009/01/12 05:10:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -62,6 +62,7 @@ extern int max_files_per_process;
extern File PathNameOpenFile(FileName fileName, int fileFlags, int fileMode);
extern File OpenTemporaryFile(bool interXact);
extern void FileClose(File file);
+extern int FilePrefetch(File file, off_t offset, int amount);
extern int FileRead(File file, char *buffer, int amount);
extern int FileWrite(File file, char *buffer, int amount);
extern int FileSync(File file);
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 0392fdf81a3..e753af76dde 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.65 2009/01/01 17:24:01 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.66 2009/01/12 05:10:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -70,6 +70,8 @@ extern void smgrdounlink(SMgrRelation reln, ForkNumber forknum,
bool isTemp, bool isRedo);
extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool isTemp);
+extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum,
+ BlockNumber blocknum);
extern void smgrread(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer);
extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
@@ -93,6 +95,8 @@ extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
extern void mdunlink(RelFileNode rnode, ForkNumber forknum, bool isRedo);
extern void mdextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool isTemp);
+extern void mdprefetch(SMgrRelation reln, ForkNumber forknum,
+ BlockNumber blocknum);
extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer);
extern void mdwrite(SMgrRelation reln, ForkNumber forknum,