Diffstat (limited to 'src')
-rw-r--r--  src/backend/access/gist/gistvacuum.c   |  8
-rw-r--r--  src/backend/access/heap/heapam.c       | 16
-rw-r--r--  src/backend/access/heap/vacuumlazy.c   | 21
-rw-r--r--  src/backend/access/nbtree/nbtree.c     |  8
-rw-r--r--  src/backend/access/spgist/spgvacuum.c  |  8
-rw-r--r--  src/backend/commands/analyze.c         |  7
-rw-r--r--  src/backend/storage/aio/read_stream.c  | 16
-rw-r--r--  src/backend/storage/buffer/bufmgr.c    |  8
-rw-r--r--  src/include/storage/read_stream.h      | 21
9 files changed, 104 insertions(+), 9 deletions(-)
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index 20b1bb5dbac..ce9d78d78d6 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -210,7 +210,13 @@ gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
needLock = !RELATION_IS_LOCAL(rel);
p.current_blocknum = GIST_ROOT_BLKNO;
- stream = read_stream_begin_relation(READ_STREAM_FULL,
+
+ /*
+ * It is safe to use batchmode as block_range_read_stream_cb takes no
+ * locks.
+ */
+ stream = read_stream_begin_relation(READ_STREAM_FULL |
+ READ_STREAM_USE_BATCHING,
info->strategy,
rel,
MAIN_FORKNUM,
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index b12b583c4d9..6e433db039e 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -1206,7 +1206,15 @@ heap_beginscan(Relation relation, Snapshot snapshot,
else
cb = heap_scan_stream_read_next_serial;
- scan->rs_read_stream = read_stream_begin_relation(READ_STREAM_SEQUENTIAL,
+ /* ---
+ * It is safe to use batchmode as the only locks taken by `cb`
+ * are never held while waiting for IO:
+ * - SyncScanLock is used in the non-parallel case
+ * - in the parallel case, only spinlocks and atomics are used
+ * ---
+ */
+ scan->rs_read_stream = read_stream_begin_relation(READ_STREAM_SEQUENTIAL |
+ READ_STREAM_USE_BATCHING,
scan->rs_strategy,
scan->rs_base.rs_rd,
MAIN_FORKNUM,
@@ -1216,6 +1224,12 @@ heap_beginscan(Relation relation, Snapshot snapshot,
}
else if (scan->rs_base.rs_flags & SO_TYPE_BITMAPSCAN)
{
+ /*
+ * Currently we can't trivially use batching, due to the
+ * VM_ALL_VISIBLE check in bitmapheap_stream_read_next. While that
+ * could be made safe, we are about to remove the all-visible logic
+ * from bitmap scans due to its unsoundness.
+ */
scan->rs_read_stream = read_stream_begin_relation(READ_STREAM_DEFAULT,
scan->rs_strategy,
scan->rs_base.rs_rd,
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 6d287b38cf5..f28326bad09 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -1225,7 +1225,12 @@ lazy_scan_heap(LVRelState *vacrel)
vacrel->next_unskippable_eager_scanned = false;
vacrel->next_unskippable_vmbuffer = InvalidBuffer;
- /* Set up the read stream for vacuum's first pass through the heap */
+ /*
+ * Set up the read stream for vacuum's first pass through the heap.
+ *
+ * This could be made safe for READ_STREAM_USE_BATCHING, but only with
+ * explicit work in heap_vac_scan_next_block.
+ */
stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
vacrel->bstrategy,
vacrel->rel,
@@ -2669,6 +2674,8 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
* Read stream callback for vacuum's third phase (second pass over the heap).
* Gets the next block from the TID store and returns it or InvalidBlockNumber
* if there are no further blocks to vacuum.
+ *
+ * NB: Assumed to be safe to use with READ_STREAM_USE_BATCHING.
*/
static BlockNumber
vacuum_reap_lp_read_stream_next(ReadStream *stream,
@@ -2732,8 +2739,16 @@ lazy_vacuum_heap_rel(LVRelState *vacrel)
iter = TidStoreBeginIterate(vacrel->dead_items);
- /* Set up the read stream for vacuum's second pass through the heap */
- stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
+ /*
+ * Set up the read stream for vacuum's second pass through the heap.
+ *
+ * It is safe to use batchmode, as vacuum_reap_lp_read_stream_next() does
+ * not need to wait for IO and does not perform locking. Once we support
+ * parallelism it should still be fine, as presumably a lock holder would
+ * never be blocked by IO while holding that lock.
+ */
+ stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
+ READ_STREAM_USE_BATCHING,
vacrel->bstrategy,
vacrel->rel,
MAIN_FORKNUM,
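
The vacuumlazy.c comments above distinguish callbacks that never block (safe for
batching as-is) from ones like heap_vac_scan_next_block, which would need
"explicit work" first. A hypothetical sketch of what that work looks like under
rule (a) of the batchmode contract (documented in read_stream.h below); every
name here is illustrative and not part of this patch:

#include "postgres.h"

#include "storage/aio.h"
#include "storage/block.h"
#include "storage/lwlock.h"
#include "storage/read_stream.h"

/* Illustrative shared-cursor state; not part of the patch. */
typedef struct LockedCursor
{
	LWLock	   *lock;			/* a lock that could be part of a deadlock */
	BlockNumber next;			/* next block to hand out */
	BlockNumber last_exclusive;
} LockedCursor;

static BlockNumber
locked_cursor_read_stream_cb(ReadStream *stream,
							 void *callback_private_data,
							 void *per_buffer_data)
{
	LockedCursor *cursor = callback_private_data;
	BlockNumber result = InvalidBlockNumber;

	/*
	 * Rule (a): before a potentially blocking lock acquisition, submit any
	 * staged-but-unsubmitted IO, so no other backend can deadlock waiting
	 * on it while holding the lock we want.
	 */
	pgaio_submit_staged();

	LWLockAcquire(cursor->lock, LW_EXCLUSIVE);
	if (cursor->next < cursor->last_exclusive)
		result = cursor->next++;
	LWLockRelease(cursor->lock);

	return result;
}
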
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 80b04d6ca2a..4a0bf069f99 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -1064,7 +1064,13 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
needLock = !RELATION_IS_LOCAL(rel);
p.current_blocknum = BTREE_METAPAGE + 1;
- stream = read_stream_begin_relation(READ_STREAM_FULL,
+
+ /*
+ * It is safe to use batchmode as block_range_read_stream_cb takes no
+ * locks.
+ */
+ stream = read_stream_begin_relation(READ_STREAM_FULL |
+ READ_STREAM_USE_BATCHING,
info->strategy,
rel,
MAIN_FORKNUM,
diff --git a/src/backend/access/spgist/spgvacuum.c b/src/backend/access/spgist/spgvacuum.c
index 77deb226b7e..b3df2d89074 100644
--- a/src/backend/access/spgist/spgvacuum.c
+++ b/src/backend/access/spgist/spgvacuum.c
@@ -822,7 +822,13 @@ spgvacuumscan(spgBulkDeleteState *bds)
/* We can skip locking for new or temp relations */
needLock = !RELATION_IS_LOCAL(index);
p.current_blocknum = SPGIST_METAPAGE_BLKNO + 1;
- stream = read_stream_begin_relation(READ_STREAM_FULL,
+
+ /*
+ * It is safe to use batchmode as block_range_read_stream_cb takes no
+ * locks.
+ */
+ stream = read_stream_begin_relation(READ_STREAM_FULL |
+ READ_STREAM_USE_BATCHING,
bds->info->strategy,
index,
MAIN_FORKNUM,
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index ca76c0d2668..4fffb76e557 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -1237,7 +1237,12 @@ acquire_sample_rows(Relation onerel, int elevel,
scan = table_beginscan_analyze(onerel);
slot = table_slot_create(onerel, NULL);
- stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
+ /*
+ * It is safe to use batching, as block_sampling_read_stream_next never
+ * blocks.
+ */
+ stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE |
+ READ_STREAM_USE_BATCHING,
vac_strategy,
scan->rs_rd,
MAIN_FORKNUM,
diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c
index 26e5dfe77db..36c54fb695b 100644
--- a/src/backend/storage/aio/read_stream.c
+++ b/src/backend/storage/aio/read_stream.c
@@ -102,6 +102,7 @@ struct ReadStream
int16 initialized_buffers;
int read_buffers_flags;
bool sync_mode; /* using io_method=sync */
+ bool batch_mode; /* READ_STREAM_USE_BATCHING */
bool advice_enabled;
bool temporary;
@@ -403,6 +404,15 @@ read_stream_start_pending_read(ReadStream *stream)
static void
read_stream_look_ahead(ReadStream *stream)
{
+ /*
+ * Allow amortizing the cost of submitting IO over multiple IOs. This
+ * requires that we don't do any operations that could lead to a deadlock
+ * with staged-but-unsubmitted IO. The callback needs to opt in to being
+ * careful.
+ */
+ if (stream->batch_mode)
+ pgaio_enter_batchmode();
+
while (stream->ios_in_progress < stream->max_ios &&
stream->pinned_buffers + stream->pending_read_nblocks < stream->distance)
{
@@ -450,6 +460,8 @@ read_stream_look_ahead(ReadStream *stream)
{
/* We've hit the buffer or I/O limit. Rewind and stop here. */
read_stream_unget_block(stream, blocknum);
+ if (stream->batch_mode)
+ pgaio_exit_batchmode();
return;
}
}
@@ -484,6 +496,9 @@ read_stream_look_ahead(ReadStream *stream)
* time.
*/
Assert(stream->pinned_buffers > 0 || stream->distance == 0);
+
+ if (stream->batch_mode)
+ pgaio_exit_batchmode();
}
/*
@@ -617,6 +632,7 @@ read_stream_begin_impl(int flags,
MAXALIGN(&stream->ios[Max(1, max_ios)]);
stream->sync_mode = io_method == IOMETHOD_SYNC;
+ stream->batch_mode = flags & READ_STREAM_USE_BATCHING;
#ifdef USE_PREFETCH
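
For reference, the pgaio_enter_batchmode()/pgaio_exit_batchmode() bracketing
added to read_stream_look_ahead() above follows the general AIO batching shape.
A minimal sketch, assuming a hypothetical stage_reads() helper standing in for
any code that stages IOs without submitting them:

#include "postgres.h"

#include "storage/aio.h"

/* Hypothetical: stages one or more reads without submitting them. */
extern void stage_reads(void);

static void
submit_reads_as_one_batch(void)
{
	pgaio_enter_batchmode();

	/*
	 * IOs staged here accumulate instead of being submitted one at a time,
	 * amortizing submission cost; nothing in this window may block in a
	 * way that could deadlock against the staged-but-unsubmitted IOs.
	 */
	stage_reads();

	/* Exiting batchmode submits whatever is still staged. */
	pgaio_exit_batchmode();
}
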
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 6a18e334809..f9681d09e1e 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -5100,7 +5100,13 @@ RelationCopyStorageUsingBuffer(RelFileLocator srclocator,
p.current_blocknum = 0;
p.last_exclusive = nblocks;
src_smgr = smgropen(srclocator, INVALID_PROC_NUMBER);
- src_stream = read_stream_begin_smgr_relation(READ_STREAM_FULL,
+
+ /*
+ * It is safe to use batchmode as block_range_read_stream_cb takes no
+ * locks.
+ */
+ src_stream = read_stream_begin_smgr_relation(READ_STREAM_FULL |
+ READ_STREAM_USE_BATCHING,
bstrategy_src,
src_smgr,
permanent ? RELPERSISTENCE_PERMANENT : RELPERSISTENCE_UNLOGGED,
diff --git a/src/include/storage/read_stream.h b/src/include/storage/read_stream.h
index c11d8ce3300..9b0d65161d0 100644
--- a/src/include/storage/read_stream.h
+++ b/src/include/storage/read_stream.h
@@ -42,6 +42,27 @@
*/
#define READ_STREAM_FULL 0x04
+/* ---
+ * Opt in to using AIO batchmode.
+ *
+ * Submitting IO in larger batches can be more efficient than doing so
+ * one-by-one, particularly for many small reads. It does, however, require
+ * the ReadStreamBlockNumberCB callback to abide by the restrictions of AIO
+ * batching (cf. pgaio_enter_batchmode()). Basically, the callback may not:
+ *
+ * a) block without first calling pgaio_submit_staged(), unless a
+ * to-be-waited-on lock cannot be part of a deadlock, e.g. because it is
+ * never held while waiting for IO.
+ *
+ * b) start another batch (without first exiting batchmode and re-entering
+ * before returning)
+ *
+ * As this requires care and is nontrivial in some cases, batching is only
+ * used with explicit opt-in.
+ * ---
+ */
+#define READ_STREAM_USE_BATCHING 0x08
+
struct ReadStream;
typedef struct ReadStream ReadStream;
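
To illustrate the contract above, here is a hedged sketch of a callback that
trivially satisfies it, taking no locks and never blocking, much like the
block_range_read_stream_cb callers converted in this patch, together with how
a caller would opt in. The names PlainRange, plain_range_read_stream_cb, and
begin_plain_range_stream are illustrative:

#include "postgres.h"

#include "common/relpath.h"
#include "storage/bufmgr.h"
#include "storage/read_stream.h"
#include "utils/rel.h"

/* Illustrative private state for the callback below. */
typedef struct PlainRange
{
	BlockNumber next;
	BlockNumber last_exclusive;
} PlainRange;

/*
 * Returns blocks in [next, last_exclusive). Takes no locks and never
 * blocks, so it trivially satisfies the READ_STREAM_USE_BATCHING contract.
 */
static BlockNumber
plain_range_read_stream_cb(ReadStream *stream,
						   void *callback_private_data,
						   void *per_buffer_data)
{
	PlainRange *range = callback_private_data;

	if (range->next >= range->last_exclusive)
		return InvalidBlockNumber;
	return range->next++;
}

static ReadStream *
begin_plain_range_stream(Relation rel, BufferAccessStrategy strategy,
						 PlainRange *range)
{
	return read_stream_begin_relation(READ_STREAM_FULL |
									  READ_STREAM_USE_BATCHING,
									  strategy,
									  rel,
									  MAIN_FORKNUM,
									  plain_range_read_stream_cb,
									  range,
									  0);
}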