aboutsummaryrefslogtreecommitdiff
path: root/src/backend/storage/smgr/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/storage/smgr/md.c')
-rw-r--r--src/backend/storage/smgr/md.c108
1 files changed, 108 insertions, 0 deletions
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 352958e1feb..1c2d1405f86 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -501,6 +501,114 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
}
/*
+ * mdzeroextend() -- Add new zeroed out blocks to the specified relation.
+ *
+ * Similar to mdextend(), except the relation can be extended by multiple
+ * blocks at once and the added blocks will be filled with zeroes.
+ */
+void
+mdzeroextend(SMgrRelation reln, ForkNumber forknum,
+ BlockNumber blocknum, int nblocks, bool skipFsync)
+{
+ MdfdVec *v;
+ BlockNumber curblocknum = blocknum;
+ int remblocks = nblocks;
+
+ Assert(nblocks > 0);
+
+ /* This assert is too expensive to have on normally ... */
+#ifdef CHECK_WRITE_VS_EXTEND
+ Assert(blocknum >= mdnblocks(reln, forknum));
+#endif
+
+ /*
+ * If a relation manages to grow to 2^32-1 blocks, refuse to extend it any
+ * more --- we mustn't create a block whose number actually is
+ * InvalidBlockNumber or larger.
+ */
+ if ((uint64) blocknum + nblocks >= (uint64) InvalidBlockNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("cannot extend file \"%s\" beyond %u blocks",
+ relpath(reln->smgr_rlocator, forknum),
+ InvalidBlockNumber)));
+
+ while (remblocks > 0)
+ {
+ BlockNumber segstartblock = curblocknum % ((BlockNumber) RELSEG_SIZE);
+ off_t seekpos = (off_t) BLCKSZ * segstartblock;
+ int numblocks;
+
+ if (segstartblock + remblocks > RELSEG_SIZE)
+ numblocks = RELSEG_SIZE - segstartblock;
+ else
+ numblocks = remblocks;
+
+ v = _mdfd_getseg(reln, forknum, curblocknum, skipFsync, EXTENSION_CREATE);
+
+ Assert(segstartblock < RELSEG_SIZE);
+ Assert(segstartblock + numblocks <= RELSEG_SIZE);
+
+ /*
+ * If available and useful, use posix_fallocate() (via FileAllocate())
+ * to extend the relation. That's often more efficient than using
+ * write(), as it commonly won't cause the kernel to allocate page
+ * cache space for the extended pages.
+ *
+ * However, we don't use FileAllocate() for small extensions, as it
+ * defeats delayed allocation on some filesystems. Not clear where
+ * that decision should be made though? For now just use a cutoff of
+ * 8, anything between 4 and 8 worked OK in some local testing.
+ */
+ if (numblocks > 8)
+ {
+ int ret;
+
+ ret = FileFallocate(v->mdfd_vfd,
+ seekpos, (off_t) BLCKSZ * numblocks,
+ WAIT_EVENT_DATA_FILE_EXTEND);
+ if (ret != 0)
+ {
+ ereport(ERROR,
+ errcode_for_file_access(),
+ errmsg("could not extend file \"%s\" with FileFallocate(): %m",
+ FilePathName(v->mdfd_vfd)),
+ errhint("Check free disk space."));
+ }
+ }
+ else
+ {
+ int ret;
+
+ /*
+ * Even if we don't want to use fallocate, we can still extend a
+ * bit more efficiently than writing each 8kB block individually.
+ * pg_pwrite_zeroes() (via FileZero()) uses
+ * pg_pwritev_with_retry() to avoid multiple writes or needing a
+ * zeroed buffer for the whole length of the extension.
+ */
+ ret = FileZero(v->mdfd_vfd,
+ seekpos, (off_t) BLCKSZ * numblocks,
+ WAIT_EVENT_DATA_FILE_EXTEND);
+ if (ret < 0)
+ ereport(ERROR,
+ errcode_for_file_access(),
+ errmsg("could not extend file \"%s\": %m",
+ FilePathName(v->mdfd_vfd)),
+ errhint("Check free disk space."));
+ }
+
+ if (!skipFsync && !SmgrIsTemp(reln))
+ register_dirty_segment(reln, forknum, v);
+
+ Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
+
+ remblocks -= numblocks;
+ curblocknum += numblocks;
+ }
+}
+
+/*
* mdopenfork() -- Open one fork of the specified relation.
*
* Note we only open the first segment, when there are multiple segments.