about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/backend/storage/smgr/md.c 96
1 files changed, 69 insertions, 27 deletions
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 578276d4fc1..2981b414b53 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -165,9 +165,14 @@ static CycleCtr mdckpt_cycle_ctr = 0;
typedef enum /* behavior for mdopen & _mdfd_getseg */
{
- EXTENSION_FAIL, /* ereport if segment not present */
- EXTENSION_RETURN_NULL, /* return NULL if not present */
- EXTENSION_CREATE /* create new segments as needed */
+ /* ereport if segment not present, create in recovery */
+ EXTENSION_FAIL,
+ /* return NULL if not present, create in recovery */
+ EXTENSION_RETURN_NULL,
+ /* return NULL if not present */
+ EXTENSION_REALLY_RETURN_NULL,
+ /* create new segments as needed */
+ EXTENSION_CREATE
} ExtensionBehavior;
/* local routines */
@@ -591,7 +596,8 @@ mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior)
fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
if (fd < 0)
{
- if (behavior == EXTENSION_RETURN_NULL &&
+ if ((behavior == EXTENSION_RETURN_NULL ||
+ behavior == EXTENSION_REALLY_RETURN_NULL) &&
FILE_POSSIBLY_DELETED(errno))
{
pfree(path);
@@ -685,7 +691,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
segnum_end;
v = _mdfd_getseg(reln, forknum, blocknum, false,
- EXTENSION_RETURN_NULL);
+ EXTENSION_REALLY_RETURN_NULL);
/*
* We might be flushing buffers of already removed relations, that's
@@ -1774,7 +1780,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
BlockNumber nextsegno;
if (!v)
- return NULL; /* only possible if EXTENSION_RETURN_NULL */
+ return NULL; /* if EXTENSION_(REALLY_)RETURN_NULL */
targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
for (nextsegno = 1; nextsegno <= targetseg; nextsegno++)
@@ -1783,23 +1789,34 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
if (v->mdfd_chain == NULL)
{
- /*
- * Normally we will create new segments only if authorized by the
- * caller (i.e., we are doing mdextend()). But when doing WAL
- * recovery, create segments anyway; this allows cases such as
- * replaying WAL data that has a write into a high-numbered
- * segment of a relation that was later deleted. We want to go
- * ahead and create the segments so we can finish out the replay.
- *
- * We have to maintain the invariant that segments before the last
- * active segment are of size RELSEG_SIZE; therefore, pad them out
- * with zeroes if needed. (This only matters if caller is
- * extending the relation discontiguously, but that can happen in
- * hash indexes.)
- */
- if (behavior == EXTENSION_CREATE || InRecovery)
+ BlockNumber nblocks = _mdnblocks(reln, forknum, v);
+ int flags = 0;
+
+ if (nblocks > ((BlockNumber) RELSEG_SIZE))
+ elog(FATAL, "segment too big");
+
+ if (behavior == EXTENSION_CREATE ||
+ (InRecovery && behavior != EXTENSION_REALLY_RETURN_NULL))
{
- if (_mdnblocks(reln, forknum, v) < RELSEG_SIZE)
+ /*
+ * Normally we will create new segments only if authorized by
+ * the caller (i.e., we are doing mdextend()). But when doing
+ * WAL recovery, create segments anyway; this allows cases
+ * such as replaying WAL data that has a write into a
+ * high-numbered segment of a relation that was later deleted.
+ * We want to go ahead and create the segments so we can
+ * finish out the replay. However, if the caller has specified
+ * EXTENSION_REALLY_RETURN_NULL, then extension is not desired
+ * even in recovery; we won't reach this point in that case.
+ *
+ * We have to maintain the invariant that segments before the
+ * last active segment are of size RELSEG_SIZE; therefore, if
+ * extending, pad them out with zeroes if needed. (This only
+ * matters if in recovery, or if the caller is extending the
+ * relation discontiguously, but that can happen in hash
+ * indexes.)
+ */
+ if (nblocks < ((BlockNumber) RELSEG_SIZE))
{
char *zerobuf = palloc0(BLCKSZ);
@@ -1808,16 +1825,41 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
zerobuf, skipFsync);
pfree(zerobuf);
}
- v->mdfd_chain = _mdfd_openseg(reln, forknum, +nextsegno, O_CREAT);
+ flags = O_CREAT;
}
- else
+ else if (nblocks < ((BlockNumber) RELSEG_SIZE))
{
- /* We won't create segment if not existent */
- v->mdfd_chain = _mdfd_openseg(reln, forknum, nextsegno, 0);
+ /*
+ * When not extending, only open the next segment if the
+ * current one is exactly RELSEG_SIZE. If not (this branch),
+ * either return NULL or fail.
+ */
+ if (behavior == EXTENSION_RETURN_NULL ||
+ behavior == EXTENSION_REALLY_RETURN_NULL)
+ {
+ /*
+ * Some callers discern between reasons for _mdfd_getseg()
+ * returning NULL based on errno. As there's no failing
+ * syscall involved in this case, explicitly set errno to
+ * ENOENT, as that seems the closest interpretation.
+ */
+ errno = ENOENT;
+ return NULL;
+ }
+
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\" (target block %u): previous segment is only %u blocks",
+ _mdfd_segpath(reln, forknum, nextsegno),
+ blkno, nblocks)));
}
+
+ v->mdfd_chain = _mdfd_openseg(reln, forknum, nextsegno, flags);
+
if (v->mdfd_chain == NULL)
{
- if (behavior == EXTENSION_RETURN_NULL &&
+ if ((behavior == EXTENSION_RETURN_NULL ||
+ behavior == EXTENSION_REALLY_RETURN_NULL) &&
FILE_POSSIBLY_DELETED(errno))
return NULL;
ereport(ERROR,