diff options
-rw-r--r-- | src/backend/storage/smgr/md.c | 96 |
1 files changed, 69 insertions, 27 deletions
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 578276d4fc1..2981b414b53 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -165,9 +165,14 @@ static CycleCtr mdckpt_cycle_ctr = 0; typedef enum /* behavior for mdopen & _mdfd_getseg */ { - EXTENSION_FAIL, /* ereport if segment not present */ - EXTENSION_RETURN_NULL, /* return NULL if not present */ - EXTENSION_CREATE /* create new segments as needed */ + /* ereport if segment not present, create in recovery */ + EXTENSION_FAIL, + /* return NULL if not present, create in recovery */ + EXTENSION_RETURN_NULL, + /* return NULL if not present */ + EXTENSION_REALLY_RETURN_NULL, + /* create new segments as needed */ + EXTENSION_CREATE } ExtensionBehavior; /* local routines */ @@ -591,7 +596,8 @@ mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior) fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600); if (fd < 0) { - if (behavior == EXTENSION_RETURN_NULL && + if ((behavior == EXTENSION_RETURN_NULL || + behavior == EXTENSION_REALLY_RETURN_NULL) && FILE_POSSIBLY_DELETED(errno)) { pfree(path); @@ -685,7 +691,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum, segnum_end; v = _mdfd_getseg(reln, forknum, blocknum, false, - EXTENSION_RETURN_NULL); + EXTENSION_REALLY_RETURN_NULL); /* * We might be flushing buffers of already removed relations, that's @@ -1774,7 +1780,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, BlockNumber nextsegno; if (!v) - return NULL; /* only possible if EXTENSION_RETURN_NULL */ + return NULL; /* if EXTENSION_(REALLY_)RETURN_NULL */ targetseg = blkno / ((BlockNumber) RELSEG_SIZE); for (nextsegno = 1; nextsegno <= targetseg; nextsegno++) @@ -1783,23 +1789,34 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, if (v->mdfd_chain == NULL) { - /* - * Normally we will create new segments only if authorized by the - * caller (i.e., we are doing mdextend()). But when doing WAL - * recovery, create segments anyway; this allows cases such as - * replaying WAL data that has a write into a high-numbered - * segment of a relation that was later deleted. We want to go - * ahead and create the segments so we can finish out the replay. - * - * We have to maintain the invariant that segments before the last - * active segment are of size RELSEG_SIZE; therefore, pad them out - * with zeroes if needed. (This only matters if caller is - * extending the relation discontiguously, but that can happen in - * hash indexes.) - */ - if (behavior == EXTENSION_CREATE || InRecovery) + BlockNumber nblocks = _mdnblocks(reln, forknum, v); + int flags = 0; + + if (nblocks > ((BlockNumber) RELSEG_SIZE)) + elog(FATAL, "segment too big"); + + if (behavior == EXTENSION_CREATE || + (InRecovery && behavior != EXTENSION_REALLY_RETURN_NULL)) { - if (_mdnblocks(reln, forknum, v) < RELSEG_SIZE) + /* + * Normally we will create new segments only if authorized by + * the caller (i.e., we are doing mdextend()). But when doing + * WAL recovery, create segments anyway; this allows cases + * such as replaying WAL data that has a write into a + * high-numbered segment of a relation that was later deleted. + * We want to go ahead and create the segments so we can + * finish out the replay. However if the caller has specified + * EXTENSION_REALLY_RETURN_NULL, then extension is not desired + * even in recovery; we won't reach this point in that case. + * + * We have to maintain the invariant that segments before the + * last active segment are of size RELSEG_SIZE; therefore, if + * extending, pad them out with zeroes if needed. (This only + * matters if in recovery, or if the caller is extending the + * relation discontiguously, but that can happen in hash + * indexes.) + */ + if (nblocks < ((BlockNumber) RELSEG_SIZE)) { char *zerobuf = palloc0(BLCKSZ); @@ -1808,16 +1825,41 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, zerobuf, skipFsync); pfree(zerobuf); } - v->mdfd_chain = _mdfd_openseg(reln, forknum, +nextsegno, O_CREAT); + flags = O_CREAT; } - else + else if (nblocks < ((BlockNumber) RELSEG_SIZE)) { - /* We won't create segment if not existent */ - v->mdfd_chain = _mdfd_openseg(reln, forknum, nextsegno, 0); + /* + * When not extending, only open the next segment if the + * current one is exactly RELSEG_SIZE. If not (this branch), + * either return NULL or fail. + */ + if (behavior == EXTENSION_RETURN_NULL || + behavior == EXTENSION_REALLY_RETURN_NULL) + { + /* + * Some callers discern between reasons for _mdfd_getseg() + * returning NULL based on errno. As there's no failing + * syscall involved in this case, explicitly set errno to + * ENOENT, as that seems the closest interpretation. + */ + errno = ENOENT; + return NULL; + } + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open file \"%s\" (target block %u): previous segment is only %u blocks", + _mdfd_segpath(reln, forknum, nextsegno), + blkno, nblocks))); } + + v->mdfd_chain = _mdfd_openseg(reln, forknum, nextsegno, flags); + if (v->mdfd_chain == NULL) { - if (behavior == EXTENSION_RETURN_NULL && + if ((behavior == EXTENSION_RETURN_NULL || + behavior == EXTENSION_REALLY_RETURN_NULL) && FILE_POSSIBLY_DELETED(errno)) return NULL; ereport(ERROR, |