aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/transam/xlog.c
diff options
context:
space:
mode:
author Robert Haas <rhaas@postgresql.org> 2022-09-27 13:25:21 -0400
committer Robert Haas <rhaas@postgresql.org> 2022-09-27 13:25:21 -0400
commit 05d4cbf9b6ba708858984b01ca0fc56d59d4ec7c (patch)
tree 645e3ac17f002ae33e086dbf871c330986452c35 /src/backend/access/transam/xlog.c
parent 2f47715cc8649f854b1df28dfc338af9801db217 (diff)
downloadpostgresql-05d4cbf9b6ba708858984b01ca0fc56d59d4ec7c.tar.gz
postgresql-05d4cbf9b6ba708858984b01ca0fc56d59d4ec7c.zip
Increase width of RelFileNumbers from 32 bits to 56 bits.
RelFileNumbers are now assigned using a separate counter, instead of being assigned from the OID counter. This counter never wraps around: if all 2^56 possible RelFileNumbers are used, an internal error occurs. As the cluster is limited to 2^64 total bytes of WAL, this limitation should not cause a problem in practice. If the counter were 64 bits wide rather than 56 bits wide, we would need to increase the width of the BufferTag, which might adversely impact buffer lookup performance. Also, this lets us use bigint for pg_class.relfilenode and other places where these values are exposed at the SQL level without worrying about overflow. This should remove the need to keep "tombstone" files around until the next checkpoint when relations are removed. We do that to keep RelFileNumbers from being recycled, but now that won't happen anyway. However, this patch doesn't actually change anything in this area; it just makes it possible for a future patch to do so. Dilip Kumar, based on an idea from Andres Freund, who also reviewed some earlier versions of the patch. Further review and some wordsmithing by me. Also reviewed at various points by Ashutosh Sharma, Vignesh C, Amul Sul, Álvaro Herrera, and Tom Lane. Discussion: http://postgr.es/m/CA+Tgmobp7+7kmi4gkq7Y+4AM9fTvL+O1oQ4-5gFTT+6Ng-dQ=g@mail.gmail.com
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r-- src/backend/access/transam/xlog.c 60
1 files changed, 60 insertions, 0 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 1dd6df0fe15..dff9b8d2366 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -4712,6 +4712,7 @@ BootStrapXLOG(void)
checkPoint.nextXid =
FullTransactionIdFromEpochAndXid(0, FirstNormalTransactionId);
checkPoint.nextOid = FirstGenbkiObjectId;
+ checkPoint.nextRelFileNumber = FirstNormalRelFileNumber;
checkPoint.nextMulti = FirstMultiXactId;
checkPoint.nextMultiOffset = 0;
checkPoint.oldestXid = FirstNormalTransactionId;
@@ -4725,7 +4726,11 @@ BootStrapXLOG(void)
ShmemVariableCache->nextXid = checkPoint.nextXid;
ShmemVariableCache->nextOid = checkPoint.nextOid;
+ ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber;
ShmemVariableCache->oidCount = 0;
+ ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber;
+ ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber;
+ ShmemVariableCache->loggedRelFileNumberRecPtr = InvalidXLogRecPtr;
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
AdvanceOldestClogXid(checkPoint.oldestXid);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
@@ -5191,7 +5196,10 @@ StartupXLOG(void)
/* initialize shared memory variables from the checkpoint record */
ShmemVariableCache->nextXid = checkPoint.nextXid;
ShmemVariableCache->nextOid = checkPoint.nextOid;
+ ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber;
ShmemVariableCache->oidCount = 0;
+ ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber;
+ ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber;
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
AdvanceOldestClogXid(checkPoint.oldestXid);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
@@ -6663,6 +6671,24 @@ CreateCheckPoint(int flags)
checkPoint.nextOid += ShmemVariableCache->oidCount;
LWLockRelease(OidGenLock);
+ /*
+ * If this is a shutdown checkpoint then we can safely start allocating
+ * relfilenumbers from the nextRelFileNumber value after the restart, because
+ * no one else can use a relfilenumber beyond that number before the
+ * shutdown. OTOH, if it is a normal checkpoint and there is a crash after
+ * this point, we might end up reusing the same relfilenumbers after the
+ * restart, so we need to set nextRelFileNumber to the already-logged
+ * relfilenumber, as no one will use a number beyond this limit without
+ * logging again.
+ */
+ LWLockAcquire(RelFileNumberGenLock, LW_SHARED);
+ if (shutdown)
+ checkPoint.nextRelFileNumber = ShmemVariableCache->nextRelFileNumber;
+ else
+ checkPoint.nextRelFileNumber = ShmemVariableCache->loggedRelFileNumber;
+
+ LWLockRelease(RelFileNumberGenLock);
+
MultiXactGetCheckptMulti(shutdown,
&checkPoint.nextMulti,
&checkPoint.nextMultiOffset,
@@ -7541,6 +7567,24 @@ XLogPutNextOid(Oid nextOid)
}
/*
+ * Similar to XLogPutNextOid, but instead of writing a NEXTOID log record it
+ * writes a NEXT_RELFILENUMBER log record whose payload is nextrelnumber.
+ * Returns the XLogRecPtr that XLogInsert reported for that record, so that
+ * the caller can flush it at the appropriate time; nothing is flushed here.
+ */
+XLogRecPtr
+LogNextRelFileNumber(RelFileNumber nextrelnumber)
+{
+ XLogRecPtr recptr;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) (&nextrelnumber), sizeof(RelFileNumber));
+ recptr = XLogInsert(RM_XLOG_ID, XLOG_NEXT_RELFILENUMBER);
+
+ return recptr;
+}
+
+/*
* Write an XLOG SWITCH record.
*
* Here we just blindly issue an XLogInsert request for the record.
@@ -7755,6 +7799,17 @@ xlog_redo(XLogReaderState *record)
ShmemVariableCache->oidCount = 0;
LWLockRelease(OidGenLock);
}
+ if (info == XLOG_NEXT_RELFILENUMBER)
+ {
+ RelFileNumber nextRelFileNumber;
+
+ memcpy(&nextRelFileNumber, XLogRecGetData(record), sizeof(RelFileNumber));
+ LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE);
+ ShmemVariableCache->nextRelFileNumber = nextRelFileNumber;
+ ShmemVariableCache->loggedRelFileNumber = nextRelFileNumber;
+ ShmemVariableCache->flushedRelFileNumber = nextRelFileNumber;
+ LWLockRelease(RelFileNumberGenLock);
+ }
else if (info == XLOG_CHECKPOINT_SHUTDOWN)
{
CheckPoint checkPoint;
@@ -7769,6 +7824,11 @@ xlog_redo(XLogReaderState *record)
ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0;
LWLockRelease(OidGenLock);
+ LWLockAcquire(RelFileNumberGenLock, LW_EXCLUSIVE);
+ ShmemVariableCache->nextRelFileNumber = checkPoint.nextRelFileNumber;
+ ShmemVariableCache->loggedRelFileNumber = checkPoint.nextRelFileNumber;
+ ShmemVariableCache->flushedRelFileNumber = checkPoint.nextRelFileNumber;
+ LWLockRelease(RelFileNumberGenLock);
MultiXactSetNextMXact(checkPoint.nextMulti,
checkPoint.nextMultiOffset);