diff options
Diffstat (limited to 'src/backend/replication')
-rw-r--r-- | src/backend/replication/logical/launcher.c | 2 | ||||
-rw-r--r-- | src/backend/replication/logical/logical.c | 18 | ||||
-rw-r--r-- | src/backend/replication/logical/reorderbuffer.c | 196 | ||||
-rw-r--r-- | src/backend/replication/logical/snapbuild.c | 12 | ||||
-rw-r--r-- | src/backend/replication/slot.c | 57 |
5 files changed, 249 insertions, 36 deletions
diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 10677da56b2..1c3c051403d 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -1016,7 +1016,7 @@ logicalrep_launcher_attach_dshmem(void) last_start_times_dsa = dsa_attach(LogicalRepCtx->last_start_dsa); dsa_pin_mapping(last_start_times_dsa); last_start_times = dshash_attach(last_start_times_dsa, &dsh_params, - LogicalRepCtx->last_start_dsh, 0); + LogicalRepCtx->last_start_dsh, NULL); } MemoryContextSwitchTo(oldcontext); diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c index 1d56d0c4ef3..f1eb798f3e9 100644 --- a/src/backend/replication/logical/logical.c +++ b/src/backend/replication/logical/logical.c @@ -29,6 +29,7 @@ #include "postgres.h" #include "access/xact.h" +#include "access/xlog_internal.h" #include "access/xlogutils.h" #include "fmgr.h" #include "miscadmin.h" @@ -41,6 +42,7 @@ #include "storage/proc.h" #include "storage/procarray.h" #include "utils/builtins.h" +#include "utils/injection_point.h" #include "utils/inval.h" #include "utils/memutils.h" @@ -1825,9 +1827,13 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn) { bool updated_xmin = false; bool updated_restart = false; + XLogRecPtr restart_lsn pg_attribute_unused(); SpinLockAcquire(&MyReplicationSlot->mutex); + /* remember the old restart lsn */ + restart_lsn = MyReplicationSlot->data.restart_lsn; + /* * Prevent moving the confirmed_flush backwards, as this could lead to * data duplication issues caused by replicating already replicated @@ -1881,6 +1887,18 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn) /* first write new xmin to disk, so we know what's up after a crash */ if (updated_xmin || updated_restart) { +#ifdef USE_INJECTION_POINTS + XLogSegNo seg1, + seg2; + + XLByteToSeg(restart_lsn, seg1, wal_segment_size); + XLByteToSeg(MyReplicationSlot->data.restart_lsn, seg2, wal_segment_size); + + /* trigger injection point, but only if segment changes */ + if (seg1 != seg2) + INJECTION_POINT("logical-replication-slot-advance-segment", NULL); +#endif + ReplicationSlotMarkDirty(); ReplicationSlotSave(); elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart); diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 67655111875..c4299c76fb1 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -109,10 +109,22 @@ #include "storage/procarray.h" #include "storage/sinval.h" #include "utils/builtins.h" +#include "utils/inval.h" #include "utils/memutils.h" #include "utils/rel.h" #include "utils/relfilenumbermap.h" +/* + * Each transaction has an 8MB limit for invalidation messages distributed from + * other transactions. This limit is set considering scenarios with many + * concurrent logical decoding operations. When the distributed invalidation + * messages reach this threshold, the transaction is marked as + * RBTXN_DISTR_INVAL_OVERFLOWED to invalidate the complete cache as we have lost + * some inval messages and hence don't know what needs to be invalidated. + */ +#define MAX_DISTR_INVAL_MSG_PER_TXN \ + ((8 * 1024 * 1024) / sizeof(SharedInvalidationMessage)) + /* entry for a hash table we use to map from xid to our transaction state */ typedef struct ReorderBufferTXNByIdEnt { @@ -472,6 +484,12 @@ ReorderBufferFreeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn) txn->invalidations = NULL; } + if (txn->invalidations_distributed) + { + pfree(txn->invalidations_distributed); + txn->invalidations_distributed = NULL; + } + /* Reset the toast hash */ ReorderBufferToastReset(rb, txn); @@ -2661,7 +2679,17 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, AbortCurrentTransaction(); /* make sure there's no cache pollution */ - ReorderBufferExecuteInvalidations(txn->ninvalidations, txn->invalidations); + if (rbtxn_distr_inval_overflowed(txn)) + { + Assert(txn->ninvalidations_distributed == 0); + InvalidateSystemCaches(); + } + else + { + ReorderBufferExecuteInvalidations(txn->ninvalidations, txn->invalidations); + ReorderBufferExecuteInvalidations(txn->ninvalidations_distributed, + txn->invalidations_distributed); + } if (using_subtxn) RollbackAndReleaseCurrentSubTransaction(); @@ -2710,8 +2738,17 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, AbortCurrentTransaction(); /* make sure there's no cache pollution */ - ReorderBufferExecuteInvalidations(txn->ninvalidations, - txn->invalidations); + if (rbtxn_distr_inval_overflowed(txn)) + { + Assert(txn->ninvalidations_distributed == 0); + InvalidateSystemCaches(); + } + else + { + ReorderBufferExecuteInvalidations(txn->ninvalidations, txn->invalidations); + ReorderBufferExecuteInvalidations(txn->ninvalidations_distributed, + txn->invalidations_distributed); + } if (using_subtxn) RollbackAndReleaseCurrentSubTransaction(); @@ -3060,7 +3097,8 @@ ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, * We might have decoded changes for this transaction that could load * the cache as per the current transaction's view (consider DDL's * happened in this transaction). We don't want the decoding of future - * transactions to use those cache entries so execute invalidations. + * transactions to use those cache entries so execute only the inval + * messages in this transaction. */ if (txn->ninvalidations > 0) ReorderBufferImmediateInvalidation(rb, txn->ninvalidations, @@ -3147,9 +3185,10 @@ ReorderBufferForget(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn) txn->final_lsn = lsn; /* - * Process cache invalidation messages if there are any. Even if we're not - * interested in the transaction's contents, it could have manipulated the - * catalog and we need to update the caches according to that. + * Process only cache invalidation messages in this transaction if there + * are any. Even if we're not interested in the transaction's contents, it + * could have manipulated the catalog and we need to update the caches + * according to that. */ if (txn->base_snapshot != NULL && txn->ninvalidations > 0) ReorderBufferImmediateInvalidation(rb, txn->ninvalidations, @@ -3422,6 +3461,57 @@ ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid, } /* + * Add new invalidation messages to the reorder buffer queue. + */ +static void +ReorderBufferQueueInvalidations(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr lsn, Size nmsgs, + SharedInvalidationMessage *msgs) +{ + ReorderBufferChange *change; + + change = ReorderBufferAllocChange(rb); + change->action = REORDER_BUFFER_CHANGE_INVALIDATION; + change->data.inval.ninvalidations = nmsgs; + change->data.inval.invalidations = (SharedInvalidationMessage *) + palloc(sizeof(SharedInvalidationMessage) * nmsgs); + memcpy(change->data.inval.invalidations, msgs, + sizeof(SharedInvalidationMessage) * nmsgs); + + ReorderBufferQueueChange(rb, xid, lsn, change, false); +} + +/* + * A helper function for ReorderBufferAddInvalidations() and + * ReorderBufferAddDistributedInvalidations() to accumulate the invalidation + * messages to the **invals_out. + */ +static void +ReorderBufferAccumulateInvalidations(SharedInvalidationMessage **invals_out, + uint32 *ninvals_out, + SharedInvalidationMessage *msgs_new, + Size nmsgs_new) +{ + if (*ninvals_out == 0) + { + *ninvals_out = nmsgs_new; + *invals_out = (SharedInvalidationMessage *) + palloc(sizeof(SharedInvalidationMessage) * nmsgs_new); + memcpy(*invals_out, msgs_new, sizeof(SharedInvalidationMessage) * nmsgs_new); + } + else + { + /* Enlarge the array of inval messages */ + *invals_out = (SharedInvalidationMessage *) + repalloc(*invals_out, sizeof(SharedInvalidationMessage) * + (*ninvals_out + nmsgs_new)); + memcpy(*invals_out + *ninvals_out, msgs_new, + nmsgs_new * sizeof(SharedInvalidationMessage)); + *ninvals_out += nmsgs_new; + } +} + +/* * Accumulate the invalidations for executing them later. * * This needs to be called for each XLOG_XACT_INVALIDATIONS message and @@ -3441,7 +3531,6 @@ ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid, { ReorderBufferTXN *txn; MemoryContext oldcontext; - ReorderBufferChange *change; txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true); @@ -3456,35 +3545,76 @@ ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid, Assert(nmsgs > 0); - /* Accumulate invalidations. */ - if (txn->ninvalidations == 0) - { - txn->ninvalidations = nmsgs; - txn->invalidations = (SharedInvalidationMessage *) - palloc(sizeof(SharedInvalidationMessage) * nmsgs); - memcpy(txn->invalidations, msgs, - sizeof(SharedInvalidationMessage) * nmsgs); - } - else + ReorderBufferAccumulateInvalidations(&txn->invalidations, + &txn->ninvalidations, + msgs, nmsgs); + + ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accumulate the invalidations distributed by other committed transactions + * for executing them later. + * + * This function is similar to ReorderBufferAddInvalidations() but stores + * the given inval messages to the txn->invalidations_distributed with the + * overflow check. + * + * This needs to be called by committed transactions to distribute their + * inval messages to in-progress transactions. + */ +void +ReorderBufferAddDistributedInvalidations(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr lsn, Size nmsgs, + SharedInvalidationMessage *msgs) +{ + ReorderBufferTXN *txn; + MemoryContext oldcontext; + + txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true); + + oldcontext = MemoryContextSwitchTo(rb->context); + + /* + * Collect all the invalidations under the top transaction, if available, + * so that we can execute them all together. See comments + * ReorderBufferAddInvalidations. + */ + txn = rbtxn_get_toptxn(txn); + + Assert(nmsgs > 0); + + if (!rbtxn_distr_inval_overflowed(txn)) { - txn->invalidations = (SharedInvalidationMessage *) - repalloc(txn->invalidations, sizeof(SharedInvalidationMessage) * - (txn->ninvalidations + nmsgs)); + /* + * Check the transaction has enough space for storing distributed + * invalidation messages. + */ + if (txn->ninvalidations_distributed + nmsgs >= MAX_DISTR_INVAL_MSG_PER_TXN) + { + /* + * Mark the invalidation message as overflowed and free up the + * messages accumulated so far. + */ + txn->txn_flags |= RBTXN_DISTR_INVAL_OVERFLOWED; - memcpy(txn->invalidations + txn->ninvalidations, msgs, - nmsgs * sizeof(SharedInvalidationMessage)); - txn->ninvalidations += nmsgs; + if (txn->invalidations_distributed) + { + pfree(txn->invalidations_distributed); + txn->invalidations_distributed = NULL; + txn->ninvalidations_distributed = 0; + } + } + else + ReorderBufferAccumulateInvalidations(&txn->invalidations_distributed, + &txn->ninvalidations_distributed, + msgs, nmsgs); } - change = ReorderBufferAllocChange(rb); - change->action = REORDER_BUFFER_CHANGE_INVALIDATION; - change->data.inval.ninvalidations = nmsgs; - change->data.inval.invalidations = (SharedInvalidationMessage *) - palloc(sizeof(SharedInvalidationMessage) * nmsgs); - memcpy(change->data.inval.invalidations, msgs, - sizeof(SharedInvalidationMessage) * nmsgs); - - ReorderBufferQueueChange(rb, xid, lsn, change, false); + /* Queue the invalidation messages into the transaction */ + ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs); MemoryContextSwitchTo(oldcontext); } diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 0d7bddbe4ed..adf18c397db 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -794,6 +794,13 @@ SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, Transact * contents built by the current transaction even after its decoding, * which should have been invalidated due to concurrent catalog * changing transaction. + * + * Distribute only the invalidation messages generated by the current + * committed transaction. Invalidation messages received from other + * transactions would have already been propagated to the relevant + * in-progress transactions. This transaction would have processed + * those invalidations, ensuring that subsequent transactions observe + * a consistent cache state. */ if (txn->xid != xid) { @@ -807,8 +814,9 @@ SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, Transact { Assert(msgs != NULL); - ReorderBufferAddInvalidations(builder->reorder, txn->xid, lsn, - ninvalidations, msgs); + ReorderBufferAddDistributedInvalidations(builder->reorder, + txn->xid, lsn, + ninvalidations, msgs); } } } diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 600b87fa9cb..c64f020742f 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -424,6 +424,7 @@ ReplicationSlotCreate(const char *name, bool db_specific, slot->candidate_restart_valid = InvalidXLogRecPtr; slot->candidate_restart_lsn = InvalidXLogRecPtr; slot->last_saved_confirmed_flush = InvalidXLogRecPtr; + slot->last_saved_restart_lsn = InvalidXLogRecPtr; slot->inactive_since = 0; /* @@ -1165,20 +1166,41 @@ ReplicationSlotsComputeRequiredLSN(void) { ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i]; XLogRecPtr restart_lsn; + XLogRecPtr last_saved_restart_lsn; bool invalidated; + ReplicationSlotPersistency persistency; if (!s->in_use) continue; SpinLockAcquire(&s->mutex); + persistency = s->data.persistency; restart_lsn = s->data.restart_lsn; invalidated = s->data.invalidated != RS_INVAL_NONE; + last_saved_restart_lsn = s->last_saved_restart_lsn; SpinLockRelease(&s->mutex); /* invalidated slots need not apply */ if (invalidated) continue; + /* + * For persistent slot use last_saved_restart_lsn to compute the + * oldest LSN for removal of WAL segments. The segments between + * last_saved_restart_lsn and restart_lsn might be needed by a + * persistent slot in the case of database crash. Non-persistent + * slots can't survive the database crash, so we don't care about + * last_saved_restart_lsn for them. + */ + if (persistency == RS_PERSISTENT) + { + if (last_saved_restart_lsn != InvalidXLogRecPtr && + restart_lsn > last_saved_restart_lsn) + { + restart_lsn = last_saved_restart_lsn; + } + } + if (restart_lsn != InvalidXLogRecPtr && (min_required == InvalidXLogRecPtr || restart_lsn < min_required)) @@ -1216,7 +1238,9 @@ ReplicationSlotsComputeLogicalRestartLSN(void) { ReplicationSlot *s; XLogRecPtr restart_lsn; + XLogRecPtr last_saved_restart_lsn; bool invalidated; + ReplicationSlotPersistency persistency; s = &ReplicationSlotCtl->replication_slots[i]; @@ -1230,14 +1254,33 @@ ReplicationSlotsComputeLogicalRestartLSN(void) /* read once, it's ok if it increases while we're checking */ SpinLockAcquire(&s->mutex); + persistency = s->data.persistency; restart_lsn = s->data.restart_lsn; invalidated = s->data.invalidated != RS_INVAL_NONE; + last_saved_restart_lsn = s->last_saved_restart_lsn; SpinLockRelease(&s->mutex); /* invalidated slots need not apply */ if (invalidated) continue; + /* + * For persistent slot use last_saved_restart_lsn to compute the + * oldest LSN for removal of WAL segments. The segments between + * last_saved_restart_lsn and restart_lsn might be needed by a + * persistent slot in the case of database crash. Non-persistent + * slots can't survive the database crash, so we don't care about + * last_saved_restart_lsn for them. + */ + if (persistency == RS_PERSISTENT) + { + if (last_saved_restart_lsn != InvalidXLogRecPtr && + restart_lsn > last_saved_restart_lsn) + { + restart_lsn = last_saved_restart_lsn; + } + } + if (restart_lsn == InvalidXLogRecPtr) continue; @@ -1455,6 +1498,7 @@ ReplicationSlotReserveWal(void) Assert(slot != NULL); Assert(slot->data.restart_lsn == InvalidXLogRecPtr); + Assert(slot->last_saved_restart_lsn == InvalidXLogRecPtr); /* * The replication slot mechanism is used to prevent removal of required @@ -1766,6 +1810,8 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes, */ SpinLockAcquire(&s->mutex); + Assert(s->data.restart_lsn >= s->last_saved_restart_lsn); + restart_lsn = s->data.restart_lsn; /* we do nothing if the slot is already invalid */ @@ -1835,7 +1881,10 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes, * just rely on .invalidated. */ if (invalidation_cause == RS_INVAL_WAL_REMOVED) + { s->data.restart_lsn = InvalidXLogRecPtr; + s->last_saved_restart_lsn = InvalidXLogRecPtr; + } /* Let caller know */ *invalidated = true; @@ -2079,6 +2128,12 @@ CheckPointReplicationSlots(bool is_shutdown) SaveSlotToPath(s, path, LOG); } LWLockRelease(ReplicationSlotAllocationLock); + + /* + * Recompute the required LSN as SaveSlotToPath() updated + * last_saved_restart_lsn for slots. + */ + ReplicationSlotsComputeRequiredLSN(); } /* @@ -2354,6 +2409,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) if (!slot->just_dirtied) slot->dirty = false; slot->last_saved_confirmed_flush = cp.slotdata.confirmed_flush; + slot->last_saved_restart_lsn = cp.slotdata.restart_lsn; SpinLockRelease(&slot->mutex); LWLockRelease(&slot->io_in_progress_lock); @@ -2569,6 +2625,7 @@ RestoreSlotFromDisk(const char *name) slot->effective_xmin = cp.slotdata.xmin; slot->effective_catalog_xmin = cp.slotdata.catalog_xmin; slot->last_saved_confirmed_flush = cp.slotdata.confirmed_flush; + slot->last_saved_restart_lsn = cp.slotdata.restart_lsn; slot->candidate_catalog_xmin = InvalidTransactionId; slot->candidate_xmin_lsn = InvalidXLogRecPtr; |