aboutsummaryrefslogtreecommitdiff
path: root/src/backend/commands
diff options
context:
space:
mode:
authorNoah Misch <noah@leadboat.com>2024-09-24 15:25:18 -0700
committerNoah Misch <noah@leadboat.com>2024-09-24 15:25:18 -0700
commita07e03fd8fa7daf4d1356f7cb501ffe784ea6257 (patch)
treefa7a55ec1c78beffe2a209fd626697c4d2b24ff3 /src/backend/commands
parentdbf3f974ee04d24547690268b1fc2b7eb12b4ebc (diff)
downloadpostgresql-a07e03fd8fa7daf4d1356f7cb501ffe784ea6257.tar.gz
postgresql-a07e03fd8fa7daf4d1356f7cb501ffe784ea6257.zip
Fix data loss at inplace update after heap_update().
As previously-added tests demonstrated, heap_inplace_update() could instead update an unrelated tuple of the same catalog. It could lose the update. Losing relhasindex=t was a source of index corruption. Inplace-updating commands like VACUUM will now wait for heap_update() commands like GRANT TABLE and GRANT DATABASE. That isn't ideal, but a long-running GRANT already hurts VACUUM progress more just by keeping an XID running. The VACUUM will behave like a DELETE or UPDATE waiting for the uncommitted change. For implementation details, start at the systable_inplace_update_begin() header comment and README.tuplock. Back-patch to v12 (all supported versions). In back branches, retain a deprecated heap_inplace_update(), for extensions. Reported by Smolkin Grigory. Reviewed by Nitin Motiani, (in earlier versions) Heikki Linnakangas, and (in earlier versions) Alexander Lakhin. Discussion: https://postgr.es/m/CAMp+ueZQz3yDk7qg42hk6-9gxniYbp-=bG2mgqecErqR5gGGOA@mail.gmail.com
Diffstat (limited to 'src/backend/commands')
-rw-r--r--src/backend/commands/dbcommands.c34
-rw-r--r--src/backend/commands/event_trigger.c27
-rw-r--r--src/backend/commands/vacuum.c32
3 files changed, 44 insertions, 49 deletions
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 8be435a79e9..40bfd093337 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1651,7 +1651,7 @@ dropdb(const char *dbname, bool missing_ok, bool force)
Relation pgdbrel;
HeapTuple tup;
ScanKeyData scankey;
- SysScanDesc scan;
+ void *inplace_state;
Form_pg_database datform;
int notherbackends;
int npreparedxacts;
@@ -1790,24 +1790,6 @@ dropdb(const char *dbname, bool missing_ok, bool force)
pgstat_drop_database(db_id);
/*
- * Get the pg_database tuple to scribble on. Note that this does not
- * directly rely on the syscache to avoid issues with flattened toast
- * values for the in-place update.
- */
- ScanKeyInit(&scankey,
- Anum_pg_database_datname,
- BTEqualStrategyNumber, F_NAMEEQ,
- CStringGetDatum(dbname));
-
- scan = systable_beginscan(pgdbrel, DatabaseNameIndexId, true,
- NULL, 1, &scankey);
-
- tup = systable_getnext(scan);
- if (!HeapTupleIsValid(tup))
- elog(ERROR, "cache lookup failed for database %u", db_id);
- datform = (Form_pg_database) GETSTRUCT(tup);
-
- /*
* Except for the deletion of the catalog row, subsequent actions are not
* transactional (consider DropDatabaseBuffers() discarding modified
* buffers). But we might crash or get interrupted below. To prevent
@@ -1818,8 +1800,17 @@ dropdb(const char *dbname, bool missing_ok, bool force)
* modification is durable before performing irreversible filesystem
* operations.
*/
+ ScanKeyInit(&scankey,
+ Anum_pg_database_datname,
+ BTEqualStrategyNumber, F_NAMEEQ,
+ CStringGetDatum(dbname));
+ systable_inplace_update_begin(pgdbrel, DatabaseNameIndexId, true,
+ NULL, 1, &scankey, &tup, &inplace_state);
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for database %u", db_id);
+ datform = (Form_pg_database) GETSTRUCT(tup);
datform->datconnlimit = DATCONNLIMIT_INVALID_DB;
- heap_inplace_update(pgdbrel, tup);
+ systable_inplace_update_finish(inplace_state, tup);
XLogFlush(XactLastRecEnd);
/*
@@ -1827,8 +1818,7 @@ dropdb(const char *dbname, bool missing_ok, bool force)
* the row will be gone, but if we fail, dropdb() can be invoked again.
*/
CatalogTupleDelete(pgdbrel, &tup->t_self);
-
- systable_endscan(scan);
+ heap_freetuple(tup);
/*
* Drop db-specific replication slots.
diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c
index 7a5ed6b9850..55baf10c856 100644
--- a/src/backend/commands/event_trigger.c
+++ b/src/backend/commands/event_trigger.c
@@ -946,25 +946,18 @@ EventTriggerOnLogin(void)
{
Relation pg_db = table_open(DatabaseRelationId, RowExclusiveLock);
HeapTuple tuple;
+ void *state;
Form_pg_database db;
ScanKeyData key[1];
- SysScanDesc scan;
- /*
- * Get the pg_database tuple to scribble on. Note that this does
- * not directly rely on the syscache to avoid issues with
- * flattened toast values for the in-place update.
- */
+ /* Fetch a copy of the tuple to scribble on */
ScanKeyInit(&key[0],
Anum_pg_database_oid,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(MyDatabaseId));
- scan = systable_beginscan(pg_db, DatabaseOidIndexId, true,
- NULL, 1, key);
- tuple = systable_getnext(scan);
- tuple = heap_copytuple(tuple);
- systable_endscan(scan);
+ systable_inplace_update_begin(pg_db, DatabaseOidIndexId, true,
+ NULL, 1, key, &tuple, &state);
if (!HeapTupleIsValid(tuple))
elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
@@ -980,13 +973,15 @@ EventTriggerOnLogin(void)
* that avoids possible waiting on the row-level lock. Second,
* that avoids dealing with TOAST.
*
- * It's known that changes made by heap_inplace_update() may
- * be lost due to concurrent normal updates. However, we are
- * OK with that. The subsequent connections will still have a
- * chance to set "dathasloginevt" to false.
+ * Changes made by inplace update may be lost due to
+ * concurrent normal updates; see inplace-inval.spec. However,
+ * we are OK with that. The subsequent connections will still
+ * have a chance to set "dathasloginevt" to false.
*/
- heap_inplace_update(pg_db, tuple);
+ systable_inplace_update_finish(state, tuple);
}
+ else
+ systable_inplace_update_cancel(state);
table_close(pg_db, RowExclusiveLock);
heap_freetuple(tuple);
}
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 23aabdc90dc..ac8f5d9c259 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1417,7 +1417,9 @@ vac_update_relstats(Relation relation,
{
Oid relid = RelationGetRelid(relation);
Relation rd;
+ ScanKeyData key[1];
HeapTuple ctup;
+ void *inplace_state;
Form_pg_class pgcform;
bool dirty,
futurexid,
@@ -1428,7 +1430,12 @@ vac_update_relstats(Relation relation,
rd = table_open(RelationRelationId, RowExclusiveLock);
/* Fetch a copy of the tuple to scribble on */
- ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
+ ScanKeyInit(&key[0],
+ Anum_pg_class_oid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(relid));
+ systable_inplace_update_begin(rd, ClassOidIndexId, true,
+ NULL, 1, key, &ctup, &inplace_state);
if (!HeapTupleIsValid(ctup))
elog(ERROR, "pg_class entry for relid %u vanished during vacuuming",
relid);
@@ -1536,7 +1543,9 @@ vac_update_relstats(Relation relation,
/* If anything changed, write out the tuple. */
if (dirty)
- heap_inplace_update(rd, ctup);
+ systable_inplace_update_finish(inplace_state, ctup);
+ else
+ systable_inplace_update_cancel(inplace_state);
table_close(rd, RowExclusiveLock);
@@ -1588,6 +1597,7 @@ vac_update_datfrozenxid(void)
bool bogus = false;
bool dirty = false;
ScanKeyData key[1];
+ void *inplace_state;
/*
* Restrict this task to one backend per database. This avoids race
@@ -1711,20 +1721,18 @@ vac_update_datfrozenxid(void)
relation = table_open(DatabaseRelationId, RowExclusiveLock);
/*
- * Get the pg_database tuple to scribble on. Note that this does not
- * directly rely on the syscache to avoid issues with flattened toast
- * values for the in-place update.
+ * Fetch a copy of the tuple to scribble on. We could check the syscache
+ * tuple first. If that concluded !dirty, we'd avoid waiting on
+ * concurrent heap_update() and would avoid exclusive-locking the buffer.
+ * For now, don't optimize that.
*/
ScanKeyInit(&key[0],
Anum_pg_database_oid,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(MyDatabaseId));
- scan = systable_beginscan(relation, DatabaseOidIndexId, true,
- NULL, 1, key);
- tuple = systable_getnext(scan);
- tuple = heap_copytuple(tuple);
- systable_endscan(scan);
+ systable_inplace_update_begin(relation, DatabaseOidIndexId, true,
+ NULL, 1, key, &tuple, &inplace_state);
if (!HeapTupleIsValid(tuple))
elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
@@ -1758,7 +1766,9 @@ vac_update_datfrozenxid(void)
newMinMulti = dbform->datminmxid;
if (dirty)
- heap_inplace_update(relation, tuple);
+ systable_inplace_update_finish(inplace_state, tuple);
+ else
+ systable_inplace_update_cancel(inplace_state);
heap_freetuple(tuple);
table_close(relation, RowExclusiveLock);