diff options
author | Noah Misch <noah@leadboat.com> | 2024-09-24 15:25:18 -0700 |
---|---|---|
committer | Noah Misch <noah@leadboat.com> | 2024-09-24 15:25:18 -0700 |
commit | a07e03fd8fa7daf4d1356f7cb501ffe784ea6257 (patch) | |
tree | fa7a55ec1c78beffe2a209fd626697c4d2b24ff3 /src/backend/commands | |
parent | dbf3f974ee04d24547690268b1fc2b7eb12b4ebc (diff) | |
download | postgresql-a07e03fd8fa7daf4d1356f7cb501ffe784ea6257.tar.gz postgresql-a07e03fd8fa7daf4d1356f7cb501ffe784ea6257.zip |
Fix data loss at inplace update after heap_update().
As previously-added tests demonstrated, heap_inplace_update() could
instead update an unrelated tuple of the same catalog. It could lose
the update. Losing relhasindex=t was a source of index corruption.
Inplace-updating commands like VACUUM will now wait for heap_update()
commands like GRANT TABLE and GRANT DATABASE. That isn't ideal, but a
long-running GRANT already hurts VACUUM progress more just by keeping an
XID running. The VACUUM will behave like a DELETE or UPDATE waiting for
the uncommitted change.
For implementation details, start at the systable_inplace_update_begin()
header comment and README.tuplock. Back-patch to v12 (all supported
versions). In back branches, retain a deprecated heap_inplace_update(),
for extensions.
Reported by Smolkin Grigory. Reviewed by Nitin Motiani, (in earlier
versions) Heikki Linnakangas, and (in earlier versions) Alexander
Lakhin.
Discussion: https://postgr.es/m/CAMp+ueZQz3yDk7qg42hk6-9gxniYbp-=bG2mgqecErqR5gGGOA@mail.gmail.com
Diffstat (limited to 'src/backend/commands')
-rw-r--r-- | src/backend/commands/dbcommands.c | 34 | ||||
-rw-r--r-- | src/backend/commands/event_trigger.c | 27 | ||||
-rw-r--r-- | src/backend/commands/vacuum.c | 32 |
3 files changed, 44 insertions, 49 deletions
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 8be435a79e9..40bfd093337 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -1651,7 +1651,7 @@ dropdb(const char *dbname, bool missing_ok, bool force) Relation pgdbrel; HeapTuple tup; ScanKeyData scankey; - SysScanDesc scan; + void *inplace_state; Form_pg_database datform; int notherbackends; int npreparedxacts; @@ -1790,24 +1790,6 @@ dropdb(const char *dbname, bool missing_ok, bool force) pgstat_drop_database(db_id); /* - * Get the pg_database tuple to scribble on. Note that this does not - * directly rely on the syscache to avoid issues with flattened toast - * values for the in-place update. - */ - ScanKeyInit(&scankey, - Anum_pg_database_datname, - BTEqualStrategyNumber, F_NAMEEQ, - CStringGetDatum(dbname)); - - scan = systable_beginscan(pgdbrel, DatabaseNameIndexId, true, - NULL, 1, &scankey); - - tup = systable_getnext(scan); - if (!HeapTupleIsValid(tup)) - elog(ERROR, "cache lookup failed for database %u", db_id); - datform = (Form_pg_database) GETSTRUCT(tup); - - /* * Except for the deletion of the catalog row, subsequent actions are not * transactional (consider DropDatabaseBuffers() discarding modified * buffers). But we might crash or get interrupted below. To prevent @@ -1818,8 +1800,17 @@ dropdb(const char *dbname, bool missing_ok, bool force) * modification is durable before performing irreversible filesystem * operations. */ + ScanKeyInit(&scankey, + Anum_pg_database_datname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(dbname)); + systable_inplace_update_begin(pgdbrel, DatabaseNameIndexId, true, + NULL, 1, &scankey, &tup, &inplace_state); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for database %u", db_id); + datform = (Form_pg_database) GETSTRUCT(tup); datform->datconnlimit = DATCONNLIMIT_INVALID_DB; - heap_inplace_update(pgdbrel, tup); + systable_inplace_update_finish(inplace_state, tup); XLogFlush(XactLastRecEnd); /* @@ -1827,8 +1818,7 @@ dropdb(const char *dbname, bool missing_ok, bool force) * the row will be gone, but if we fail, dropdb() can be invoked again. */ CatalogTupleDelete(pgdbrel, &tup->t_self); - - systable_endscan(scan); + heap_freetuple(tup); /* * Drop db-specific replication slots. diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c index 7a5ed6b9850..55baf10c856 100644 --- a/src/backend/commands/event_trigger.c +++ b/src/backend/commands/event_trigger.c @@ -946,25 +946,18 @@ EventTriggerOnLogin(void) { Relation pg_db = table_open(DatabaseRelationId, RowExclusiveLock); HeapTuple tuple; + void *state; Form_pg_database db; ScanKeyData key[1]; - SysScanDesc scan; - /* - * Get the pg_database tuple to scribble on. Note that this does - * not directly rely on the syscache to avoid issues with - * flattened toast values for the in-place update. - */ + /* Fetch a copy of the tuple to scribble on */ ScanKeyInit(&key[0], Anum_pg_database_oid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(MyDatabaseId)); - scan = systable_beginscan(pg_db, DatabaseOidIndexId, true, - NULL, 1, key); - tuple = systable_getnext(scan); - tuple = heap_copytuple(tuple); - systable_endscan(scan); + systable_inplace_update_begin(pg_db, DatabaseOidIndexId, true, + NULL, 1, key, &tuple, &state); if (!HeapTupleIsValid(tuple)) elog(ERROR, "could not find tuple for database %u", MyDatabaseId); @@ -980,13 +973,15 @@ EventTriggerOnLogin(void) * that avoids possible waiting on the row-level lock. Second, * that avoids dealing with TOAST. * - * It's known that changes made by heap_inplace_update() may - * be lost due to concurrent normal updates. However, we are - * OK with that. The subsequent connections will still have a - * chance to set "dathasloginevt" to false. + * Changes made by inplace update may be lost due to + * concurrent normal updates; see inplace-inval.spec. However, + * we are OK with that. The subsequent connections will still + * have a chance to set "dathasloginevt" to false. */ - heap_inplace_update(pg_db, tuple); + systable_inplace_update_finish(state, tuple); } + else + systable_inplace_update_cancel(state); table_close(pg_db, RowExclusiveLock); heap_freetuple(tuple); } diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 23aabdc90dc..ac8f5d9c259 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1417,7 +1417,9 @@ vac_update_relstats(Relation relation, { Oid relid = RelationGetRelid(relation); Relation rd; + ScanKeyData key[1]; HeapTuple ctup; + void *inplace_state; Form_pg_class pgcform; bool dirty, futurexid, @@ -1428,7 +1430,12 @@ vac_update_relstats(Relation relation, rd = table_open(RelationRelationId, RowExclusiveLock); /* Fetch a copy of the tuple to scribble on */ - ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid)); + ScanKeyInit(&key[0], + Anum_pg_class_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + systable_inplace_update_begin(rd, ClassOidIndexId, true, + NULL, 1, key, &ctup, &inplace_state); if (!HeapTupleIsValid(ctup)) elog(ERROR, "pg_class entry for relid %u vanished during vacuuming", relid); @@ -1536,7 +1543,9 @@ vac_update_relstats(Relation relation, /* If anything changed, write out the tuple. */ if (dirty) - heap_inplace_update(rd, ctup); + systable_inplace_update_finish(inplace_state, ctup); + else + systable_inplace_update_cancel(inplace_state); table_close(rd, RowExclusiveLock); @@ -1588,6 +1597,7 @@ vac_update_datfrozenxid(void) bool bogus = false; bool dirty = false; ScanKeyData key[1]; + void *inplace_state; /* * Restrict this task to one backend per database. This avoids race @@ -1711,20 +1721,18 @@ vac_update_datfrozenxid(void) relation = table_open(DatabaseRelationId, RowExclusiveLock); /* - * Get the pg_database tuple to scribble on. Note that this does not - * directly rely on the syscache to avoid issues with flattened toast - * values for the in-place update. + * Fetch a copy of the tuple to scribble on. We could check the syscache + * tuple first. If that concluded !dirty, we'd avoid waiting on + * concurrent heap_update() and would avoid exclusive-locking the buffer. + * For now, don't optimize that. */ ScanKeyInit(&key[0], Anum_pg_database_oid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(MyDatabaseId)); - scan = systable_beginscan(relation, DatabaseOidIndexId, true, - NULL, 1, key); - tuple = systable_getnext(scan); - tuple = heap_copytuple(tuple); - systable_endscan(scan); + systable_inplace_update_begin(relation, DatabaseOidIndexId, true, + NULL, 1, key, &tuple, &inplace_state); if (!HeapTupleIsValid(tuple)) elog(ERROR, "could not find tuple for database %u", MyDatabaseId); @@ -1758,7 +1766,9 @@ vac_update_datfrozenxid(void) newMinMulti = dbform->datminmxid; if (dirty) - heap_inplace_update(relation, tuple); + systable_inplace_update_finish(inplace_state, tuple); + else + systable_inplace_update_cancel(inplace_state); heap_freetuple(tuple); table_close(relation, RowExclusiveLock); |