aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/commands/tablecmds.c11
-rw-r--r--src/backend/storage/smgr/md.c27
-rw-r--r--src/backend/utils/cache/relcache.c66
-rw-r--r--src/bin/pg_dump/pg_dump.c31
4 files changed, 120 insertions, 15 deletions
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 7fbee0c1f71..e7aef2f6b08 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -40,6 +40,7 @@
#include "catalog/pg_depend.h"
#include "catalog/pg_foreign_table.h"
#include "catalog/pg_inherits.h"
+#include "catalog/pg_largeobject.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_statistic_ext.h"
@@ -2185,7 +2186,15 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple)
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is not a table", relname)));
- if (!allowSystemTableMods && IsSystemClass(relid, reltuple))
+ /*
+ * Most system catalogs can't be truncated at all, or at least not unless
+ * allow_system_table_mods=on. As an exception, however, we allow
+ * pg_largeobject to be truncated as part of pg_upgrade, because we need
+ * to change its relfilenode to match the old cluster, and allowing a
+ * TRUNCATE command to be executed is the easiest way of doing that.
+ */
+ if (!allowSystemTableMods && IsSystemClass(relid, reltuple)
+ && (!IsBinaryUpgrade || relid != LargeObjectRelationId))
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("permission denied: \"%s\" is a system catalog",
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 3998296a62f..3deac496eed 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -319,6 +319,7 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forkNum, bool isRedo)
{
char *path;
int ret;
+ BlockNumber segno = 0;
path = relpath(rlocator, forkNum);
@@ -353,8 +354,22 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forkNum, bool isRedo)
/* Prevent other backends' fds from holding on to the disk space */
ret = do_truncate(path);
- /* Register request to unlink first segment later */
- register_unlink_segment(rlocator, forkNum, 0 /* first seg */ );
+ /*
+ * Except during a binary upgrade, register request to unlink first
+ * segment later, rather than now.
+ *
+ * If we're performing a binary upgrade, the dangers described in the
+ * header comments for mdunlink() do not exist, since after a crash
+ * or even a simple ERROR, the upgrade fails and the whole new cluster
+ * must be recreated from scratch. And, on the other hand, it is
+ * important to remove the files from disk immediately, because we
+ * may be about to reuse the same relfilenumber.
+ */
+ if (!IsBinaryUpgrade)
+ {
+ register_unlink_segment(rlocator, forkNum, 0 /* first seg */ );
+ ++segno;
+ }
}
/*
@@ -363,15 +378,17 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forkNum, bool isRedo)
if (ret >= 0)
{
char *segpath = (char *) palloc(strlen(path) + 12);
- BlockNumber segno;
/*
* Note that because we loop until getting ENOENT, we will correctly
* remove all inactive segments as well as active ones.
*/
- for (segno = 1;; segno++)
+ for (;; segno++)
{
- sprintf(segpath, "%s.%u", path, segno);
+ if (segno == 0)
+ strcpy(segpath, path);
+ else
+ sprintf(segpath, "%s.%u", path, segno);
if (!RelFileLocatorBackendIsTemp(rlocator))
{
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index bdb771d278f..00dc0f24037 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -41,6 +41,7 @@
#include "access/tupdesc_details.h"
#include "access/xact.h"
#include "access/xlog.h"
+#include "catalog/binary_upgrade.h"
#include "catalog/catalog.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
@@ -3707,9 +3708,36 @@ RelationSetNewRelfilenumber(Relation relation, char persistence)
TransactionId freezeXid = InvalidTransactionId;
RelFileLocator newrlocator;
- /* Allocate a new relfilenumber */
- newrelfilenumber = GetNewRelFileNumber(relation->rd_rel->reltablespace,
- NULL, persistence);
+ if (!IsBinaryUpgrade)
+ {
+ /* Allocate a new relfilenumber */
+ newrelfilenumber = GetNewRelFileNumber(relation->rd_rel->reltablespace,
+ NULL, persistence);
+ }
+ else if (relation->rd_rel->relkind == RELKIND_INDEX)
+ {
+ if (!OidIsValid(binary_upgrade_next_index_pg_class_relfilenumber))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("index relfilenumber value not set when in binary upgrade mode")));
+
+ newrelfilenumber = binary_upgrade_next_index_pg_class_relfilenumber;
+ binary_upgrade_next_index_pg_class_relfilenumber = InvalidOid;
+ }
+ else if (relation->rd_rel->relkind == RELKIND_RELATION)
+ {
+ if (!OidIsValid(binary_upgrade_next_heap_pg_class_relfilenumber))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("heap relfilenumber value not set when in binary upgrade mode")));
+
+ newrelfilenumber = binary_upgrade_next_heap_pg_class_relfilenumber;
+ binary_upgrade_next_heap_pg_class_relfilenumber = InvalidOid;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unexpected request for new relfilenumber in binary upgrade mode")));
/*
* Get a writable copy of the pg_class tuple for the given relation.
@@ -3724,9 +3752,37 @@ RelationSetNewRelfilenumber(Relation relation, char persistence)
classform = (Form_pg_class) GETSTRUCT(tuple);
/*
- * Schedule unlinking of the old storage at transaction commit.
+ * Schedule unlinking of the old storage at transaction commit, except
+ * when performing a binary upgrade, when we must do it immediately.
*/
- RelationDropStorage(relation);
+ if (IsBinaryUpgrade)
+ {
+ SMgrRelation srel;
+
+ /*
+ * During a binary upgrade, we use this code path to ensure that
+ * pg_largeobject and its index have the same relfilenumbers as in
+ * the old cluster. This is necessary because pg_upgrade treats
+ * pg_largeobject like a user table, not a system table. It is however
+ * possible that a table or index may need to end up with the same
+ * relfilenumber in the new cluster as what it had in the old cluster.
+ * Hence, we can't wait until commit time to remove the old storage.
+ *
+ * In general, this function needs to have transactional semantics,
+ * and removing the old storage before commit time surely isn't.
+ * However, it doesn't really matter, because if a binary upgrade
+ * fails at this stage, the new cluster will need to be recreated
+ * anyway.
+ */
+ srel = smgropen(relation->rd_locator, relation->rd_backend);
+ smgrdounlinkall(&srel, 1, false);
+ smgrclose(srel);
+ }
+ else
+ {
+ /* Not a binary upgrade, so just schedule it to happen later. */
+ RelationDropStorage(relation);
+ }
/*
* Create storage for the main fork of the new relfilenumber. If it's a
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index f9c51d1e679..25742a0e2ad 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -3141,6 +3141,7 @@ dumpDatabase(Archive *fout)
PGresult *lo_res;
PQExpBuffer loFrozenQry = createPQExpBuffer();
PQExpBuffer loOutQry = createPQExpBuffer();
+ PQExpBuffer loVacQry = createPQExpBuffer();
int i_relfrozenxid,
i_relfilenode,
i_oid,
@@ -3167,15 +3168,36 @@ dumpDatabase(Archive *fout)
i_relfilenode = PQfnumber(lo_res, "relfilenode");
i_oid = PQfnumber(lo_res, "oid");
- appendPQExpBufferStr(loOutQry, "\n-- For binary upgrade, preserve values for pg_largeobject and its index\n");
+ appendPQExpBufferStr(loOutQry, "\n-- For binary upgrade, set pg_largeobject relfrozenxid and relminmxid\n");
+ appendPQExpBufferStr(loVacQry, "\n-- For binary upgrade, preserve pg_largeobject and index relfilenodes\n");
for (int i = 0; i < PQntuples(lo_res); ++i)
+ {
+ Oid oid;
+ RelFileNumber relfilenumber;
+
appendPQExpBuffer(loOutQry, "UPDATE pg_catalog.pg_class\n"
- "SET relfrozenxid = '%u', relminmxid = '%u', relfilenode = '%u'\n"
+ "SET relfrozenxid = '%u', relminmxid = '%u'\n"
"WHERE oid = %u;\n",
atooid(PQgetvalue(lo_res, i, i_relfrozenxid)),
atooid(PQgetvalue(lo_res, i, i_relminmxid)),
- atooid(PQgetvalue(lo_res, i, i_relfilenode)),
- atooid(PQgetvalue(lo_res, i, i_oid)));
+ atooid(PQgetvalue(lo_res, i, i_oid)));
+
+ oid = atooid(PQgetvalue(lo_res, i, i_oid));
+ relfilenumber = atooid(PQgetvalue(lo_res, i, i_relfilenode));
+
+ if (oid == LargeObjectRelationId)
+ appendPQExpBuffer(loVacQry,
+ "SELECT pg_catalog.binary_upgrade_set_next_heap_relfilenode('%u'::pg_catalog.oid);\n",
+ relfilenumber);
+ else if (oid == LargeObjectLOidPNIndexId)
+ appendPQExpBuffer(loVacQry,
+ "SELECT pg_catalog.binary_upgrade_set_next_index_relfilenode('%u'::pg_catalog.oid);\n",
+ relfilenumber);
+ }
+
+ appendPQExpBufferStr(loVacQry,
+ "TRUNCATE pg_catalog.pg_largeobject;\n");
+ appendPQExpBufferStr(loOutQry, loVacQry->data);
ArchiveEntry(fout, nilCatalogId, createDumpId(),
ARCHIVE_OPTS(.tag = "pg_largeobject",
@@ -3187,6 +3209,7 @@ dumpDatabase(Archive *fout)
destroyPQExpBuffer(loFrozenQry);
destroyPQExpBuffer(loOutQry);
+ destroyPQExpBuffer(loVacQry);
}
PQclear(res);