Diffstat (limited to 'src')
-rw-r--r--  src/bin/pg_dump/common.c                    18
-rw-r--r--  src/bin/pg_dump/meson.build                  1
-rw-r--r--  src/bin/pg_dump/pg_backup_archiver.c        73
-rw-r--r--  src/bin/pg_dump/pg_dump.c                  151
-rw-r--r--  src/bin/pg_dump/pg_dump.h                    2
-rw-r--r--  src/bin/pg_dump/t/004_pg_dump_parallel.pl   81
6 files changed, 288 insertions, 38 deletions
diff --git a/src/bin/pg_dump/common.c b/src/bin/pg_dump/common.c
index a2b74901e43..44558b60e8f 100644
--- a/src/bin/pg_dump/common.c
+++ b/src/bin/pg_dump/common.c
@@ -230,6 +230,9 @@ getSchemaData(Archive *fout, int *numTablesPtr)
pg_log_info("flagging inherited columns in subtables");
flagInhAttrs(fout->dopt, tblinfo, numTables);
+ pg_log_info("reading partitioning data");
+ getPartitioningInfo(fout);
+
pg_log_info("reading indexes");
getIndexes(fout, tblinfo, numTables);
@@ -285,7 +288,6 @@ static void
flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables,
InhInfo *inhinfo, int numInherits)
{
- DumpOptions *dopt = fout->dopt;
int i,
j;
@@ -301,18 +303,18 @@ flagInhTables(Archive *fout, TableInfo *tblinfo, int numTables,
continue;
/*
- * Normally, we don't bother computing anything for non-target tables,
- * but if load-via-partition-root is specified, we gather information
- * on every partition in the system so that getRootTableInfo can trace
- * from any given leaf partition all the way up to the root. (We
- * don't need to mark them as interesting for getTableAttrs, though.)
+ * Normally, we don't bother computing anything for non-target tables.
+ * However, we must find the parents of non-root partitioned tables in
+ * any case, so that we can trace from leaf partitions up to the root
+ * (in case a leaf is to be dumped but its parents are not). We need
+ * not mark such parents interesting for getTableAttrs, though.
*/
if (!tblinfo[i].dobj.dump)
{
mark_parents = false;
- if (!dopt->load_via_partition_root ||
- !tblinfo[i].ispartition)
+ if (!(tblinfo[i].relkind == RELKIND_PARTITIONED_TABLE &&
+ tblinfo[i].ispartition))
find_parents = false;
}
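
To make the revised comment concrete, here is a SQL sketch of the situation it describes, with hypothetical table names: a leaf partition is selected for dump while its ancestors are not, yet the parent links must still be recorded so that getRootTableInfo can walk all the way to the root if load-via-partition-root ends up being forced.

  create table root (id int) partition by range (id);
  create table mid partition of root
    for values from (0) to (1000) partition by range (id);
  create table leaf partition of mid for values from (0) to (500);
  -- "pg_dump --table=leaf" dumps neither root nor mid, but find_parents must
  -- still link leaf -> mid -> root; neither parent is marked interesting for
  -- getTableAttrs.
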
diff --git a/src/bin/pg_dump/meson.build b/src/bin/pg_dump/meson.build
index ab4c25c7811..b2fb7ac77fd 100644
--- a/src/bin/pg_dump/meson.build
+++ b/src/bin/pg_dump/meson.build
@@ -96,6 +96,7 @@ tests += {
't/001_basic.pl',
't/002_pg_dump.pl',
't/003_pg_dump_with_server.pl',
+ 't/004_pg_dump_parallel.pl',
't/010_dump_connstr.pl',
],
},
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 61ebb8fe85d..3337d34e405 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -86,6 +86,7 @@ static RestorePass _tocEntryRestorePass(TocEntry *te);
static bool _tocEntryIsACL(TocEntry *te);
static void _disableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
static void _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te);
+static bool is_load_via_partition_root(TocEntry *te);
static void buildTocEntryArrays(ArchiveHandle *AH);
static void _moveBefore(TocEntry *pos, TocEntry *te);
static int _discoverArchiveFormat(ArchiveHandle *AH);
@@ -887,6 +888,8 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
}
else
{
+ bool use_truncate;
+
_disableTriggersIfNecessary(AH, te);
/* Select owner and schema as necessary */
@@ -898,13 +901,24 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
/*
* In parallel restore, if we created the table earlier in
- * the run then we wrap the COPY in a transaction and
- * precede it with a TRUNCATE. If archiving is not on
- * this prevents WAL-logging the COPY. This obtains a
- * speedup similar to that from using single_txn mode in
- * non-parallel restores.
+ * this run (so that we know it is empty) and we are not
+ * restoring a load-via-partition-root data item then we
+ * wrap the COPY in a transaction and precede it with a
+ * TRUNCATE. If wal_level is set to minimal this prevents
+ * WAL-logging the COPY. This obtains a speedup similar
+ * to that from using single_txn mode in non-parallel
+ * restores.
+ *
+ * We mustn't do this for load-via-partition-root cases
+ * because some data might get moved across partition
+ * boundaries, risking deadlock and/or loss of previously
+ * loaded data. (We assume that all partitions of a
+ * partitioned table will be treated the same way.)
*/
- if (is_parallel && te->created)
+ use_truncate = is_parallel && te->created &&
+ !is_load_via_partition_root(te);
+
+ if (use_truncate)
{
/*
* Parallel restore is always talking directly to a
@@ -942,7 +956,7 @@ restore_toc_entry(ArchiveHandle *AH, TocEntry *te, bool is_parallel)
AH->outputKind = OUTPUT_SQLCMDS;
/* close out the transaction started above */
- if (is_parallel && te->created)
+ if (use_truncate)
CommitTransaction(&AH->public);
_enableTriggersIfNecessary(AH, te);
@@ -1037,6 +1051,43 @@ _enableTriggersIfNecessary(ArchiveHandle *AH, TocEntry *te)
}
/*
+ * Detect whether a TABLE DATA TOC item is performing "load via partition
+ * root", that is the target table is an ancestor partition rather than the
+ * table the TOC item is nominally for.
+ *
+ * In newer archive files this can be detected by checking for a special
+ * comment placed in te->defn. In older files we have to fall back to seeing
+ * if the COPY statement targets the named table or some other one. This
+ * will not work for data dumped as INSERT commands, so we could give a false
+ * negative in that case; fortunately, that's a rarely-used option.
+ */
+static bool
+is_load_via_partition_root(TocEntry *te)
+{
+ if (te->defn &&
+ strncmp(te->defn, "-- load via partition root ", 27) == 0)
+ return true;
+ if (te->copyStmt && *te->copyStmt)
+ {
+ PQExpBuffer copyStmt = createPQExpBuffer();
+ bool result;
+
+ /*
+ * Build the initial part of the COPY as it would appear if the
+ * nominal target table is the actual target. If we see anything
+ * else, it must be a load-via-partition-root case.
+ */
+ appendPQExpBuffer(copyStmt, "COPY %s ",
+ fmtQualifiedId(te->namespace, te->tag));
+ result = strncmp(te->copyStmt, copyStmt->data, copyStmt->len) != 0;
+ destroyPQExpBuffer(copyStmt);
+ return result;
+ }
+ /* Assume it's not load-via-partition-root */
+ return false;
+}
+
+/*
* This is a routine that is part of the dumper interface, hence the 'Archive*' parameter.
*/
@@ -2955,8 +3006,12 @@ _tocEntryRequired(TocEntry *te, teSection curSection, ArchiveHandle *AH)
res = res & ~REQ_DATA;
}
- /* If there's no definition command, there's no schema component */
- if (!te->defn || !te->defn[0])
+ /*
+ * If there's no definition command, there's no schema component. Treat
+ * "load via partition root" comments as not schema.
+ */
+ if (!te->defn || !te->defn[0] ||
+ strncmp(te->defn, "-- load via partition root ", 27) == 0)
res = res & ~REQ_SCHEMA;
/*
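
As an illustration of what the archiver now keys on (a sketch, not literal archive output), consider the ths/tht tables from the new test. A safe TABLE DATA entry and a load-via-partition-root entry differ in both te->defn and te->copyStmt, and only the safe one gets the parallel-restore TRUNCATE treatment:

  -- Safe entry for leaf ths_p1: defn is empty, COPY targets the leaf itself.
  --   copyStmt: COPY public.ths_p1 (mod, data) FROM stdin;
  -- Marked entry for leaf tht_p1: defn carries the marker comment and COPY
  -- targets the root, so is_load_via_partition_root() sees it either way.
  --   defn:     -- load via partition root public.tht
  --   copyStmt: COPY public.tht (en, data) FROM stdin;

  -- For the safe entry, a parallel restore that created the table earlier in
  -- the run emits roughly:
  BEGIN;
  TRUNCATE TABLE ONLY public.ths_p1;
  COPY public.ths_p1 (mod, data) FROM stdin;
  COMMIT;
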
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 2e068c6620e..c94aedc145b 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -325,6 +325,7 @@ static void appendReloptionsArrayAH(PQExpBuffer buffer, const char *reloptions,
static char *get_synchronized_snapshot(Archive *fout);
static void setupDumpWorker(Archive *AH);
static TableInfo *getRootTableInfo(const TableInfo *tbinfo);
+static bool forcePartitionRootLoad(const TableInfo *tbinfo);
int
@@ -2287,11 +2288,13 @@ dumpTableData_insert(Archive *fout, const void *dcontext)
insertStmt = createPQExpBuffer();
/*
- * When load-via-partition-root is set, get the root table name
- * for the partition table, so that we can reload data through the
- * root table.
+ * When load-via-partition-root is set or forced, get the root
+ * table name for the partition table, so that we can reload data
+ * through the root table.
*/
- if (dopt->load_via_partition_root && tbinfo->ispartition)
+ if (tbinfo->ispartition &&
+ (dopt->load_via_partition_root ||
+ forcePartitionRootLoad(tbinfo)))
targettab = getRootTableInfo(tbinfo);
else
targettab = tbinfo;
@@ -2490,6 +2493,35 @@ getRootTableInfo(const TableInfo *tbinfo)
}
/*
+ * forcePartitionRootLoad
+ * Check if we must force load_via_partition_root for this partition.
+ *
+ * This is required if any level of ancestral partitioned table has an
+ * unsafe partitioning scheme.
+ */
+static bool
+forcePartitionRootLoad(const TableInfo *tbinfo)
+{
+ TableInfo *parentTbinfo;
+
+ Assert(tbinfo->ispartition);
+ Assert(tbinfo->numParents == 1);
+
+ parentTbinfo = tbinfo->parents[0];
+ if (parentTbinfo->unsafe_partitions)
+ return true;
+ while (parentTbinfo->ispartition)
+ {
+ Assert(parentTbinfo->numParents == 1);
+ parentTbinfo = parentTbinfo->parents[0];
+ if (parentTbinfo->unsafe_partitions)
+ return true;
+ }
+
+ return false;
+}
+
+/*
* dumpTableData -
* dump the contents of a single table
*
@@ -2503,34 +2535,40 @@ dumpTableData(Archive *fout, const TableDataInfo *tdinfo)
PQExpBuffer copyBuf = createPQExpBuffer();
PQExpBuffer clistBuf = createPQExpBuffer();
DataDumperPtr dumpFn;
+ char *tdDefn = NULL;
char *copyStmt;
const char *copyFrom;
/* We had better have loaded per-column details about this table */
Assert(tbinfo->interesting);
+ /*
+ * When load-via-partition-root is set or forced, get the root table name
+ * for the partition table, so that we can reload data through the root
+ * table. Then construct a comment to be inserted into the TOC entry's
+ * defn field, so that such cases can be identified reliably.
+ */
+ if (tbinfo->ispartition &&
+ (dopt->load_via_partition_root ||
+ forcePartitionRootLoad(tbinfo)))
+ {
+ TableInfo *parentTbinfo;
+
+ parentTbinfo = getRootTableInfo(tbinfo);
+ copyFrom = fmtQualifiedDumpable(parentTbinfo);
+ printfPQExpBuffer(copyBuf, "-- load via partition root %s",
+ copyFrom);
+ tdDefn = pg_strdup(copyBuf->data);
+ }
+ else
+ copyFrom = fmtQualifiedDumpable(tbinfo);
+
if (dopt->dump_inserts == 0)
{
/* Dump/restore using COPY */
dumpFn = dumpTableData_copy;
-
- /*
- * When load-via-partition-root is set, get the root table name for
- * the partition table, so that we can reload data through the root
- * table.
- */
- if (dopt->load_via_partition_root && tbinfo->ispartition)
- {
- TableInfo *parentTbinfo;
-
- parentTbinfo = getRootTableInfo(tbinfo);
- copyFrom = fmtQualifiedDumpable(parentTbinfo);
- }
- else
- copyFrom = fmtQualifiedDumpable(tbinfo);
-
/* must use 2 steps here 'cause fmtId is nonreentrant */
- appendPQExpBuffer(copyBuf, "COPY %s ",
+ printfPQExpBuffer(copyBuf, "COPY %s ",
copyFrom);
appendPQExpBuffer(copyBuf, "%s FROM stdin;\n",
fmtCopyColumnList(tbinfo, clistBuf));
@@ -2558,6 +2596,7 @@ dumpTableData(Archive *fout, const TableDataInfo *tdinfo)
.owner = tbinfo->rolname,
.description = "TABLE DATA",
.section = SECTION_DATA,
+ .createStmt = tdDefn,
.copyStmt = copyStmt,
.deps = &(tbinfo->dobj.dumpId),
.nDeps = 1,
@@ -6820,6 +6859,76 @@ getInherits(Archive *fout, int *numInherits)
}
/*
+ * getPartitioningInfo
+ * get information about partitioning
+ *
+ * For the most part, we only collect partitioning info about tables we
+ * intend to dump. However, this function has to consider all partitioned
+ * tables in the database, because we need to know about parents of partitions
+ * we are going to dump even if the parents themselves won't be dumped.
+ *
+ * Specifically, what we need to know is whether each partitioned table
+ * has an "unsafe" partitioning scheme that requires us to force
+ * load-via-partition-root mode for its children. Currently the only case
+ * for which we force that is hash partitioning on enum columns, since the
+ * hash codes depend on enum value OIDs which won't be replicated across
+ * dump-and-reload. There are other cases in which load-via-partition-root
+ * might be necessary, but we expect users to cope with them.
+ */
+void
+getPartitioningInfo(Archive *fout)
+{
+ PQExpBuffer query;
+ PGresult *res;
+ int ntups;
+
+ /* hash partitioning didn't exist before v11 */
+ if (fout->remoteVersion < 110000)
+ return;
+ /* needn't bother if schema-only dump */
+ if (fout->dopt->schemaOnly)
+ return;
+
+ query = createPQExpBuffer();
+
+ /*
+ * Unsafe partitioning schemes are exactly those for which hash enum_ops
+ * appears among the partition opclasses. We needn't check partstrat.
+ *
+ * Note that this query may well retrieve info about tables we aren't
+ * going to dump and hence have no lock on. That's okay since we need not
+ * invoke any unsafe server-side functions.
+ */
+ appendPQExpBufferStr(query,
+ "SELECT partrelid FROM pg_partitioned_table WHERE\n"
+ "(SELECT c.oid FROM pg_opclass c JOIN pg_am a "
+ "ON c.opcmethod = a.oid\n"
+ "WHERE opcname = 'enum_ops' "
+ "AND opcnamespace = 'pg_catalog'::regnamespace "
+ "AND amname = 'hash') = ANY(partclass)");
+
+ res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
+
+ ntups = PQntuples(res);
+
+ for (int i = 0; i < ntups; i++)
+ {
+ Oid tabrelid = atooid(PQgetvalue(res, i, 0));
+ TableInfo *tbinfo;
+
+ tbinfo = findTableByOid(tabrelid);
+ if (tbinfo == NULL)
+ pg_fatal("failed sanity check, table OID %u appearing in pg_partitioned_table not found",
+ tabrelid);
+ tbinfo->unsafe_partitions = true;
+ }
+
+ PQclear(res);
+
+ destroyPQExpBuffer(query);
+}
+
+/*
* getIndexes
* get information about every index on a dumpable table
*
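
For background on why hash-on-enum is the one scheme getPartitioningInfo treats as unsafe: the hash opclass's support function hashes the enum value's OID, and OIDs are assigned afresh when the type is recreated in the destination database. A minimal SQL sketch, reusing the test's digit type (the hash value any particular installation prints will differ):

  create type digit as enum ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9');
  create table tht (en digit, data int) partition by hash (en);
  -- hashenum() hashes the value's OID, not its label, so the same label can
  -- route to a different hash partition after a dump-and-reload:
  select hashenum('4'::digit);
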
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index cdca0b993df..ffa37265c82 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -318,6 +318,7 @@ typedef struct _tableInfo
bool dummy_view; /* view's real definition must be postponed */
bool postponed_def; /* matview must be postponed into post-data */
bool ispartition; /* is table a partition? */
+ bool unsafe_partitions; /* is it an unsafe partitioned table? */
/*
* These fields are computed only if we decide the table is interesting
@@ -714,6 +715,7 @@ extern ConvInfo *getConversions(Archive *fout, int *numConversions);
extern TableInfo *getTables(Archive *fout, int *numTables);
extern void getOwnedSeqs(Archive *fout, TableInfo tblinfo[], int numTables);
extern InhInfo *getInherits(Archive *fout, int *numInherits);
+extern void getPartitioningInfo(Archive *fout);
extern void getIndexes(Archive *fout, TableInfo tblinfo[], int numTables);
extern void getExtendedStatistics(Archive *fout);
extern void getConstraints(Archive *fout, TableInfo tblinfo[], int numTables);
diff --git a/src/bin/pg_dump/t/004_pg_dump_parallel.pl b/src/bin/pg_dump/t/004_pg_dump_parallel.pl
new file mode 100644
index 00000000000..f41c2fa2233
--- /dev/null
+++ b/src/bin/pg_dump/t/004_pg_dump_parallel.pl
@@ -0,0 +1,81 @@
+
+# Copyright (c) 2021-2023, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $dbname1 = 'regression_src';
+my $dbname2 = 'regression_dest1';
+my $dbname3 = 'regression_dest2';
+
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init;
+$node->start;
+
+my $backupdir = $node->backup_dir;
+
+$node->run_log([ 'createdb', $dbname1 ]);
+$node->run_log([ 'createdb', $dbname2 ]);
+$node->run_log([ 'createdb', $dbname3 ]);
+
+$node->safe_psql(
+ $dbname1,
+ qq{
+create type digit as enum ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9');
+
+-- plain table with index
+create table tplain (en digit, data int unique);
+insert into tplain select (x%10)::text::digit, x from generate_series(1,1000) x;
+
+-- non-troublesome hashed partitioning
+create table ths (mod int, data int, unique(mod, data)) partition by hash(mod);
+create table ths_p1 partition of ths for values with (modulus 3, remainder 0);
+create table ths_p2 partition of ths for values with (modulus 3, remainder 1);
+create table ths_p3 partition of ths for values with (modulus 3, remainder 2);
+insert into ths select (x%10), x from generate_series(1,1000) x;
+
+-- dangerous hashed partitioning
+create table tht (en digit, data int, unique(en, data)) partition by hash(en);
+create table tht_p1 partition of tht for values with (modulus 3, remainder 0);
+create table tht_p2 partition of tht for values with (modulus 3, remainder 1);
+create table tht_p3 partition of tht for values with (modulus 3, remainder 2);
+insert into tht select (x%10)::text::digit, x from generate_series(1,1000) x;
+ });
+
+$node->command_ok(
+ [
+ 'pg_dump', '-Fd', '--no-sync', '-j2', '-f', "$backupdir/dump1",
+ $node->connstr($dbname1)
+ ],
+ 'parallel dump');
+
+$node->command_ok(
+ [
+ 'pg_restore', '-v',
+ '-d', $node->connstr($dbname2),
+ '-j3', "$backupdir/dump1"
+ ],
+ 'parallel restore');
+
+$node->command_ok(
+ [
+ 'pg_dump', '-Fd',
+ '--no-sync', '-j2',
+ '-f', "$backupdir/dump2",
+ '--inserts', $node->connstr($dbname1)
+ ],
+ 'parallel dump as inserts');
+
+$node->command_ok(
+ [
+ 'pg_restore', '-v',
+ '-d', $node->connstr($dbname3),
+ '-j3', "$backupdir/dump2"
+ ],
+ 'parallel restore as inserts');
+
+done_testing();
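
For context, here is the failure mode that forcing load-via-partition-root avoids, sketched against a hypothetical destination database where the digit enum's value OIDs came out differently (error text approximate):

  -- Loading leaf data straight into the leaf can hit rows that no longer
  -- hash to this partition under the new enum OIDs:
  copy tht_p1 (en, data) from stdin;
  -- ERROR: new row for relation "tht_p1" violates partition constraint

  -- Routing the same rows through the root lets the server re-route each
  -- one to whichever partition it now belongs in:
  copy tht (en, data) from stdin;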