about | summary | refs | log | tree | commit | diff
path: root/src/backend/commands/copyfromparse.c
diff options
context:
space:
mode:
author Peter Eisentraut <peter@eisentraut.org> 2022-03-30 08:56:58 +0200
committer Peter Eisentraut <peter@eisentraut.org> 2022-03-30 09:02:31 +0200
commit 072132f04e55c1c3b0f1a582318da78de7334379 (patch)
tree d855c0b1716968cd26966e34f41f77de0c0d0af6 /src/backend/commands/copyfromparse.c
parent edcedcc2c7bb8390858bbccda9637318598f2473 (diff)
download postgresql-072132f04e55c1c3b0f1a582318da78de7334379.tar.gz
postgresql-072132f04e55c1c3b0f1a582318da78de7334379.zip
Add header matching mode to COPY FROM
COPY FROM supports the HEADER option to silently discard the header line from a CSV or text file. It is possible to mistakenly load a file that matches the expected format but has the wrong layout, for example, if two text columns have been swapped, resulting in garbage in the database.

This adds a new option value HEADER MATCH that checks the column names in the header line against the actual column names and errors out if they do not match.

Author: Rémi Lapeyre <remi.lapeyre@lenstra.fr>
Reviewed-by: Daniel Verite <daniel@manitou-mail.org>
Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com>
Discussion: https://www.postgresql.org/message-id/flat/CAF1-J-0PtCWMeLtswwGV2M70U26n4g33gpe1rcKQqe6wVQDrFA@mail.gmail.com
Diffstat (limited to 'src/backend/commands/copyfromparse.c')
-rw-r--r-- src/backend/commands/copyfromparse.c 53
1 files changed, 50 insertions, 3 deletions
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index baf328b620c..58017ec53b0 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -72,6 +72,7 @@
#include "miscadmin.h"
#include "pgstat.h"
#include "port/pg_bswap.h"
+#include "utils/builtins.h"
#include "utils/memutils.h"
#include "utils/rel.h"
@@ -758,12 +759,58 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
/* only available for text or csv input */
Assert(!cstate->opts.binary);
- /* on input just throw the header line away */
+ /* on input check that the header line is correct if needed */
if (cstate->cur_lineno == 0 && cstate->opts.header_line)
{
+ ListCell *cur;
+ TupleDesc tupDesc;
+
+ tupDesc = RelationGetDescr(cstate->rel);
+
cstate->cur_lineno++;
- if (CopyReadLine(cstate))
- return false; /* done */
+ done = CopyReadLine(cstate);
+
+ if (cstate->opts.header_line == COPY_HEADER_MATCH)
+ {
+ int fldnum;
+
+ if (cstate->opts.csv_mode)
+ fldct = CopyReadAttributesCSV(cstate);
+ else
+ fldct = CopyReadAttributesText(cstate);
+
+ if (fldct != list_length(cstate->attnumlist))
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("wrong number of fields in header line: field count is %d, expected %d",
+ fldct, list_length(cstate->attnumlist))));
+
+ fldnum = 0;
+ foreach(cur, cstate->attnumlist)
+ {
+ int attnum = lfirst_int(cur);
+ char *colName = cstate->raw_fields[attnum - 1];
+ Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
+
+ fldnum++;
+
+ if (colName == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
+ fldnum, cstate->opts.null_print, NameStr(attr->attname))));
+
+ if (namestrcmp(&attr->attname, colName) != 0) {
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
+ fldnum, colName, NameStr(attr->attname))));
+ }
+ }
+ }
+
+ if (done)
+ return false;
}
cstate->cur_lineno++;