diff options
author | Peter Eisentraut <peter@eisentraut.org> | 2022-03-30 08:56:58 +0200 |
---|---|---|
committer | Peter Eisentraut <peter@eisentraut.org> | 2022-03-30 09:02:31 +0200 |
commit | 072132f04e55c1c3b0f1a582318da78de7334379 (patch) | |
tree | d855c0b1716968cd26966e34f41f77de0c0d0af6 /src/backend/commands/copyfromparse.c | |
parent | edcedcc2c7bb8390858bbccda9637318598f2473 (diff) | |
download | postgresql-072132f04e55c1c3b0f1a582318da78de7334379.tar.gz postgresql-072132f04e55c1c3b0f1a582318da78de7334379.zip |
Add header matching mode to COPY FROM
COPY FROM supports the HEADER option to silently discard the header
line from a CSV or text file. Because the contents of that line are
never checked, it is possible to load by mistake a file that matches
the expected format but not the expected column layout, for example,
if two text columns have been swapped, resulting in garbage in the
database.
This adds a new option value HEADER MATCH that checks the column names
in the header line against the actual column names and errors out if
they do not match.
Author: Rémi Lapeyre <remi.lapeyre@lenstra.fr>
Reviewed-by: Daniel Verite <daniel@manitou-mail.org>
Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com>
Discussion: https://www.postgresql.org/message-id/flat/CAF1-J-0PtCWMeLtswwGV2M70U26n4g33gpe1rcKQqe6wVQDrFA@mail.gmail.com
Diffstat (limited to 'src/backend/commands/copyfromparse.c')
-rw-r--r-- | src/backend/commands/copyfromparse.c | 53 |
1 files changed, 50 insertions, 3 deletions
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index baf328b620c..58017ec53b0 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -72,6 +72,7 @@ #include "miscadmin.h" #include "pgstat.h" #include "port/pg_bswap.h" +#include "utils/builtins.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -758,12 +759,58 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields) /* only available for text or csv input */ Assert(!cstate->opts.binary); - /* on input just throw the header line away */ + /* on input check that the header line is correct if needed */ if (cstate->cur_lineno == 0 && cstate->opts.header_line) { + ListCell *cur; + TupleDesc tupDesc; + + tupDesc = RelationGetDescr(cstate->rel); + cstate->cur_lineno++; - if (CopyReadLine(cstate)) - return false; /* done */ + done = CopyReadLine(cstate); + + if (cstate->opts.header_line == COPY_HEADER_MATCH) + { + int fldnum; + + if (cstate->opts.csv_mode) + fldct = CopyReadAttributesCSV(cstate); + else + fldct = CopyReadAttributesText(cstate); + + if (fldct != list_length(cstate->attnumlist)) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("wrong number of fields in header line: field count is %d, expected %d", + fldct, list_length(cstate->attnumlist)))); + + fldnum = 0; + foreach(cur, cstate->attnumlist) + { + int attnum = lfirst_int(cur); + char *colName = cstate->raw_fields[attnum - 1]; + Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1); + + fldnum++; + + if (colName == NULL) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"", + fldnum, cstate->opts.null_print, NameStr(attr->attname)))); + + if (namestrcmp(&attr->attname, colName) != 0) { + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("column name mismatch in header line field %d: got \"%s\", expected 
\"%s\"", + fldnum, colName, NameStr(attr->attname)))); + } + } + } + + if (done) + return false; } cstate->cur_lineno++; |