aboutsummaryrefslogtreecommitdiff
path: root/src/backend/commands/copyfromparse.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2024-09-30 17:57:12 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2024-09-30 17:57:12 -0400
commit: 7702337489810f645b3501d99215c2b525c5abca (patch)
tree: 6f7491e11685972ddb619064e31efa2538ab8436 /src/backend/commands/copyfromparse.c
parent: a19f83f87966f763991cc76404f8e42a36e7e842 (diff)
download: postgresql-7702337489810f645b3501d99215c2b525c5abca.tar.gz
download: postgresql-7702337489810f645b3501d99215c2b525c5abca.zip
Do not treat \. as an EOF marker in CSV mode for COPY IN.
Since backslash is (typically) not special in CSV data, we should not be treating \. as special either. The server historically did this to keep CSV and TEXT modes more alike and to support V2 protocol; but V2 protocol is long dead, and the inconsistency with CSV standards is annoying. Remove that behavior in CopyReadLineText, and make some minor consequent code simplifications. On the client side, we need to fix psql so that it does not check for \. except when reading data from STDIN (that is, the script source). We must do that regardless of TEXT/CSV mode or there is no way to end the COPY short of script EOF. Also, be careful not to send the \. to the server in that case. This is a small compatibility break in that other applications besides psql may need similar adjustment. Also, using an older version of psql with a v18 server may result in misbehavior during CSV-mode COPY IN. Daniel Vérité, reviewed by vignesh C, Robert Haas, and myself Discussion: https://postgr.es/m/ed659f37-a9dd-42a7-82b9-0da562cc4006@manitou-mail.org
Diffstat (limited to 'src/backend/commands/copyfromparse.c')
-rw-r--r-- src/backend/commands/copyfromparse.c 80
1 files changed, 19 insertions, 61 deletions
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index 97a4c387a30..a280efe23f9 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -136,14 +136,6 @@ if (1) \
} \
} else ((void) 0)
-/* Undo any read-ahead and jump out of the block. */
-#define NO_END_OF_COPY_GOTO \
-if (1) \
-{ \
- input_buf_ptr = prev_raw_ptr + 1; \
- goto not_end_of_copy; \
-} else ((void) 0)
-
/* NOTE: there's a copy of this in copyto.c */
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
@@ -1182,7 +1174,6 @@ CopyReadLineText(CopyFromState cstate)
bool result = false;
/* CSV variables */
- bool first_char_in_line = true;
bool in_quote = false,
last_was_esc = false;
char quotec = '\0';
@@ -1268,12 +1259,12 @@ CopyReadLineText(CopyFromState cstate)
if (cstate->opts.csv_mode)
{
/*
- * If character is '\\' or '\r', we may need to look ahead below.
- * Force fetch of the next character if we don't already have it.
- * We need to do this before changing CSV state, in case one of
- * these characters is also the quote or escape character.
+ * If character is '\r', we may need to look ahead below. Force
+ * fetch of the next character if we don't already have it. We
+ * need to do this before changing CSV state, in case '\r' is also
+ * the quote or escape character.
*/
- if (c == '\\' || c == '\r')
+ if (c == '\r')
{
IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
}
@@ -1377,10 +1368,10 @@ CopyReadLineText(CopyFromState cstate)
}
/*
- * In CSV mode, we only recognize \. alone on a line. This is because
- * \. is a valid CSV data value.
+ * Process backslash, except in CSV mode where backslash is a normal
+ * character.
*/
- if (c == '\\' && (!cstate->opts.csv_mode || first_char_in_line))
+ if (c == '\\' && !cstate->opts.csv_mode)
{
char c2;
@@ -1398,12 +1389,6 @@ CopyReadLineText(CopyFromState cstate)
if (c2 == '.')
{
input_buf_ptr++; /* consume the '.' */
-
- /*
- * Note: if we loop back for more data here, it does not
- * matter that the CSV state change checks are re-executed; we
- * will come back here with no important state changed.
- */
if (cstate->eol_type == EOL_CRNL)
{
/* Get the next character */
@@ -1412,23 +1397,13 @@ CopyReadLineText(CopyFromState cstate)
c2 = copy_input_buf[input_buf_ptr++];
if (c2 == '\n')
- {
- if (!cstate->opts.csv_mode)
- ereport(ERROR,
- (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- errmsg("end-of-copy marker does not match previous newline style")));
- else
- NO_END_OF_COPY_GOTO;
- }
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("end-of-copy marker does not match previous newline style")));
else if (c2 != '\r')
- {
- if (!cstate->opts.csv_mode)
- ereport(ERROR,
- (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- errmsg("end-of-copy marker corrupt")));
- else
- NO_END_OF_COPY_GOTO;
- }
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("end-of-copy marker corrupt")));
}
/* Get the next character */
@@ -1437,14 +1412,9 @@ CopyReadLineText(CopyFromState cstate)
c2 = copy_input_buf[input_buf_ptr++];
if (c2 != '\r' && c2 != '\n')
- {
- if (!cstate->opts.csv_mode)
- ereport(ERROR,
- (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- errmsg("end-of-copy marker corrupt")));
- else
- NO_END_OF_COPY_GOTO;
- }
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("end-of-copy marker corrupt")));
if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
(cstate->eol_type == EOL_CRNL && c2 != '\n') ||
@@ -1467,7 +1437,7 @@ CopyReadLineText(CopyFromState cstate)
result = true; /* report EOF */
break;
}
- else if (!cstate->opts.csv_mode)
+ else
{
/*
* If we are here, it means we found a backslash followed by
@@ -1475,23 +1445,11 @@ CopyReadLineText(CopyFromState cstate)
* after a backslash is special, so we skip over that second
* character too. If we didn't do that \\. would be
* considered an eof-of copy, while in non-CSV mode it is a
- * literal backslash followed by a period. In CSV mode,
- * backslashes are not special, so we want to process the
- * character after the backslash just like a normal character,
- * so we don't increment in those cases.
+ * literal backslash followed by a period.
*/
input_buf_ptr++;
}
}
-
- /*
- * This label is for CSV cases where \. appears at the start of a
- * line, but there is more text after it, meaning it was a data value.
- * We are more strict for \. in CSV mode because \. could be a data
- * value, while in non-CSV mode, \. cannot be a data value.
- */
-not_end_of_copy:
- first_char_in_line = false;
} /* end of outer loop */
/*