about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Andrew Dunstan <andrew@dunslane.net> 2007-09-12 20:49:27 +0000
committer Andrew Dunstan <andrew@dunslane.net> 2007-09-12 20:49:27 +0000
commit eb0a7735ba1ede6a35b80d73f6c371a8b1220552 (patch)
tree b27786b01e95974689542ebe76a5daa8e2a3b9d3
parent 22b613ebd9dc0b22820662127110ccc90e333bbe (diff)
download postgresql-eb0a7735ba1ede6a35b80d73f6c371a8b1220552.tar.gz
postgresql-eb0a7735ba1ede6a35b80d73f6c371a8b1220552.zip
Perform post-escaping encoding validity checks on SQL literals and COPY input
so that invalidly encoded data cannot enter the database by these means.
-rw-r--r-- src/backend/commands/copy.c 19
-rw-r--r-- src/backend/parser/scan.l 22
2 files changed, 38 insertions, 3 deletions
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index d28a6ad11c2..fdfe5ea965f 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.286 2007/09/07 20:59:26 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.287 2007/09/12 20:49:27 adunstan Exp $
*
*-------------------------------------------------------------------------
*/
@@ -2685,6 +2685,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
char *start_ptr;
char *end_ptr;
int input_len;
+ bool saw_high_bit = false;
/* Make sure space remains in fieldvals[] */
if (fieldno >= maxfields)
@@ -2749,6 +2750,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
}
}
c = val & 0377;
+ if (IS_HIGHBIT_SET(c))
+ saw_high_bit = true;
}
break;
case 'x':
@@ -2772,6 +2775,8 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
}
}
c = val & 0xff;
+ if (IS_HIGHBIT_SET(c))
+ saw_high_bit = true;
}
}
break;
@@ -2799,7 +2804,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
* literally
*/
}
- }
+ }
/* Add c to output string */
*output_ptr++ = c;
@@ -2808,6 +2813,16 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
/* Terminate attribute value in output area */
*output_ptr++ = '\0';
+ /* If we de-escaped a char with the high bit set, make sure
+ * we still have valid data for the db encoding. Avoid calling strlen
+ * here for the sake of efficiency.
+ */
+ if (saw_high_bit)
+ {
+ char *fld = fieldvals[fieldno];
+ pg_verifymbstr(fld, output_ptr - (fld + 1), false);
+ }
+
/* Check whether raw input matched null marker */
input_len = end_ptr - start_ptr;
if (input_len == cstate->null_print_len &&
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index baa59922771..a138a66131a 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -24,7 +24,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.140 2007/08/12 20:18:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.141 2007/09/12 20:49:27 adunstan Exp $
*
*-------------------------------------------------------------------------
*/
@@ -60,6 +60,7 @@ bool escape_string_warning = true;
bool standard_conforming_strings = false;
static bool warn_on_first_escape;
+static bool saw_high_bit = false;
/*
* literalbuf is used to accumulate literal values when multiple rules
@@ -426,6 +427,7 @@ other .
{xqstart} {
warn_on_first_escape = true;
+ saw_high_bit = false;
SET_YYLLOC();
if (standard_conforming_strings)
BEGIN(xq);
@@ -435,6 +437,7 @@ other .
}
{xestart} {
warn_on_first_escape = false;
+ saw_high_bit = false;
SET_YYLLOC();
BEGIN(xe);
startlit();
@@ -443,6 +446,11 @@ other .
<xq,xe>{quotefail} {
yyless(1);
BEGIN(INITIAL);
+ /* check that the data remains valid if it might have been
+ * made invalid by unescaping any chars.
+ */
+ if (saw_high_bit)
+ pg_verifymbstr(literalbuf, literallen, false);
yylval.str = litbufdup();
return SCONST;
}
@@ -475,12 +483,16 @@ other .
check_escape_warning();
addlitchar(c);
+ if (IS_HIGHBIT_SET(c))
+ saw_high_bit = true;
}
<xe>{xehexesc} {
unsigned char c = strtoul(yytext+2, NULL, 16);
check_escape_warning();
addlitchar(c);
+ if (IS_HIGHBIT_SET(c))
+ saw_high_bit = true;
}
<xq,xe>{quotecontinue} {
/* ignore */
@@ -892,6 +904,14 @@ litbufdup(void)
static unsigned char
unescape_single_char(unsigned char c)
{
+ /* Normally we wouldn't expect to see \n where n has its high bit set
+ * but we set the flag to check the string if we do get it, so
+ * that this doesn't become a way of getting around the coding validity
+ * checks.
+ */
+ if (IS_HIGHBIT_SET(c))
+ saw_high_bit = true;
+
switch (c)
{
case 'b':