aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/commands/copy.c60
-rw-r--r--src/backend/parser/gram.y4
-rw-r--r--src/backend/utils/mb/mbutils.c46
-rw-r--r--src/include/mb/pg_wchar.h2
-rw-r--r--src/test/regress/expected/copy2.out6
-rw-r--r--src/test/regress/sql/copy2.sql6
6 files changed, 90 insertions, 34 deletions
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 294450ef660..cac11a6c641 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -95,8 +95,8 @@ typedef struct CopyStateData
* dest == COPY_NEW_FE in COPY FROM */
bool fe_eof; /* true if detected end of copy data */
EolType eol_type; /* EOL type of input */
- int client_encoding; /* remote side's character encoding */
- bool need_transcoding; /* client encoding diff from server? */
+ int file_encoding; /* file or remote side's character encoding */
+ bool need_transcoding; /* file encoding diff from server? */
bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
/* parameters from the COPY command */
@@ -110,7 +110,7 @@ typedef struct CopyStateData
bool header_line; /* CSV header line? */
char *null_print; /* NULL marker string (server encoding!) */
int null_print_len; /* length of same */
- char *null_print_client; /* same converted to client encoding */
+ char *null_print_client; /* same converted to file encoding */
char *delim; /* column delimiter (must be 1 byte) */
char *quote; /* CSV quote char (must be 1 byte) */
char *escape; /* CSV escape char (must be 1 byte) */
@@ -845,6 +845,8 @@ ProcessCopyOptions(CopyState cstate,
if (cstate == NULL)
cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
+ cstate->file_encoding = -1;
+
/* Extract options from the statement node tree */
foreach(option, options)
{
@@ -948,6 +950,19 @@ ProcessCopyOptions(CopyState cstate,
errmsg("argument to option \"%s\" must be a list of column names",
defel->defname)));
}
+ else if (strcmp(defel->defname, "encoding") == 0)
+ {
+ if (cstate->file_encoding >= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting or redundant options")));
+ cstate->file_encoding = pg_char_to_encoding(defGetString(defel));
+ if (cstate->file_encoding < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("argument to option \"%s\" must be a valid encoding name",
+ defel->defname)));
+ }
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -1278,17 +1293,20 @@ BeginCopy(bool is_from,
}
}
+ /* Use client encoding when ENCODING option is not specified. */
+ if (cstate->file_encoding < 0)
+ cstate->file_encoding = pg_get_client_encoding();
+
/*
- * Set up encoding conversion info. Even if the client and server
- * encodings are the same, we must apply pg_client_to_server() to validate
+ * Set up encoding conversion info. Even if the file and server
+ * encodings are the same, we must apply pg_any_to_server() to validate
* data in multibyte encodings.
*/
- cstate->client_encoding = pg_get_client_encoding();
cstate->need_transcoding =
- (cstate->client_encoding != GetDatabaseEncoding() ||
+ (cstate->file_encoding != GetDatabaseEncoding() ||
pg_database_encoding_max_length() > 1);
/* See Multibyte encoding comment above */
- cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->client_encoding);
+ cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
cstate->copy_dest = COPY_FILE; /* default */
@@ -1526,12 +1544,13 @@ CopyTo(CopyState cstate)
else
{
/*
- * For non-binary copy, we need to convert null_print to client
+ * For non-binary copy, we need to convert null_print to file
* encoding, because it will be sent directly with CopySendString.
*/
if (cstate->need_transcoding)
- cstate->null_print_client = pg_server_to_client(cstate->null_print,
- cstate->null_print_len);
+ cstate->null_print_client = pg_server_to_any(cstate->null_print,
+ cstate->null_print_len,
+ cstate->file_encoding);
/* if a header has been requested send the line */
if (cstate->header_line)
@@ -2608,8 +2627,9 @@ CopyReadLine(CopyState cstate)
{
char *cvt;
- cvt = pg_client_to_server(cstate->line_buf.data,
- cstate->line_buf.len);
+ cvt = pg_any_to_server(cstate->line_buf.data,
+ cstate->line_buf.len,
+ cstate->file_encoding);
if (cvt != cstate->line_buf.data)
{
/* transfer converted data back to line_buf */
@@ -2854,7 +2874,7 @@ CopyReadLineText(CopyState cstate)
/* -----
* get next character
* Note: we do not change c so if it isn't \., we can fall
- * through and continue processing for client encoding.
+ * through and continue processing for file encoding.
* -----
*/
c2 = copy_raw_buf[raw_buf_ptr];
@@ -2968,7 +2988,7 @@ not_end_of_copy:
mblen_str[0] = c;
/* All our encodings only read the first byte to get the length */
- mblen = pg_encoding_mblen(cstate->client_encoding, mblen_str);
+ mblen = pg_encoding_mblen(cstate->file_encoding, mblen_str);
IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(mblen - 1);
IF_NEED_REFILL_AND_EOF_BREAK(mblen - 1);
raw_buf_ptr += mblen - 1;
@@ -3467,7 +3487,7 @@ CopyAttributeOutText(CopyState cstate, char *string)
char delimc = cstate->delim[0];
if (cstate->need_transcoding)
- ptr = pg_server_to_client(string, strlen(string));
+ ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
else
ptr = string;
@@ -3540,7 +3560,7 @@ CopyAttributeOutText(CopyState cstate, char *string)
start = ptr++; /* we include char in next run */
}
else if (IS_HIGHBIT_SET(c))
- ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
+ ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
else
ptr++;
}
@@ -3627,7 +3647,7 @@ CopyAttributeOutCSV(CopyState cstate, char *string,
use_quote = true;
if (cstate->need_transcoding)
- ptr = pg_server_to_client(string, strlen(string));
+ ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
else
ptr = string;
@@ -3654,7 +3674,7 @@ CopyAttributeOutCSV(CopyState cstate, char *string,
break;
}
if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
- tptr += pg_encoding_mblen(cstate->client_encoding, tptr);
+ tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
else
tptr++;
}
@@ -3678,7 +3698,7 @@ CopyAttributeOutCSV(CopyState cstate, char *string,
start = ptr; /* we include char in next run */
}
if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
- ptr += pg_encoding_mblen(cstate->client_encoding, ptr);
+ ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
else
ptr++;
}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index c6811a11bd1..cbfacec4495 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -2236,6 +2236,10 @@ copy_opt_item:
{
$$ = makeDefElem("force_not_null", (Node *)$4);
}
+ | ENCODING Sconst
+ {
+ $$ = makeDefElem("encoding", (Node *)makeString($2));
+ }
;
/* The following exist for backward compatibility with very old versions */
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 5ee74f747d0..b8a2728e4f5 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -497,14 +497,25 @@ pg_encoding_max_length_sql(PG_FUNCTION_ARGS)
char *
pg_client_to_server(const char *s, int len)
{
+ Assert(ClientEncoding);
+
+ return pg_any_to_server(s, len, ClientEncoding->encoding); /* source encoding is the current client encoding */
+}
+
+/*
+ * convert any encoding to server encoding.
+ */
+char *
+pg_any_to_server(const char *s, int len, int encoding)
+{
Assert(DatabaseEncoding);
Assert(ClientEncoding);
if (len <= 0)
return (char *) s;
- if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
- ClientEncoding->encoding == PG_SQL_ASCII)
+ if (encoding == DatabaseEncoding->encoding ||
+ encoding == PG_SQL_ASCII)
{
/*
* No conversion is needed, but we must still validate the data.
@@ -524,8 +535,8 @@ pg_client_to_server(const char *s, int len)
* to the parser but we have no way to convert it. We compromise by
* rejecting the data if it contains any non-ASCII characters.
*/
- if (PG_VALID_BE_ENCODING(ClientEncoding->encoding))
- (void) pg_verify_mbstr(ClientEncoding->encoding, s, len, false);
+ if (PG_VALID_BE_ENCODING(encoding))
+ (void) pg_verify_mbstr(encoding, s, len, false);
else
{
int i;
@@ -543,7 +554,11 @@ pg_client_to_server(const char *s, int len)
return (char *) s;
}
- return perform_default_encoding_conversion(s, len, true);
+ if (ClientEncoding->encoding == encoding)
+ return perform_default_encoding_conversion(s, len, true);
+ else
+ return (char *) pg_do_encoding_conversion(
+ (unsigned char *) s, len, encoding, DatabaseEncoding->encoding);
}
/*
@@ -552,18 +567,33 @@ pg_client_to_server(const char *s, int len)
char *
pg_server_to_client(const char *s, int len)
{
+ Assert(ClientEncoding);
+
+ /* Outbound direction: the client encoding is the conversion target, so use pg_server_to_any(). */
+ return pg_server_to_any(s, len, ClientEncoding->encoding);
+}
+
+/*
+ * convert server encoding to any encoding.
+ */
+char *
+pg_server_to_any(const char *s, int len, int encoding)
+{
Assert(DatabaseEncoding);
Assert(ClientEncoding);
if (len <= 0)
return (char *) s;
- if (ClientEncoding->encoding == DatabaseEncoding->encoding ||
- ClientEncoding->encoding == PG_SQL_ASCII ||
+ if (encoding == DatabaseEncoding->encoding ||
+ encoding == PG_SQL_ASCII ||
DatabaseEncoding->encoding == PG_SQL_ASCII)
return (char *) s; /* assume data is valid */
- return perform_default_encoding_conversion(s, len, false);
+ if (ClientEncoding->encoding == encoding)
+ return perform_default_encoding_conversion(s, len, false);
+ else
+ return (char *) pg_do_encoding_conversion(
+ (unsigned char *) s, len, DatabaseEncoding->encoding, encoding);
}
/*
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index 565b53b3e6e..85a7b2f87dd 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -420,6 +420,8 @@ extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
extern char *pg_client_to_server(const char *s, int len);
extern char *pg_server_to_client(const char *s, int len);
+extern char *pg_any_to_server(const char *s, int len, int encoding);
+extern char *pg_server_to_any(const char *s, int len, int encoding);
extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 15cbe029770..8e2bc0c2504 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -46,10 +46,10 @@ CONTEXT: COPY x, line 1: "2001 231 \N \N"
COPY x from stdin;
ERROR: extra data after last expected column
CONTEXT: COPY x, line 1: "2002 232 40 50 60 70 80"
--- various COPY options: delimiters, oids, NULL string
+-- various COPY options: delimiters, oids, NULL string, encoding
COPY x (b, c, d, e) from stdin with oids delimiter ',' null 'x';
COPY x from stdin WITH DELIMITER AS ';' NULL AS '';
-COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X';
+COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING 'sql_ascii';
-- check results of copy in
SELECT * FROM x;
a | b | c | d | e
@@ -187,7 +187,7 @@ COPY y TO stdout WITH CSV QUOTE '''' DELIMITER '|';
Jackson, Sam|\h
It is "perfect".|
''|
-COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\';
+COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\' ENCODING 'sql_ascii';
"Jackson, Sam","\\h"
"It is \"perfect\"."," "
"",
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index c2e8b037e74..6322c8fba43 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -72,7 +72,7 @@ COPY x from stdin;
2002 232 40 50 60 70 80
\.
--- various COPY options: delimiters, oids, NULL string
+-- various COPY options: delimiters, oids, NULL string, encoding
COPY x (b, c, d, e) from stdin with oids delimiter ',' null 'x';
500000,x,45,80,90
500001,x,\x,\\x,\\\x
@@ -83,7 +83,7 @@ COPY x from stdin WITH DELIMITER AS ';' NULL AS '';
3000;;c;;
\.
-COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X';
+COPY x from stdin WITH DELIMITER AS ':' NULL AS E'\\X' ENCODING 'sql_ascii';
4000:\X:C:\X:\X
4001:1:empty::
4002:2:null:\X:\X
@@ -127,7 +127,7 @@ INSERT INTO y VALUES ('', NULL);
COPY y TO stdout WITH CSV;
COPY y TO stdout WITH CSV QUOTE '''' DELIMITER '|';
-COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\';
+COPY y TO stdout WITH CSV FORCE QUOTE col2 ESCAPE E'\\' ENCODING 'sql_ascii';
COPY y TO stdout WITH CSV FORCE QUOTE *;
-- Repeat above tests with new 9.0 option syntax