aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2005-08-06 20:41:58 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2005-08-06 20:41:58 +0000
commit 9b29f9f3f86d0f03f61d8b9e449d5107615a3615 (patch)
tree dd0a702ac5ca5c44e29ec2b41e7baf2665da3dba /src
parent 28d3ee477176e8e6847736c48e9346c5bcde73d1 (diff)
download postgresql-9b29f9f3f86d0f03f61d8b9e449d5107615a3615.tar.gz
postgresql-9b29f9f3f86d0f03f61d8b9e449d5107615a3615.zip
COPY performance improvements. Avoid calling CopyGetData for each input
character, tighten the inner loops of CopyReadLine and CopyReadAttribute, arrange to parse out all the attributes of a line in just one call instead of one CopyReadAttribute call per attribute, be smarter about which client encodings require slow pg_encoding_mblen() loops. Also, clean up the mishmash of static variables and overly-long parameter lists in favor of passing around a single CopyState struct containing all the state data. Original patch by Alon Goldshuv, reworked by Tom Lane.
Diffstat (limited to 'src')
-rw-r--r-- src/backend/commands/copy.c 2538
1 files changed, 1501 insertions, 1037 deletions
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index d25189d0ac3..008413cc97f 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.247 2005/07/10 21:13:58 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.248 2005/08/06 20:41:58 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -63,16 +63,6 @@ typedef enum CopyDest
} CopyDest;
/*
- * State indicator showing what stopped CopyReadAttribute()
- */
-typedef enum CopyReadResult
-{
- NORMAL_ATTR,
- END_OF_LINE,
- UNTERMINATED_FIELD
-} CopyReadResult;
-
-/*
* Represents the end-of-line terminator type of the input
*/
typedef enum EolType
@@ -83,92 +73,130 @@ typedef enum EolType
EOL_CRNL
} EolType;
-
-static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
-
/*
- * Static communication variables ... pretty grotty, but COPY has
- * never been reentrant...
+ * This struct contains all the state variables used throughout a COPY
+ * operation. For simplicity, we use the same struct for all variants
+ * of COPY, even though some fields are used in only some cases.
+ *
+ * A word about encoding considerations: encodings that are only supported on
+ * the client side are those where multibyte characters may have second or
+ * later bytes with the high bit not set. When scanning data in such an
+ * encoding to look for a match to a single-byte (ie ASCII) character,
+ * we must use the full pg_encoding_mblen() machinery to skip over
+ * multibyte characters, else we might find a false match to a trailing
+ * byte. In supported server encodings, there is no possibility of
+ * a false match, and it's faster to make useless comparisons to trailing
+ * bytes than it is to invoke pg_encoding_mblen() to skip over them.
+ * client_only_encoding is TRUE when we have to do it the hard way.
*/
-static CopyDest copy_dest;
-static FILE *copy_file; /* used if copy_dest == COPY_FILE */
-static StringInfo copy_msgbuf; /* used if copy_dest == COPY_NEW_FE */
-static bool fe_eof; /* true if detected end of copy data */
-static EolType eol_type; /* EOL type of input */
-static int client_encoding; /* remote side's character encoding */
-static int server_encoding; /* local encoding */
+typedef struct CopyStateData
+{
+ /* low-level state data */
+ CopyDest copy_dest; /* type of copy source/destination */
+ FILE *copy_file; /* used if copy_dest == COPY_FILE */
+ StringInfo fe_msgbuf; /* used if copy_dest == COPY_NEW_FE */
+ bool fe_copy; /* true for all FE copy dests */
+ bool fe_eof; /* true if detected end of copy data */
+ EolType eol_type; /* EOL type of input */
+ int client_encoding; /* remote side's character encoding */
+ bool need_transcoding; /* client encoding diff from server? */
+ bool client_only_encoding; /* encoding not valid on server? */
+
+ /* parameters from the COPY command */
+ Relation rel; /* relation to copy to or from */
+ List *attnumlist; /* integer list of attnums to copy */
+ bool binary; /* binary format? */
+ bool oids; /* include OIDs? */
+ bool csv_mode; /* Comma Separated Value format? */
+ bool header_line; /* CSV header line? */
+ char *null_print; /* NULL marker string (server encoding!) */
+ int null_print_len; /* length of same */
+ char *delim; /* column delimiter (must be 1 byte) */
+ char *quote; /* CSV quote char (must be 1 byte) */
+ char *escape; /* CSV escape char (must be 1 byte) */
+ List *force_quote_atts; /* integer list of attnums to FQ */
+ List *force_notnull_atts; /* integer list of attnums to FNN */
+
+ /* these are just for error messages, see copy_in_error_callback */
+ const char *cur_relname; /* table name for error messages */
+ int cur_lineno; /* line number for error messages */
+ const char *cur_attname; /* current att for error messages */
+ const char *cur_attval; /* current att value for error messages */
-/* these are just for error messages, see copy_in_error_callback */
-static bool copy_binary; /* is it a binary copy? */
-static const char *copy_relname; /* table name for error messages */
-static int copy_lineno; /* line number for error messages */
-static const char *copy_attname; /* current att for error messages */
+ /*
+ * These variables are used to reduce overhead in textual COPY FROM.
+ *
+ * attribute_buf holds the separated, de-escaped text for each field of
+ * the current line. The CopyReadAttributes functions return arrays of
+ * pointers into this buffer. We avoid palloc/pfree overhead by re-using
+ * the buffer on each cycle.
+ */
+ StringInfoData attribute_buf;
+ /*
+ * Similarly, line_buf holds the whole input line being processed.
+ * The input cycle is first to read the whole line into line_buf,
+ * convert it to server encoding there, and then extract the individual
+ * attribute fields into attribute_buf. line_buf is preserved unmodified
+ * so that we can display it in error messages if appropriate.
+ */
+ StringInfoData line_buf;
+ bool line_buf_converted; /* converted to server encoding? */
-/*
- * These static variables are used to avoid incurring overhead for each
- * attribute processed. attribute_buf is reused on each CopyReadAttribute
- * call to hold the string being read in. Under normal use it will soon
- * grow to a suitable size, and then we will avoid palloc/pfree overhead
- * for subsequent attributes. Note that CopyReadAttribute returns a pointer
- * to attribute_buf's data buffer!
- */
-static StringInfoData attribute_buf;
+ /*
+ * Finally, raw_buf holds raw data read from the data source (file or
+ * client connection). CopyReadLine parses this data sufficiently to
+ * locate line boundaries, then transfers the data to line_buf and
+ * converts it. Note: we guarantee that there is a \0 at
+ * raw_buf[raw_buf_len].
+ */
+#define RAW_BUF_SIZE 65536 /* we palloc RAW_BUF_SIZE+1 bytes */
+ char *raw_buf;
+ int raw_buf_index; /* next byte to process */
+ int raw_buf_len; /* total # of bytes stored */
+} CopyStateData;
+
+typedef CopyStateData *CopyState;
+
+
+static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
-/*
- * Similarly, line_buf holds the whole input line being processed (its
- * cursor field points to the next character to be read by CopyReadAttribute).
- * The input cycle is first to read the whole line into line_buf, convert it
- * to server encoding, and then extract individual attribute fields into
- * attribute_buf. (We used to have CopyReadAttribute read the input source
- * directly, but that caused a lot of encoding issues and unnecessary logic
- * complexity.)
- */
-static StringInfoData line_buf;
-static bool line_buf_converted;
/* non-export function prototypes */
-static void DoCopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
- char *delim, char *null_print, bool csv_mode, char *quote,
- char *escape, List *force_quote_atts, bool header_line, bool fe_copy);
-static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
- char *delim, char *null_print, bool csv_mode, char *quote, char *escape,
- List *force_quote_atts, bool header_line);
-static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
- char *delim, char *null_print, bool csv_mode, char *quote, char *escape,
- List *force_notnull_atts, bool header_line);
-static bool CopyReadLine(char * quote, char * escape);
-static char *CopyReadAttribute(const char *delim, const char *null_print,
- CopyReadResult *result, bool *isnull);
-static char *CopyReadAttributeCSV(const char *delim, const char *null_print,
- char *quote, char *escape,
- CopyReadResult *result, bool *isnull);
-static Datum CopyReadBinaryAttribute(int column_no, FmgrInfo *flinfo,
- Oid typioparam, int32 typmod, bool *isnull);
-static void CopyAttributeOut(char *string, char *delim);
-static void CopyAttributeOutCSV(char *string, char *delim, char *quote,
- char *escape, bool force_quote);
+static void DoCopyTo(CopyState cstate);
+static void CopyTo(CopyState cstate);
+static void CopyFrom(CopyState cstate);
+static bool CopyReadLine(CopyState cstate);
+static bool CopyReadLineText(CopyState cstate);
+static bool CopyReadLineCSV(CopyState cstate);
+static int CopyReadAttributesText(CopyState cstate, int maxfields,
+ char **fieldvals);
+static int CopyReadAttributesCSV(CopyState cstate, int maxfields,
+ char **fieldvals);
+static Datum CopyReadBinaryAttribute(CopyState cstate,
+ int column_no, FmgrInfo *flinfo,
+ Oid typioparam, int32 typmod,
+ bool *isnull);
+static void CopyAttributeOutText(CopyState cstate, char *server_string);
+static void CopyAttributeOutCSV(CopyState cstate, char *server_string,
+ bool use_quote);
static List *CopyGetAttnums(Relation rel, List *attnamelist);
-static void limit_printout_length(StringInfo buf);
-
-/* Internal communications functions */
-static void SendCopyBegin(bool binary, int natts);
-static void ReceiveCopyBegin(bool binary, int natts);
-static void SendCopyEnd(bool binary);
-static void CopySendData(void *databuf, int datasize);
-static void CopySendString(const char *str);
-static void CopySendChar(char c);
-static void CopySendEndOfRow(bool binary);
-static void CopyGetData(void *databuf, int datasize);
-static int CopyGetChar(void);
-
-#define CopyGetEof() (fe_eof)
-static int CopyPeekChar(void);
-static void CopyDonePeek(int c, bool pickup);
-static void CopySendInt32(int32 val);
-static int32 CopyGetInt32(void);
-static void CopySendInt16(int16 val);
-static int16 CopyGetInt16(void);
+static char *limit_printout_length(const char *str);
+
+/* Low-level communications functions */
+static void SendCopyBegin(CopyState cstate);
+static void ReceiveCopyBegin(CopyState cstate);
+static void SendCopyEnd(CopyState cstate);
+static void CopySendData(CopyState cstate, void *databuf, int datasize);
+static void CopySendString(CopyState cstate, const char *str);
+static void CopySendChar(CopyState cstate, char c);
+static void CopySendEndOfRow(CopyState cstate);
+static int CopyGetData(CopyState cstate, void *databuf,
+ int minread, int maxread);
+static void CopySendInt32(CopyState cstate, int32 val);
+static bool CopyGetInt32(CopyState cstate, int32 *val);
+static void CopySendInt16(CopyState cstate, int16 val);
+static bool CopyGetInt16(CopyState cstate, int16 *val);
/*
@@ -176,13 +204,14 @@ static int16 CopyGetInt16(void);
* in past protocol redesigns.
*/
static void
-SendCopyBegin(bool binary, int natts)
+SendCopyBegin(CopyState cstate)
{
if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
{
/* new way */
StringInfoData buf;
- int16 format = (binary ? 1 : 0);
+ int natts = list_length(cstate->attnumlist);
+ int16 format = (cstate->binary ? 1 : 0);
int i;
pq_beginmessage(&buf, 'H');
@@ -191,43 +220,44 @@ SendCopyBegin(bool binary, int natts)
for (i = 0; i < natts; i++)
pq_sendint(&buf, format, 2); /* per-column formats */
pq_endmessage(&buf);
- copy_dest = COPY_NEW_FE;
- copy_msgbuf = makeStringInfo();
+ cstate->copy_dest = COPY_NEW_FE;
+ cstate->fe_msgbuf = makeStringInfo();
}
else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
{
/* old way */
- if (binary)
+ if (cstate->binary)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY BINARY is not supported to stdout or from stdin")));
pq_putemptymessage('H');
/* grottiness needed for old COPY OUT protocol */
pq_startcopyout();
- copy_dest = COPY_OLD_FE;
+ cstate->copy_dest = COPY_OLD_FE;
}
else
{
/* very old way */
- if (binary)
+ if (cstate->binary)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY BINARY is not supported to stdout or from stdin")));
pq_putemptymessage('B');
/* grottiness needed for old COPY OUT protocol */
pq_startcopyout();
- copy_dest = COPY_OLD_FE;
+ cstate->copy_dest = COPY_OLD_FE;
}
}
static void
-ReceiveCopyBegin(bool binary, int natts)
+ReceiveCopyBegin(CopyState cstate)
{
if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 3)
{
/* new way */
StringInfoData buf;
- int16 format = (binary ? 1 : 0);
+ int natts = list_length(cstate->attnumlist);
+ int16 format = (cstate->binary ? 1 : 0);
int i;
pq_beginmessage(&buf, 'G');
@@ -236,47 +266,47 @@ ReceiveCopyBegin(bool binary, int natts)
for (i = 0; i < natts; i++)
pq_sendint(&buf, format, 2); /* per-column formats */
pq_endmessage(&buf);
- copy_dest = COPY_NEW_FE;
- copy_msgbuf = makeStringInfo();
+ cstate->copy_dest = COPY_NEW_FE;
+ cstate->fe_msgbuf = makeStringInfo();
}
else if (PG_PROTOCOL_MAJOR(FrontendProtocol) >= 2)
{
/* old way */
- if (binary)
+ if (cstate->binary)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY BINARY is not supported to stdout or from stdin")));
pq_putemptymessage('G');
- copy_dest = COPY_OLD_FE;
+ cstate->copy_dest = COPY_OLD_FE;
}
else
{
/* very old way */
- if (binary)
+ if (cstate->binary)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY BINARY is not supported to stdout or from stdin")));
pq_putemptymessage('D');
- copy_dest = COPY_OLD_FE;
+ cstate->copy_dest = COPY_OLD_FE;
}
/* We *must* flush here to ensure FE knows it can send. */
pq_flush();
}
static void
-SendCopyEnd(bool binary)
+SendCopyEnd(CopyState cstate)
{
- if (copy_dest == COPY_NEW_FE)
+ if (cstate->copy_dest == COPY_NEW_FE)
{
- if (binary)
+ if (cstate->binary)
{
/* Need to flush out file trailer word */
- CopySendEndOfRow(true);
+ CopySendEndOfRow(cstate);
}
else
{
/* Shouldn't have any unsent data */
- Assert(copy_msgbuf->len == 0);
+ Assert(cstate->fe_msgbuf->len == 0);
}
/* Send Copy Done message */
pq_putemptymessage('c');
@@ -284,7 +314,7 @@ SendCopyEnd(bool binary)
else
{
/* The FE/BE protocol uses \n as newline for all platforms */
- CopySendData("\\.\n", 3);
+ CopySendData(cstate, "\\.\n", 3);
pq_endcopyout(false);
}
}
@@ -299,13 +329,13 @@ SendCopyEnd(bool binary)
*----------
*/
static void
-CopySendData(void *databuf, int datasize)
+CopySendData(CopyState cstate, void *databuf, int datasize)
{
- switch (copy_dest)
+ switch (cstate->copy_dest)
{
case COPY_FILE:
- fwrite(databuf, datasize, 1, copy_file);
- if (ferror(copy_file))
+ fwrite(databuf, datasize, 1, cstate->copy_file);
+ if (ferror(cstate->copy_file))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to COPY file: %m")));
@@ -316,98 +346,113 @@ CopySendData(void *databuf, int datasize)
/* no hope of recovering connection sync, so FATAL */
ereport(FATAL,
(errcode(ERRCODE_CONNECTION_FAILURE),
- errmsg("connection lost during COPY to stdout")));
+ errmsg("connection lost during COPY to stdout")));
}
break;
case COPY_NEW_FE:
- appendBinaryStringInfo(copy_msgbuf, (char *) databuf, datasize);
+ appendBinaryStringInfo(cstate->fe_msgbuf,
+ (char *) databuf, datasize);
break;
}
}
static void
-CopySendString(const char *str)
+CopySendString(CopyState cstate, const char *str)
{
- CopySendData((void *) str, strlen(str));
+ CopySendData(cstate, (void *) str, strlen(str));
}
static void
-CopySendChar(char c)
+CopySendChar(CopyState cstate, char c)
{
- CopySendData(&c, 1);
+ CopySendData(cstate, &c, 1);
}
static void
-CopySendEndOfRow(bool binary)
+CopySendEndOfRow(CopyState cstate)
{
- switch (copy_dest)
+ switch (cstate->copy_dest)
{
case COPY_FILE:
- if (!binary)
+ if (!cstate->binary)
{
/* Default line termination depends on platform */
#ifndef WIN32
- CopySendChar('\n');
+ CopySendChar(cstate, '\n');
#else
- CopySendString("\r\n");
+ CopySendString(cstate, "\r\n");
#endif
}
break;
case COPY_OLD_FE:
/* The FE/BE protocol uses \n as newline for all platforms */
- if (!binary)
- CopySendChar('\n');
+ if (!cstate->binary)
+ CopySendChar(cstate, '\n');
break;
case COPY_NEW_FE:
/* The FE/BE protocol uses \n as newline for all platforms */
- if (!binary)
- CopySendChar('\n');
+ if (!cstate->binary)
+ CopySendChar(cstate, '\n');
/* Dump the accumulated row as one CopyData message */
- (void) pq_putmessage('d', copy_msgbuf->data, copy_msgbuf->len);
- /* Reset copy_msgbuf to empty */
- copy_msgbuf->len = 0;
- copy_msgbuf->data[0] = '\0';
+ (void) pq_putmessage('d', cstate->fe_msgbuf->data,
+ cstate->fe_msgbuf->len);
+ /* Reset fe_msgbuf to empty */
+ cstate->fe_msgbuf->len = 0;
+ cstate->fe_msgbuf->data[0] = '\0';
break;
}
}
/*
* CopyGetData reads data from the source (file or frontend)
- * CopyGetChar does the same for single characters
*
- * CopyGetEof checks if EOF was detected by previous Get operation.
+ * We attempt to read at least minread, and at most maxread, bytes from
+ * the source. The actual number of bytes read is returned; if this is
+ * less than minread, EOF was detected.
*
* Note: when copying from the frontend, we expect a proper EOF mark per
* protocol; if the frontend simply drops the connection, we raise error.
* It seems unwise to allow the COPY IN to complete normally in that case.
*
- * NB: no data conversion is applied by these functions
+ * NB: no data conversion is applied here.
*/
-static void
-CopyGetData(void *databuf, int datasize)
+static int
+CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
{
- switch (copy_dest)
+ int bytesread = 0;
+
+ switch (cstate->copy_dest)
{
case COPY_FILE:
- fread(databuf, datasize, 1, copy_file);
- if (feof(copy_file))
- fe_eof = true;
+ bytesread = fread(databuf, 1, maxread, cstate->copy_file);
+ if (ferror(cstate->copy_file))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read from COPY file: %m")));
break;
case COPY_OLD_FE:
- if (pq_getbytes((char *) databuf, datasize))
+ /*
+ * We cannot read more than minread bytes (which in practice is 1)
+ * because old protocol doesn't have any clear way of separating
+ * the COPY stream from following data. This is slow, but not
+ * any slower than the code path was originally, and we don't
+ * care much anymore about the performance of old protocol.
+ */
+ if (pq_getbytes((char *) databuf, minread))
{
/* Only a \. terminator is legal EOF in old protocol */
ereport(ERROR,
(errcode(ERRCODE_CONNECTION_FAILURE),
errmsg("unexpected EOF on client connection")));
}
+ bytesread = minread;
break;
case COPY_NEW_FE:
- while (datasize > 0 && !fe_eof)
+ while (maxread > 0 && bytesread < minread && !cstate->fe_eof)
{
int avail;
- while (copy_msgbuf->cursor >= copy_msgbuf->len)
+ while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
{
/* Try to receive another message */
int mtype;
@@ -418,7 +463,7 @@ CopyGetData(void *databuf, int datasize)
ereport(ERROR,
(errcode(ERRCODE_CONNECTION_FAILURE),
errmsg("unexpected EOF on client connection")));
- if (pq_getmessage(copy_msgbuf, 0))
+ if (pq_getmessage(cstate->fe_msgbuf, 0))
ereport(ERROR,
(errcode(ERRCODE_CONNECTION_FAILURE),
errmsg("unexpected EOF on client connection")));
@@ -428,13 +473,13 @@ CopyGetData(void *databuf, int datasize)
break;
case 'c': /* CopyDone */
/* COPY IN correctly terminated by frontend */
- fe_eof = true;
- return;
+ cstate->fe_eof = true;
+ return bytesread;
case 'f': /* CopyFail */
ereport(ERROR,
(errcode(ERRCODE_QUERY_CANCELED),
errmsg("COPY from stdin failed: %s",
- pq_getmsgstring(copy_msgbuf))));
+ pq_getmsgstring(cstate->fe_msgbuf))));
break;
case 'H': /* Flush */
case 'S': /* Sync */
@@ -454,142 +499,18 @@ CopyGetData(void *databuf, int datasize)
break;
}
}
- avail = copy_msgbuf->len - copy_msgbuf->cursor;
- if (avail > datasize)
- avail = datasize;
- pq_copymsgbytes(copy_msgbuf, databuf, avail);
+ avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
+ if (avail > maxread)
+ avail = maxread;
+ pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
databuf = (void *) ((char *) databuf + avail);
- datasize -= avail;
- }
- break;
- }
-}
-
-static int
-CopyGetChar(void)
-{
- int ch;
-
- switch (copy_dest)
- {
- case COPY_FILE:
- ch = getc(copy_file);
- break;
- case COPY_OLD_FE:
- ch = pq_getbyte();
- if (ch == EOF)
- {
- /* Only a \. terminator is legal EOF in old protocol */
- ereport(ERROR,
- (errcode(ERRCODE_CONNECTION_FAILURE),
- errmsg("unexpected EOF on client connection")));
- }
- break;
- case COPY_NEW_FE:
- {
- unsigned char cc;
-
- CopyGetData(&cc, 1);
- if (fe_eof)
- ch = EOF;
- else
- ch = cc;
- break;
- }
- default:
- ch = EOF;
- break;
- }
- if (ch == EOF)
- fe_eof = true;
- return ch;
-}
-
-/*
- * CopyPeekChar reads a byte in "peekable" mode.
- *
- * after each call to CopyPeekChar, a call to CopyDonePeek _must_
- * follow, unless EOF was returned.
- *
- * CopyDonePeek will either take the peeked char off the stream
- * (if pickup is true) or leave it on the stream (if pickup is false).
- */
-static int
-CopyPeekChar(void)
-{
- int ch;
-
- switch (copy_dest)
- {
- case COPY_FILE:
- ch = getc(copy_file);
- break;
- case COPY_OLD_FE:
- ch = pq_peekbyte();
- if (ch == EOF)
- {
- /* Only a \. terminator is legal EOF in old protocol */
- ereport(ERROR,
- (errcode(ERRCODE_CONNECTION_FAILURE),
- errmsg("unexpected EOF on client connection")));
+ maxread -= avail;
+ bytesread += avail;
}
break;
- case COPY_NEW_FE:
- {
- unsigned char cc;
-
- CopyGetData(&cc, 1);
- if (fe_eof)
- ch = EOF;
- else
- ch = cc;
- break;
- }
- default:
- ch = EOF;
- break;
}
- if (ch == EOF)
- fe_eof = true;
- return ch;
-}
-static void
-CopyDonePeek(int c, bool pickup)
-{
- if (fe_eof)
- return; /* can't unget an EOF */
- switch (copy_dest)
- {
- case COPY_FILE:
- if (!pickup)
- {
- /* We don't want to pick it up - so put it back in there */
- ungetc(c, copy_file);
- }
- /* If we wanted to pick it up, it's already done */
- break;
- case COPY_OLD_FE:
- if (pickup)
- {
- /* We want to pick it up */
- (void) pq_getbyte();
- }
-
- /*
- * If we didn't want to pick it up, just leave it where it
- * sits
- */
- break;
- case COPY_NEW_FE:
- if (!pickup)
- {
- /* We don't want to pick it up - so put it back in there */
- copy_msgbuf->cursor--;
- }
- /* If we wanted to pick it up, it's already done */
- break;
- }
+ return bytesread;
}
@@ -601,48 +522,90 @@ CopyDonePeek(int c, bool pickup)
* CopySendInt32 sends an int32 in network byte order
*/
static void
-CopySendInt32(int32 val)
+CopySendInt32(CopyState cstate, int32 val)
{
uint32 buf;
buf = htonl((uint32) val);
- CopySendData(&buf, sizeof(buf));
+ CopySendData(cstate, &buf, sizeof(buf));
}
/*
* CopyGetInt32 reads an int32 that appears in network byte order
+ *
+ * Returns true if OK, false if EOF
*/
-static int32
-CopyGetInt32(void)
+static bool
+CopyGetInt32(CopyState cstate, int32 *val)
{
uint32 buf;
- CopyGetData(&buf, sizeof(buf));
- return (int32) ntohl(buf);
+ if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
+ return false;
+ *val = (int32) ntohl(buf);
+ return true;
}
/*
* CopySendInt16 sends an int16 in network byte order
*/
static void
-CopySendInt16(int16 val)
+CopySendInt16(CopyState cstate, int16 val)
{
uint16 buf;
buf = htons((uint16) val);
- CopySendData(&buf, sizeof(buf));
+ CopySendData(cstate, &buf, sizeof(buf));
}
/*
* CopyGetInt16 reads an int16 that appears in network byte order
*/
-static int16
-CopyGetInt16(void)
+static bool
+CopyGetInt16(CopyState cstate, int16 *val)
{
uint16 buf;
- CopyGetData(&buf, sizeof(buf));
- return (int16) ntohs(buf);
+ if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
+ return false;
+ *val = (int16) ntohs(buf);
+ return true;
+}
+
+
+/*
+ * CopyLoadRawBuf loads some more data into raw_buf
+ *
+ * Returns TRUE if able to obtain at least one more byte, else FALSE.
+ *
+ * If raw_buf_index < raw_buf_len, the unprocessed bytes are transferred
+ * down to the start of the buffer and then we load more data after that.
+ * This case is used only when a frontend multibyte character crosses a
+ * bufferload boundary.
+ */
+static bool
+CopyLoadRawBuf(CopyState cstate)
+{
+ int nbytes;
+ int inbytes;
+
+ if (cstate->raw_buf_index < cstate->raw_buf_len)
+ {
+ /* Copy down the unprocessed data */
+ nbytes = cstate->raw_buf_len - cstate->raw_buf_index;
+ memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
+ nbytes);
+ }
+ else
+ nbytes = 0; /* no data need be saved */
+
+ inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
+ 1, RAW_BUF_SIZE - nbytes);
+ nbytes += inbytes;
+ cstate->raw_buf[nbytes] = '\0';
+ cstate->raw_buf_index = 0;
+ cstate->raw_buf_len = nbytes;
+ return (inbytes > 0);
}
@@ -669,11 +632,6 @@ CopyGetInt16(void)
* If in the text format, delimit columns with delimiter <delim> and print
* NULL values as <null_print>.
*
- * When loading in the text format from an input stream (as opposed to
- * a file), recognize a "." on a line by itself as EOF. Also recognize
- * a stream EOF. When unloading in the text format to an output stream,
- * write a "." on a line by itself at the end of the data.
- *
* Do not allow a Postgres user without superuser privilege to read from
* or write to a file.
*
@@ -683,29 +641,20 @@ CopyGetInt16(void)
void
DoCopy(const CopyStmt *stmt)
{
+ CopyState cstate;
RangeVar *relation = stmt->relation;
char *filename = stmt->filename;
bool is_from = stmt->is_from;
bool pipe = (stmt->filename == NULL);
- ListCell *option;
List *attnamelist = stmt->attlist;
- List *attnumlist;
- bool fe_copy = false;
- bool binary = false;
- bool oids = false;
- bool csv_mode = false;
- bool header_line = false;
- char *delim = NULL;
- char *quote = NULL;
- char *escape = NULL;
- char *null_print = NULL;
List *force_quote = NIL;
List *force_notnull = NIL;
- List *force_quote_atts = NIL;
- List *force_notnull_atts = NIL;
- Relation rel;
AclMode required_access = (is_from ? ACL_INSERT : ACL_SELECT);
AclResult aclresult;
+ ListCell *option;
+
+ /* Allocate workspace and zero all fields */
+ cstate = (CopyStateData *) palloc0(sizeof(CopyStateData));
/* Extract options from the statement node tree */
foreach(option, stmt->options)
@@ -714,67 +663,67 @@ DoCopy(const CopyStmt *stmt)
if (strcmp(defel->defname, "binary") == 0)
{
- if (binary)
+ if (cstate->binary)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
- binary = intVal(defel->arg);
+ cstate->binary = intVal(defel->arg);
}
else if (strcmp(defel->defname, "oids") == 0)
{
- if (oids)
+ if (cstate->oids)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
- oids = intVal(defel->arg);
+ cstate->oids = intVal(defel->arg);
}
else if (strcmp(defel->defname, "delimiter") == 0)
{
- if (delim)
+ if (cstate->delim)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
- delim = strVal(defel->arg);
+ cstate->delim = strVal(defel->arg);
}
else if (strcmp(defel->defname, "null") == 0)
{
- if (null_print)
+ if (cstate->null_print)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
- null_print = strVal(defel->arg);
+ cstate->null_print = strVal(defel->arg);
}
else if (strcmp(defel->defname, "csv") == 0)
{
- if (csv_mode)
+ if (cstate->csv_mode)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
- csv_mode = intVal(defel->arg);
+ cstate->csv_mode = intVal(defel->arg);
}
else if (strcmp(defel->defname, "header") == 0)
{
- if (header_line)
+ if (cstate->header_line)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
- header_line = intVal(defel->arg);
+ cstate->header_line = intVal(defel->arg);
}
else if (strcmp(defel->defname, "quote") == 0)
{
- if (quote)
+ if (cstate->quote)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
- quote = strVal(defel->arg);
+ cstate->quote = strVal(defel->arg);
}
else if (strcmp(defel->defname, "escape") == 0)
{
- if (escape)
+ if (cstate->escape)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options")));
- escape = strVal(defel->arg);
+ cstate->escape = strVal(defel->arg);
}
else if (strcmp(defel->defname, "force_quote") == 0)
{
@@ -797,72 +746,74 @@ DoCopy(const CopyStmt *stmt)
defel->defname);
}
- if (binary && delim)
+ /* Check for incompatible options */
+ if (cstate->binary && cstate->delim)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot specify DELIMITER in BINARY mode")));
- if (binary && csv_mode)
+ if (cstate->binary && cstate->csv_mode)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot specify CSV in BINARY mode")));
- if (binary && null_print)
+ if (cstate->binary && cstate->null_print)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot specify NULL in BINARY mode")));
- /* Set defaults */
- if (!delim)
- delim = csv_mode ? "," : "\t";
+ /* Set defaults for omitted options */
+ if (!cstate->delim)
+ cstate->delim = cstate->csv_mode ? "," : "\t";
- if (!null_print)
- null_print = csv_mode ? "" : "\\N";
+ if (!cstate->null_print)
+ cstate->null_print = cstate->csv_mode ? "" : "\\N";
+ cstate->null_print_len = strlen(cstate->null_print);
- if (csv_mode)
+ if (cstate->csv_mode)
{
- if (!quote)
- quote = "\"";
- if (!escape)
- escape = quote;
+ if (!cstate->quote)
+ cstate->quote = "\"";
+ if (!cstate->escape)
+ cstate->escape = cstate->quote;
}
/* Only single-character delimiter strings are supported. */
- if (strlen(delim) != 1)
+ if (strlen(cstate->delim) != 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY delimiter must be a single character")));
/* Check header */
- if (!csv_mode && header_line)
+ if (!cstate->csv_mode && cstate->header_line)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY HEADER available only in CSV mode")));
/* Check quote */
- if (!csv_mode && quote != NULL)
+ if (!cstate->csv_mode && cstate->quote != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY quote available only in CSV mode")));
- if (csv_mode && strlen(quote) != 1)
+ if (cstate->csv_mode && strlen(cstate->quote) != 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY quote must be a single character")));
/* Check escape */
- if (!csv_mode && escape != NULL)
+ if (!cstate->csv_mode && cstate->escape != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY escape available only in CSV mode")));
- if (csv_mode && strlen(escape) != 1)
+ if (cstate->csv_mode && strlen(cstate->escape) != 1)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY escape must be a single character")));
/* Check force_quote */
- if (!csv_mode && force_quote != NIL)
+ if (!cstate->csv_mode && force_quote != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force quote available only in CSV mode")));
@@ -872,7 +823,7 @@ DoCopy(const CopyStmt *stmt)
errmsg("COPY force quote only available using COPY TO")));
/* Check force_notnull */
- if (!csv_mode && force_notnull != NIL)
+ if (!cstate->csv_mode && force_notnull != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY force not null available only in CSV mode")));
@@ -882,32 +833,35 @@ DoCopy(const CopyStmt *stmt)
errmsg("COPY force not null only available using COPY FROM")));
/* Don't allow the delimiter to appear in the null string. */
- if (strchr(null_print, delim[0]) != NULL)
+ if (strchr(cstate->null_print, cstate->delim[0]) != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY delimiter must not appear in the NULL specification")));
- /* Don't allow the csv quote char to appear in the null string. */
- if (csv_mode && strchr(null_print, quote[0]) != NULL)
+ /* Don't allow the CSV quote char to appear in the null string. */
+ if (cstate->csv_mode &&
+ strchr(cstate->null_print, cstate->quote[0]) != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("CSV quote character must not appear in the NULL specification")));
/* Open and lock the relation, using the appropriate lock type. */
- rel = heap_openrv(relation, (is_from ? RowExclusiveLock : AccessShareLock));
+ cstate->rel = heap_openrv(relation,
+ (is_from ? RowExclusiveLock : AccessShareLock));
/* check read-only transaction */
- if (XactReadOnly && !is_from && !isTempNamespace(RelationGetNamespace(rel)))
+ if (XactReadOnly && !is_from &&
+ !isTempNamespace(RelationGetNamespace(cstate->rel)))
ereport(ERROR,
(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
errmsg("transaction is read-only")));
/* Check permissions. */
- aclresult = pg_class_aclcheck(RelationGetRelid(rel), GetUserId(),
+ aclresult = pg_class_aclcheck(RelationGetRelid(cstate->rel), GetUserId(),
required_access);
if (aclresult != ACLCHECK_OK)
aclcheck_error(aclresult, ACL_KIND_CLASS,
- RelationGetRelationName(rel));
+ RelationGetRelationName(cstate->rel));
if (!pipe && !superuser())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
@@ -916,29 +870,29 @@ DoCopy(const CopyStmt *stmt)
"psql's \\copy command also works for anyone.")));
/* Don't allow COPY w/ OIDs to or from a table without them */
- if (oids && !rel->rd_rel->relhasoids)
+ if (cstate->oids && !cstate->rel->rd_rel->relhasoids)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_COLUMN),
errmsg("table \"%s\" does not have OIDs",
- RelationGetRelationName(rel))));
+ RelationGetRelationName(cstate->rel))));
/* Generate or convert list of attributes to process */
- attnumlist = CopyGetAttnums(rel, attnamelist);
+ cstate->attnumlist = CopyGetAttnums(cstate->rel, attnamelist);
- /* Check that FORCE QUOTE references valid COPY columns */
+ /* Convert FORCE QUOTE name list to column numbers, check validity */
if (force_quote)
{
- TupleDesc tupDesc = RelationGetDescr(rel);
+ TupleDesc tupDesc = RelationGetDescr(cstate->rel);
Form_pg_attribute *attr = tupDesc->attrs;
ListCell *cur;
- force_quote_atts = CopyGetAttnums(rel, force_quote);
+ cstate->force_quote_atts = CopyGetAttnums(cstate->rel, force_quote);
- foreach(cur, force_quote_atts)
+ foreach(cur, cstate->force_quote_atts)
{
int attnum = lfirst_int(cur);
- if (!list_member_int(attnumlist, attnum))
+ if (!list_member_int(cstate->attnumlist, attnum))
ereport(ERROR,
(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
errmsg("FORCE QUOTE column \"%s\" not referenced by COPY",
@@ -946,20 +900,21 @@ DoCopy(const CopyStmt *stmt)
}
}
- /* Check that FORCE NOT NULL references valid COPY columns */
+ /* Convert FORCE NOT NULL name list to column numbers, check validity */
if (force_notnull)
{
- ListCell *cur;
- TupleDesc tupDesc = RelationGetDescr(rel);
+ TupleDesc tupDesc = RelationGetDescr(cstate->rel);
Form_pg_attribute *attr = tupDesc->attrs;
+ ListCell *cur;
- force_notnull_atts = CopyGetAttnums(rel, force_notnull);
+ cstate->force_notnull_atts = CopyGetAttnums(cstate->rel,
+ force_notnull);
- foreach(cur, force_notnull_atts)
+ foreach(cur, cstate->force_notnull_atts)
{
int attnum = lfirst_int(cur);
- if (!list_member_int(attnumlist, attnum))
+ if (!list_member_int(cstate->attnumlist, attnum))
ereport(ERROR,
(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
errmsg("FORCE NOT NULL column \"%s\" not referenced by COPY",
@@ -968,95 +923,96 @@ DoCopy(const CopyStmt *stmt)
}
/* Set up variables to avoid per-attribute overhead. */
- initStringInfo(&attribute_buf);
- initStringInfo(&line_buf);
- line_buf_converted = false;
+ initStringInfo(&cstate->attribute_buf);
+ initStringInfo(&cstate->line_buf);
+ cstate->line_buf_converted = false;
+ cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
+ cstate->raw_buf_index = cstate->raw_buf_len = 0;
- client_encoding = pg_get_client_encoding();
- server_encoding = GetDatabaseEncoding();
+ /* Set up encoding conversion info */
+ cstate->client_encoding = pg_get_client_encoding();
+ cstate->need_transcoding = (cstate->client_encoding != GetDatabaseEncoding());
+ cstate->client_only_encoding = PG_ENCODING_IS_CLIENT_ONLY(cstate->client_encoding);
- copy_dest = COPY_FILE; /* default */
- copy_file = NULL;
- copy_msgbuf = NULL;
- fe_eof = false;
+ cstate->copy_dest = COPY_FILE; /* default */
if (is_from)
{ /* copy from file to database */
- if (rel->rd_rel->relkind != RELKIND_RELATION)
+ if (cstate->rel->rd_rel->relkind != RELKIND_RELATION)
{
- if (rel->rd_rel->relkind == RELKIND_VIEW)
+ if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot copy to view \"%s\"",
- RelationGetRelationName(rel))));
- else if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
+ RelationGetRelationName(cstate->rel))));
+ else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot copy to sequence \"%s\"",
- RelationGetRelationName(rel))));
+ RelationGetRelationName(cstate->rel))));
else
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot copy to non-table relation \"%s\"",
- RelationGetRelationName(rel))));
+ RelationGetRelationName(cstate->rel))));
}
if (pipe)
{
if (whereToSendOutput == Remote)
- ReceiveCopyBegin(binary, list_length(attnumlist));
+ ReceiveCopyBegin(cstate);
else
- copy_file = stdin;
+ cstate->copy_file = stdin;
}
else
{
struct stat st;
- copy_file = AllocateFile(filename, PG_BINARY_R);
+ cstate->copy_file = AllocateFile(filename, PG_BINARY_R);
- if (copy_file == NULL)
+ if (cstate->copy_file == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\" for reading: %m",
filename)));
- fstat(fileno(copy_file), &st);
+ fstat(fileno(cstate->copy_file), &st);
if (S_ISDIR(st.st_mode))
{
- FreeFile(copy_file);
+ FreeFile(cstate->copy_file);
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is a directory", filename)));
}
}
- CopyFrom(rel, attnumlist, binary, oids, delim, null_print, csv_mode,
- quote, escape, force_notnull_atts, header_line);
+
+ CopyFrom(cstate);
}
else
{ /* copy from database to file */
- if (rel->rd_rel->relkind != RELKIND_RELATION)
+ if (cstate->rel->rd_rel->relkind != RELKIND_RELATION)
{
- if (rel->rd_rel->relkind == RELKIND_VIEW)
+ if (cstate->rel->rd_rel->relkind == RELKIND_VIEW)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot copy from view \"%s\"",
- RelationGetRelationName(rel))));
- else if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
+ RelationGetRelationName(cstate->rel))));
+ else if (cstate->rel->rd_rel->relkind == RELKIND_SEQUENCE)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot copy from sequence \"%s\"",
- RelationGetRelationName(rel))));
+ RelationGetRelationName(cstate->rel))));
else
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot copy from non-table relation \"%s\"",
- RelationGetRelationName(rel))));
+ RelationGetRelationName(cstate->rel))));
}
if (pipe)
{
if (whereToSendOutput == Remote)
- fe_copy = true;
+ cstate->fe_copy = true;
else
- copy_file = stdout;
+ cstate->copy_file = stdout;
}
else
{
@@ -1073,40 +1029,37 @@ DoCopy(const CopyStmt *stmt)
errmsg("relative path not allowed for COPY to file")));
oumask = umask((mode_t) 022);
- copy_file = AllocateFile(filename, PG_BINARY_W);
+ cstate->copy_file = AllocateFile(filename, PG_BINARY_W);
umask(oumask);
- if (copy_file == NULL)
+ if (cstate->copy_file == NULL)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not open file \"%s\" for writing: %m",
filename)));
- fstat(fileno(copy_file), &st);
+ fstat(fileno(cstate->copy_file), &st);
if (S_ISDIR(st.st_mode))
{
- FreeFile(copy_file);
+ FreeFile(cstate->copy_file);
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("\"%s\" is a directory", filename)));
}
}
- DoCopyTo(rel, attnumlist, binary, oids, delim, null_print, csv_mode,
- quote, escape, force_quote_atts, header_line, fe_copy);
+ DoCopyTo(cstate);
}
if (!pipe)
{
/* we assume only the write case could fail here */
- if (FreeFile(copy_file))
+ if (FreeFile(cstate->copy_file))
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m",
filename)));
}
- pfree(attribute_buf.data);
- pfree(line_buf.data);
/*
* Close the relation. If reading, we can release the AccessShareLock
@@ -1114,7 +1067,13 @@ DoCopy(const CopyStmt *stmt)
* transaction to ensure that updates will be committed before lock is
* released.
*/
- heap_close(rel, (is_from ? NoLock : AccessShareLock));
+ heap_close(cstate->rel, (is_from ? NoLock : AccessShareLock));
+
+ /* Clean up storage (probably not really necessary) */
+ pfree(cstate->attribute_buf.data);
+ pfree(cstate->line_buf.data);
+ pfree(cstate->raw_buf);
+ pfree(cstate);
}
@@ -1123,20 +1082,17 @@ DoCopy(const CopyStmt *stmt)
* so we don't need to plaster a lot of variables with "volatile".
*/
static void
-DoCopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
- char *delim, char *null_print, bool csv_mode, char *quote,
- char *escape, List *force_quote_atts, bool header_line, bool fe_copy)
+DoCopyTo(CopyState cstate)
{
PG_TRY();
{
- if (fe_copy)
- SendCopyBegin(binary, list_length(attnumlist));
+ if (cstate->fe_copy)
+ SendCopyBegin(cstate);
- CopyTo(rel, attnumlist, binary, oids, delim, null_print, csv_mode,
- quote, escape, force_quote_atts, header_line);
+ CopyTo(cstate);
- if (fe_copy)
- SendCopyEnd(binary);
+ if (cstate->fe_copy)
+ SendCopyEnd(cstate);
}
PG_CATCH();
{
@@ -1155,9 +1111,7 @@ DoCopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
* Copy from relation TO file.
*/
static void
-CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
- char *delim, char *null_print, bool csv_mode, char *quote,
- char *escape, List *force_quote_atts, bool header_line)
+CopyTo(CopyState cstate)
{
HeapTuple tuple;
TupleDesc tupDesc;
@@ -1168,25 +1122,27 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
FmgrInfo *out_functions;
bool *force_quote;
char *string;
+ char *null_print_client;
ListCell *cur;
MemoryContext oldcontext;
MemoryContext mycontext;
- tupDesc = rel->rd_att;
+ tupDesc = cstate->rel->rd_att;
attr = tupDesc->attrs;
num_phys_attrs = tupDesc->natts;
- attr_count = list_length(attnumlist);
+ attr_count = list_length(cstate->attnumlist);
+ null_print_client = cstate->null_print; /* default */
/* Get info about the columns we need to process. */
out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
force_quote = (bool *) palloc(num_phys_attrs * sizeof(bool));
- foreach(cur, attnumlist)
+ foreach(cur, cstate->attnumlist)
{
int attnum = lfirst_int(cur);
Oid out_func_oid;
bool isvarlena;
- if (binary)
+ if (cstate->binary)
getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
&out_func_oid,
&isvarlena);
@@ -1196,7 +1152,7 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
&isvarlena);
fmgr_info(out_func_oid, &out_functions[attnum - 1]);
- if (list_member_int(force_quote_atts, attnum))
+ if (list_member_int(cstate->force_quote_atts, attnum))
force_quote[attnum - 1] = true;
else
force_quote[attnum - 1] = false;
@@ -1214,21 +1170,21 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
- if (binary)
+ if (cstate->binary)
{
/* Generate header for a binary copy */
int32 tmp;
/* Signature */
- CopySendData((char *) BinarySignature, 11);
+ CopySendData(cstate, (char *) BinarySignature, 11);
/* Flags field */
tmp = 0;
- if (oids)
+ if (cstate->oids)
tmp |= (1 << 16);
- CopySendInt32(tmp);
+ CopySendInt32(cstate, tmp);
/* No header extension */
tmp = 0;
- CopySendInt32(tmp);
+ CopySendInt32(cstate, tmp);
}
else
{
@@ -1236,37 +1192,35 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
* For non-binary copy, we need to convert null_print to client
* encoding, because it will be sent directly with CopySendString.
*/
- if (server_encoding != client_encoding)
- null_print = (char *)
- pg_server_to_client((unsigned char *) null_print,
- strlen(null_print));
+ if (cstate->need_transcoding)
+ null_print_client = (char *)
+ pg_server_to_client((unsigned char *) cstate->null_print,
+ cstate->null_print_len);
/* if a header has been requested send the line */
- if (header_line)
+ if (cstate->header_line)
{
bool hdr_delim = false;
- char *colname;
- foreach(cur, attnumlist)
+ foreach(cur, cstate->attnumlist)
{
int attnum = lfirst_int(cur);
+ char *colname;
if (hdr_delim)
- CopySendChar(delim[0]);
+ CopySendChar(cstate, cstate->delim[0]);
hdr_delim = true;
colname = NameStr(attr[attnum - 1]->attname);
- CopyAttributeOutCSV(colname, delim, quote, escape,
- strcmp(colname, null_print) == 0);
+ CopyAttributeOutCSV(cstate, colname, false);
}
- CopySendEndOfRow(binary);
-
+ CopySendEndOfRow(cstate);
}
}
- scandesc = heap_beginscan(rel, ActiveSnapshot, 0, NULL);
+ scandesc = heap_beginscan(cstate->rel, ActiveSnapshot, 0, NULL);
while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
{
@@ -1277,33 +1231,34 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
MemoryContextReset(mycontext);
oldcontext = MemoryContextSwitchTo(mycontext);
- if (binary)
+ if (cstate->binary)
{
/* Binary per-tuple header */
- CopySendInt16(attr_count);
+ CopySendInt16(cstate, attr_count);
/* Send OID if wanted --- note attr_count doesn't include it */
- if (oids)
+ if (cstate->oids)
{
Oid oid = HeapTupleGetOid(tuple);
/* Hack --- assume Oid is same size as int32 */
- CopySendInt32(sizeof(int32));
- CopySendInt32(oid);
+ CopySendInt32(cstate, sizeof(int32));
+ CopySendInt32(cstate, oid);
}
}
else
{
/* Text format has no per-tuple header, but send OID if wanted */
- if (oids)
+ /* Assume digits don't need any quoting or encoding conversion */
+ if (cstate->oids)
{
string = DatumGetCString(DirectFunctionCall1(oidout,
ObjectIdGetDatum(HeapTupleGetOid(tuple))));
- CopySendString(string);
+ CopySendString(cstate, string);
need_delim = true;
}
}
- foreach(cur, attnumlist)
+ foreach(cur, cstate->attnumlist)
{
int attnum = lfirst_int(cur);
Datum value;
@@ -1311,35 +1266,31 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
value = heap_getattr(tuple, attnum, tupDesc, &isnull);
- if (!binary)
+ if (!cstate->binary)
{
if (need_delim)
- CopySendChar(delim[0]);
+ CopySendChar(cstate, cstate->delim[0]);
need_delim = true;
}
if (isnull)
{
- if (!binary)
- CopySendString(null_print); /* null indicator */
+ if (!cstate->binary)
+ CopySendString(cstate, null_print_client);
else
- CopySendInt32(-1); /* null marker */
+ CopySendInt32(cstate, -1);
}
else
{
- if (!binary)
+ if (!cstate->binary)
{
string = DatumGetCString(FunctionCall1(&out_functions[attnum - 1],
value));
- if (csv_mode)
- {
- CopyAttributeOutCSV(string, delim, quote, escape,
- (strcmp(string, null_print) == 0 ||
- force_quote[attnum - 1]));
- }
+ if (cstate->csv_mode)
+ CopyAttributeOutCSV(cstate, string,
+ force_quote[attnum - 1]);
else
- CopyAttributeOut(string, delim);
-
+ CopyAttributeOutText(cstate, string);
}
else
{
@@ -1348,24 +1299,24 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
outputbytes = DatumGetByteaP(FunctionCall1(&out_functions[attnum - 1],
value));
/* We assume the result will not have been toasted */
- CopySendInt32(VARSIZE(outputbytes) - VARHDRSZ);
- CopySendData(VARDATA(outputbytes),
+ CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
+ CopySendData(cstate, VARDATA(outputbytes),
VARSIZE(outputbytes) - VARHDRSZ);
}
}
}
- CopySendEndOfRow(binary);
+ CopySendEndOfRow(cstate);
MemoryContextSwitchTo(oldcontext);
}
heap_endscan(scandesc);
- if (binary)
+ if (cstate->binary)
{
/* Generate trailer for a binary copy */
- CopySendInt16(-1);
+ CopySendInt16(cstate, -1);
}
MemoryContextDelete(mycontext);
@@ -1381,35 +1332,43 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
static void
copy_in_error_callback(void *arg)
{
- if (copy_binary)
+ CopyState cstate = (CopyState) arg;
+
+ if (cstate->binary)
{
/* can't usefully display the data */
- if (copy_attname)
+ if (cstate->cur_attname)
errcontext("COPY %s, line %d, column %s",
- copy_relname, copy_lineno, copy_attname);
+ cstate->cur_relname, cstate->cur_lineno,
+ cstate->cur_attname);
else
- errcontext("COPY %s, line %d", copy_relname, copy_lineno);
+ errcontext("COPY %s, line %d",
+ cstate->cur_relname, cstate->cur_lineno);
}
else
{
- if (copy_attname)
+ if (cstate->cur_attname && cstate->cur_attval)
{
/* error is relevant to a particular column */
- limit_printout_length(&attribute_buf);
+ char *attval;
+
+ attval = limit_printout_length(cstate->cur_attval);
errcontext("COPY %s, line %d, column %s: \"%s\"",
- copy_relname, copy_lineno, copy_attname,
- attribute_buf.data);
+ cstate->cur_relname, cstate->cur_lineno,
+ cstate->cur_attname, attval);
+ pfree(attval);
}
else
{
/* error is relevant to a particular line */
- if (line_buf_converted ||
- client_encoding == server_encoding)
+ if (cstate->line_buf_converted || !cstate->need_transcoding)
{
- limit_printout_length(&line_buf);
+ char *lineval;
+
+ lineval = limit_printout_length(cstate->line_buf.data);
errcontext("COPY %s, line %d: \"%s\"",
- copy_relname, copy_lineno,
- line_buf.data);
+ cstate->cur_relname, cstate->cur_lineno, lineval);
+ pfree(lineval);
}
else
{
@@ -1421,7 +1380,8 @@ copy_in_error_callback(void *arg)
* to regurgitate it without conversion. So we have to punt
* and just report the line number.
*/
- errcontext("COPY %s, line %d", copy_relname, copy_lineno);
+ errcontext("COPY %s, line %d",
+ cstate->cur_relname, cstate->cur_lineno);
}
}
}
@@ -1434,38 +1394,39 @@ copy_in_error_callback(void *arg)
* truncate the string. However, some versions of glibc have a bug/misfeature
* that vsnprintf will always fail (return -1) if it is asked to truncate
* a string that contains invalid byte sequences for the current encoding.
- * So, do our own truncation. We assume we can alter the StringInfo buffer
- * holding the input data.
+ * So, do our own truncation. We return a palloc'd string: a copy of the
+ * input, clipped and suffixed with "..." if it was too long; callers pfree it.
*/
-static void
-limit_printout_length(StringInfo buf)
+static char *
+limit_printout_length(const char *str)
{
#define MAX_COPY_DATA_DISPLAY 100
+ int slen = strlen(str);
int len;
+ char *res;
/* Fast path if definitely okay */
- if (buf->len <= MAX_COPY_DATA_DISPLAY)
- return;
+ if (slen <= MAX_COPY_DATA_DISPLAY)
+ return pstrdup(str);
/* Apply encoding-dependent truncation */
- len = pg_mbcliplen(buf->data, buf->len, MAX_COPY_DATA_DISPLAY);
- if (buf->len <= len)
- return; /* no need to truncate */
- buf->len = len;
- buf->data[len] = '\0';
-
- /* Add "..." to show we truncated the input */
- appendStringInfoString(buf, "...");
+ len = pg_mbcliplen(str, slen, MAX_COPY_DATA_DISPLAY);
+
+ /*
+ * Truncate, and add "..." to show we truncated the input.
+ *
+ * This path is reached only when the input is longer than
+ * MAX_COPY_DATA_DISPLAY bytes. The result buffer holds the first len
+ * bytes of str, the three-byte "..." marker, and a terminating NUL
+ * (hence len + 4); the strcpy below supplies the NUL. The input string
+ * itself is left unmodified, unlike the old StringInfo-based version.
+ * Callers pfree the returned string.
+ */
+ res = (char *) palloc(len + 4);
+ memcpy(res, str, len);
+ strcpy(res + len, "...");
+
+ return res;
}
/*
* Copy FROM file to relation.
*/
static void
-CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
- char *delim, char *null_print, bool csv_mode, char *quote,
- char *escape, List *force_notnull_atts, bool header_line)
+CopyFrom(CopyState cstate)
{
HeapTuple tuple;
TupleDesc tupDesc;
@@ -1485,6 +1446,8 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
Oid in_func_oid;
Datum *values;
char *nulls;
+ int nfields;
+ char **field_strings;
bool done = false;
bool isnull;
ResultRelInfo *resultRelInfo;
@@ -1497,10 +1460,10 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
MemoryContext oldcontext = CurrentMemoryContext;
ErrorContextCallback errcontext;
- tupDesc = RelationGetDescr(rel);
+ tupDesc = RelationGetDescr(cstate->rel);
attr = tupDesc->attrs;
num_phys_attrs = tupDesc->natts;
- attr_count = list_length(attnumlist);
+ attr_count = list_length(cstate->attnumlist);
num_defaults = 0;
/*
@@ -1510,8 +1473,8 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
*/
resultRelInfo = makeNode(ResultRelInfo);
resultRelInfo->ri_RangeTableIndex = 1; /* dummy */
- resultRelInfo->ri_RelationDesc = rel;
- resultRelInfo->ri_TrigDesc = CopyTriggerDesc(rel->trigdesc);
+ resultRelInfo->ri_RelationDesc = cstate->rel;
+ resultRelInfo->ri_TrigDesc = CopyTriggerDesc(cstate->rel->trigdesc);
if (resultRelInfo->ri_TrigDesc)
resultRelInfo->ri_TrigFunctions = (FmgrInfo *)
palloc0(resultRelInfo->ri_TrigDesc->numtriggers * sizeof(FmgrInfo));
@@ -1548,7 +1511,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
continue;
/* Fetch the input function and typioparam info */
- if (binary)
+ if (cstate->binary)
getTypeBinaryInputInfo(attr[attnum - 1]->atttypid,
&in_func_oid, &typioparams[attnum - 1]);
else
@@ -1556,17 +1519,17 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
&in_func_oid, &typioparams[attnum - 1]);
fmgr_info(in_func_oid, &in_functions[attnum - 1]);
- if (list_member_int(force_notnull_atts, attnum))
+ if (list_member_int(cstate->force_notnull_atts, attnum))
force_notnull[attnum - 1] = true;
else
force_notnull[attnum - 1] = false;
/* Get default info if needed */
- if (!list_member_int(attnumlist, attnum))
+ if (!list_member_int(cstate->attnumlist, attnum))
{
/* attribute is NOT to be copied from input */
/* use default value if one exists */
- Node *defexpr = build_column_default(rel, attnum);
+ Node *defexpr = build_column_default(cstate->rel, attnum);
if (defexpr != NULL)
{
@@ -1619,8 +1582,8 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
*/
ExecBSInsertTriggers(estate, resultRelInfo);
- if (!binary)
- file_has_oids = oids; /* must rely on user to tell us this... */
+ if (!cstate->binary)
+ file_has_oids = cstate->oids; /* must rely on user to tell us... */
else
{
/* Read and verify binary header */
@@ -1628,14 +1591,13 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
int32 tmp;
/* Signature */
- CopyGetData(readSig, 11);
- if (CopyGetEof() || memcmp(readSig, BinarySignature, 11) != 0)
+ if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
+ memcmp(readSig, BinarySignature, 11) != 0)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("COPY file signature not recognized")));
/* Flags field */
- tmp = CopyGetInt32();
- if (CopyGetEof())
+ if (!CopyGetInt32(cstate, &tmp))
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("invalid COPY file header (missing flags)")));
@@ -1646,23 +1608,22 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("unrecognized critical flags in COPY file header")));
/* Header extension length */
- tmp = CopyGetInt32();
- if (CopyGetEof() || tmp < 0)
+ if (!CopyGetInt32(cstate, &tmp) ||
+ tmp < 0)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("invalid COPY file header (missing length)")));
/* Skip extension header, if present */
while (tmp-- > 0)
{
- CopyGetData(readSig, 1);
- if (CopyGetEof())
+ if (CopyGetData(cstate, readSig, 1, 1) != 1)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("invalid COPY file header (wrong length)")));
}
}
- if (file_has_oids && binary)
+ if (file_has_oids && cstate->binary)
{
getTypeBinaryInputInfo(OIDOID,
&in_func_oid, &oid_typioparam);
@@ -1672,30 +1633,34 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
values = (Datum *) palloc(num_phys_attrs * sizeof(Datum));
nulls = (char *) palloc(num_phys_attrs * sizeof(char));
+ /* create workspace for CopyReadAttributes results */
+ nfields = file_has_oids ? (attr_count + 1) : attr_count;
+ field_strings = (char **) palloc(nfields * sizeof(char *));
+
/* Make room for a PARAM_EXEC value for domain constraint checks */
if (hasConstraints)
econtext->ecxt_param_exec_vals = (ParamExecData *)
palloc0(sizeof(ParamExecData));
- /* Initialize static variables */
- fe_eof = false;
- eol_type = EOL_UNKNOWN;
- copy_binary = binary;
- copy_relname = RelationGetRelationName(rel);
- copy_lineno = 0;
- copy_attname = NULL;
+ /* Initialize state variables */
+ cstate->fe_eof = false;
+ cstate->eol_type = EOL_UNKNOWN;
+ cstate->cur_relname = RelationGetRelationName(cstate->rel);
+ cstate->cur_lineno = 0;
+ cstate->cur_attname = NULL;
+ cstate->cur_attval = NULL;
/* Set up callback to identify error line number */
errcontext.callback = copy_in_error_callback;
- errcontext.arg = NULL;
+ errcontext.arg = (void *) cstate;
errcontext.previous = error_context_stack;
error_context_stack = &errcontext;
/* on input just throw the header line away */
- if (header_line)
+ if (cstate->header_line)
{
- copy_lineno++;
- done = CopyReadLine(quote, escape) ;
+ cstate->cur_lineno++;
+ done = CopyReadLine(cstate);
}
while (!done)
@@ -1705,7 +1670,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
CHECK_FOR_INTERRUPTS();
- copy_lineno++;
+ cstate->cur_lineno++;
/* Reset the per-tuple exprcontext */
ResetPerTupleExprContext(estate);
@@ -1717,15 +1682,15 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
MemSet(values, 0, num_phys_attrs * sizeof(Datum));
MemSet(nulls, 'n', num_phys_attrs * sizeof(char));
- if (!binary)
+ if (!cstate->binary)
{
- CopyReadResult result = NORMAL_ATTR;
- char *string;
ListCell *cur;
+ int fldct;
+ int fieldno;
+ char *string;
/* Actually read the line into memory here */
- done = csv_mode ?
- CopyReadLine(quote, escape) : CopyReadLine(NULL, NULL);
+ done = CopyReadLine(cstate);
/*
* EOF at start of line means we're done. If we see EOF after
@@ -1733,91 +1698,79 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
* by EOF, ie, process the line and then exit loop on next
* iteration.
*/
- if (done && line_buf.len == 0)
+ if (done && cstate->line_buf.len == 0)
break;
+ /* Parse the line into de-escaped field values */
+ if (cstate->csv_mode)
+ fldct = CopyReadAttributesCSV(cstate, nfields, field_strings);
+ else
+ fldct = CopyReadAttributesText(cstate, nfields, field_strings);
+ fieldno = 0;
+
+ /* Read the OID field if present */
if (file_has_oids)
{
- /* can't be in CSV mode here */
- string = CopyReadAttribute(delim, null_print,
- &result, &isnull);
+ if (fieldno >= fldct)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("missing data for OID column")));
+ string = field_strings[fieldno++];
- if (isnull)
+ if (string == NULL)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("null OID in COPY data")));
else
{
- copy_attname = "oid";
+ cstate->cur_attname = "oid";
+ cstate->cur_attval = string;
loaded_oid = DatumGetObjectId(DirectFunctionCall1(oidin,
CStringGetDatum(string)));
if (loaded_oid == InvalidOid)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("invalid OID in COPY data")));
- copy_attname = NULL;
+ cstate->cur_attname = NULL;
+ cstate->cur_attval = NULL;
}
}
/* Loop to read the user attributes on the line. */
- foreach(cur, attnumlist)
+ foreach(cur, cstate->attnumlist)
{
int attnum = lfirst_int(cur);
int m = attnum - 1;
- /*
- * If prior attr on this line was ended by newline,
- * complain.
- */
- if (result != NORMAL_ATTR)
+ if (fieldno >= fldct)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("missing data for column \"%s\"",
NameStr(attr[m]->attname))));
+ string = field_strings[fieldno++];
- if (csv_mode)
+ if (cstate->csv_mode && string == NULL && force_notnull[m])
{
- string = CopyReadAttributeCSV(delim, null_print, quote,
- escape, &result, &isnull);
- if (result == UNTERMINATED_FIELD)
- ereport(ERROR,
- (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- errmsg("unterminated CSV quoted field")));
+ /* Go ahead and read the NULL string */
+ string = cstate->null_print;
}
- else
- string = CopyReadAttribute(delim, null_print,
- &result, &isnull);
- if (csv_mode && isnull && force_notnull[m])
+ /* If we read an SQL NULL, no need to do anything */
+ if (string != NULL)
{
- string = null_print; /* set to NULL string */
- isnull = false;
- }
-
- /* we read an SQL NULL, no need to do anything */
- if (!isnull)
- {
- copy_attname = NameStr(attr[m]->attname);
+ cstate->cur_attname = NameStr(attr[m]->attname);
+ cstate->cur_attval = string;
values[m] = FunctionCall3(&in_functions[m],
CStringGetDatum(string),
ObjectIdGetDatum(typioparams[m]),
Int32GetDatum(attr[m]->atttypmod));
nulls[m] = ' ';
- copy_attname = NULL;
+ cstate->cur_attname = NULL;
+ cstate->cur_attval = NULL;
}
}
- /*
- * Complain if there are more fields on the input line.
- *
- * Special case: if we're reading a zero-column table, we won't
- * yet have called CopyReadAttribute() at all; so no error if
- * line is empty.
- */
- if (result == NORMAL_ATTR && line_buf.len != 0)
- ereport(ERROR,
- (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- errmsg("extra data after last expected column")));
+ Assert(fieldno == nfields);
}
else
{
@@ -1825,8 +1778,8 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
int16 fld_count;
ListCell *cur;
- fld_count = CopyGetInt16();
- if (CopyGetEof() || fld_count == -1)
+ if (!CopyGetInt16(cstate, &fld_count) ||
+ fld_count == -1)
{
done = true;
break;
@@ -1840,9 +1793,10 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
if (file_has_oids)
{
- copy_attname = "oid";
+ cstate->cur_attname = "oid";
loaded_oid =
- DatumGetObjectId(CopyReadBinaryAttribute(0,
+ DatumGetObjectId(CopyReadBinaryAttribute(cstate,
+ 0,
&oid_in_function,
oid_typioparam,
-1,
@@ -1851,24 +1805,25 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("invalid OID in COPY data")));
- copy_attname = NULL;
+ cstate->cur_attname = NULL;
}
i = 0;
- foreach(cur, attnumlist)
+ foreach(cur, cstate->attnumlist)
{
int attnum = lfirst_int(cur);
int m = attnum - 1;
- copy_attname = NameStr(attr[m]->attname);
+ cstate->cur_attname = NameStr(attr[m]->attname);
i++;
- values[m] = CopyReadBinaryAttribute(i,
+ values[m] = CopyReadBinaryAttribute(cstate,
+ i,
&in_functions[m],
typioparams[m],
attr[m]->atttypmod,
&isnull);
nulls[m] = isnull ? 'n' : ' ';
- copy_attname = NULL;
+ cstate->cur_attname = NULL;
}
}
@@ -1915,7 +1870,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
/* And now we can form the input tuple. */
tuple = heap_formtuple(tupDesc, values, nulls);
- if (oids && file_has_oids)
+ if (cstate->oids && file_has_oids)
HeapTupleSetOid(tuple, loaded_oid);
/* Triggers and stuff need to be invoked in query context. */
@@ -1946,11 +1901,11 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
ExecStoreTuple(tuple, slot, InvalidBuffer, false);
/* Check the constraints of the tuple */
- if (rel->rd_att->constr)
+ if (cstate->rel->rd_att->constr)
ExecConstraints(resultRelInfo, slot, estate);
/* OK, store the tuple and create index entries for it */
- simple_heap_insert(rel, tuple);
+ simple_heap_insert(cstate->rel, tuple);
if (resultRelInfo->ri_NumIndices > 0)
ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
@@ -1973,6 +1928,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
pfree(values);
pfree(nulls);
+ pfree(field_strings);
pfree(in_functions);
pfree(typioparams);
@@ -1994,292 +1950,712 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
* server encoding.
*
* Result is true if read was terminated by EOF, false if terminated
- * by newline.
+ * by newline. The terminating newline or EOF marker is not included
+ * in the final value of line_buf.
*/
static bool
-CopyReadLine(char * quote, char * escape)
+CopyReadLine(CopyState cstate)
{
bool result;
- bool change_encoding = (client_encoding != server_encoding);
- int c;
- int mblen;
- int j;
- unsigned char s[2];
- char *cvt;
- bool in_quote = false, last_was_esc = false, csv_mode = false;
- char quotec = '\0', escapec = '\0';
- if (quote)
+ /* Reset line_buf to empty */
+ cstate->line_buf.len = 0;
+ cstate->line_buf.data[0] = '\0';
+
+ /* Mark that encoding conversion hasn't occurred yet */
+ cstate->line_buf_converted = false;
+
+ /* Parse data and transfer into line_buf */
+ if (cstate->csv_mode)
+ result = CopyReadLineCSV(cstate);
+ else
+ result = CopyReadLineText(cstate);
+
+ if (result)
+ {
+ /*
+ * Reached EOF. In protocol version 3, we should ignore anything
+ * after \. up to the protocol end of copy data. (XXX maybe
+ * better not to treat \. as special?)
+ */
+ if (cstate->copy_dest == COPY_NEW_FE)
+ {
+ do {
+ cstate->raw_buf_index = cstate->raw_buf_len;
+ } while (CopyLoadRawBuf(cstate));
+ }
+ }
+ else
{
- csv_mode = true;
- quotec = quote[0];
- escapec = escape[0];
- /* ignore special escape processing if it's the same as quotec */
- if (quotec == escapec)
- escapec = '\0';
+ /*
+ * If we didn't hit EOF, then we must have transferred the EOL marker
+ * to line_buf along with the data. Get rid of it.
+ */
+ switch (cstate->eol_type)
+ {
+ case EOL_NL:
+ Assert(cstate->line_buf.len >= 1);
+ Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
+ cstate->line_buf.len--;
+ cstate->line_buf.data[cstate->line_buf.len] = '\0';
+ break;
+ case EOL_CR:
+ Assert(cstate->line_buf.len >= 1);
+ Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
+ cstate->line_buf.len--;
+ cstate->line_buf.data[cstate->line_buf.len] = '\0';
+ break;
+ case EOL_CRNL:
+ Assert(cstate->line_buf.len >= 2);
+ Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
+ Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
+ cstate->line_buf.len -= 2;
+ cstate->line_buf.data[cstate->line_buf.len] = '\0';
+ break;
+ case EOL_UNKNOWN:
+ /* shouldn't get here */
+ Assert(false);
+ break;
+ }
}
+ /* Done reading the line. Convert it to server encoding. */
+ if (cstate->need_transcoding)
+ {
+ char *cvt;
- s[1] = 0;
+ cvt = (char *) pg_client_to_server((unsigned char *) cstate->line_buf.data,
+ cstate->line_buf.len);
+ if (cvt != cstate->line_buf.data)
+ {
+ /* transfer converted data back to line_buf */
+ cstate->line_buf.len = 0;
+ cstate->line_buf.data[0] = '\0';
+ appendBinaryStringInfo(&cstate->line_buf, cvt, strlen(cvt));
+ pfree(cvt);
+ }
+ }
+
+ /* Now it's safe to use the buffer in error messages */
+ cstate->line_buf_converted = true;
- /* reset line_buf to empty */
- line_buf.len = 0;
- line_buf.data[0] = '\0';
- line_buf.cursor = 0;
+ return result;
+}
+
+/*
+ * CopyReadLineText - inner loop of CopyReadLine for non-CSV mode
+ *
+ * If you need to change this, better look at CopyReadLineCSV too
+ */
+static bool
+CopyReadLineText(CopyState cstate)
+{
+ bool result;
+ char *copy_raw_buf;
+ int raw_buf_ptr;
+ int copy_buf_len;
+ bool need_data;
+ bool hit_eof;
+ unsigned char s[2];
- /* mark that encoding conversion hasn't occurred yet */
- line_buf_converted = false;
+ s[1] = 0;
/* set default status */
result = false;
/*
- * In this loop we only care for detecting newlines (\r and/or \n) and
- * the end-of-copy marker (\.).
+ * The objective of this loop is to transfer the entire next input
+ * line into line_buf. Hence, we only care for detecting newlines
+ * (\r and/or \n) and the end-of-copy marker (\.).
*
- * In Text mode, for backwards compatibility we allow
- * backslashes to escape newline characters. Backslashes other than
- * the end marker get put into the line_buf, since CopyReadAttribute
- * does its own escape processing.
+ * For backwards compatibility we allow backslashes to escape newline
+ * characters. Backslashes other than the end marker get put into the
+ * line_buf, since CopyReadAttributesText does its own escape processing.
*
- * In CSV mode, CR and NL inside q quoted field are just part of the
- * data value and are put in line_buf. We keep just enough state
- * to know if we are currently in a quoted field or not.
- *
- * These four characters, and only these four, are assumed the same in
+ * These four characters, and only these four, are assumed the same in
* frontend and backend encodings.
*
- * We do not assume that second and later bytes of a frontend
- * multibyte character couldn't look like ASCII characters.
+ * For speed, we try to move data to line_buf in chunks rather than
+ * one character at a time. raw_buf_ptr points to the next character
+ * to examine; any characters from raw_buf_index to raw_buf_ptr have
+ * been determined to be part of the line, but not yet transferred
+ * to line_buf.
+ *
+ * For a little extra speed within the loop, we copy raw_buf and
+ * raw_buf_len into local variables.
*/
+ copy_raw_buf = cstate->raw_buf;
+ raw_buf_ptr = cstate->raw_buf_index;
+ copy_buf_len = cstate->raw_buf_len;
+ need_data = false; /* flag to force reading more data */
+ hit_eof = false; /* flag indicating no more data available */
+
for (;;)
{
- c = CopyGetChar();
- if (c == EOF)
+ int prev_raw_ptr;
+ char c;
+
+ /* Load more data if needed */
+ if (raw_buf_ptr >= copy_buf_len || need_data)
{
- result = true;
- break;
+ /*
+ * Transfer any approved data to line_buf; must do this to
+ * be sure there is some room in raw_buf.
+ */
+ if (raw_buf_ptr > cstate->raw_buf_index)
+ {
+ appendBinaryStringInfo(&cstate->line_buf,
+ cstate->raw_buf + cstate->raw_buf_index,
+ raw_buf_ptr - cstate->raw_buf_index);
+ cstate->raw_buf_index = raw_buf_ptr;
+ }
+ /*
+ * Try to read some more data. This will certainly reset
+ * raw_buf_index to zero, and raw_buf_ptr must go with it.
+ */
+ if (!CopyLoadRawBuf(cstate))
+ hit_eof = true;
+ raw_buf_ptr = 0;
+ copy_buf_len = cstate->raw_buf_len;
+ /*
+ * If we are completely out of data, break out of the loop,
+ * reporting EOF.
+ */
+ if (copy_buf_len <= 0)
+ {
+ result = true;
+ break;
+ }
+ need_data = false;
}
- if (csv_mode)
+ /* OK to fetch a character */
+ prev_raw_ptr = raw_buf_ptr;
+ c = copy_raw_buf[raw_buf_ptr++];
+
+ if (c == '\r')
{
- /*
- * Dealing with quotes and escapes here is mildly tricky. If the
- * quote char is also the escape char, there's no problem - we
- * just use the char as a toggle. If they are different, we need
- * to ensure that we only take account of an escape inside a quoted
- * field and immediately preceding a quote char, and not the
- * second in a escape-escape sequence.
- */
-
- if (in_quote && c == escapec)
- last_was_esc = ! last_was_esc;
- if (c == quotec && ! last_was_esc)
- in_quote = ! in_quote;
- if (c != escapec)
- last_was_esc = false;
+ /* Check for \r\n on first line, _and_ handle \r\n. */
+ if (cstate->eol_type == EOL_UNKNOWN ||
+ cstate->eol_type == EOL_CRNL)
+ {
+ /*
+ * If we need more data, go back to loop top to load it.
+ *
+ * Note that if we are at EOF, c will wind up as '\0'
+ * because of the guaranteed pad of raw_buf.
+ */
+ if (raw_buf_ptr >= copy_buf_len && !hit_eof)
+ {
+ raw_buf_ptr = prev_raw_ptr; /* undo fetch */
+ need_data = true;
+ continue;
+ }
+ c = copy_raw_buf[raw_buf_ptr];
- /*
- * updating the line count for embedded CR and/or LF chars is
- * necessarily a little fragile - this test is probably about
- * the best we can do.
- */
- if (in_quote && c == (eol_type == EOL_CR ? '\r' : '\n'))
- copy_lineno++;
+ if (c == '\n')
+ {
+ raw_buf_ptr++; /* eat newline */
+ cstate->eol_type = EOL_CRNL; /* in case not set yet */
+ }
+ else
+ {
+ /* found \r, but no \n */
+ if (cstate->eol_type == EOL_CRNL)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("literal carriage return found in data"),
+ errhint("Use \"\\r\" to represent carriage return.")));
+ /*
+ * if we got here, it is the first line and we didn't
+ * find \n, so don't consume the peeked character
+ */
+ cstate->eol_type = EOL_CR;
+ }
+ }
+ else if (cstate->eol_type == EOL_NL)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("literal carriage return found in data"),
+ errhint("Use \"\\r\" to represent carriage return.")));
+ /* If we reach here, we have found the line terminator */
+ break;
+ }
+
+ if (c == '\n')
+ {
+ if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("literal newline found in data"),
+ errhint("Use \"\\n\" to represent newline.")));
+ cstate->eol_type = EOL_NL; /* in case not set yet */
+ /* If we reach here, we have found the line terminator */
+ break;
}
- if (!in_quote && c == '\r')
+ if (c == '\\')
{
- if (eol_type == EOL_NL)
+ /*
+ * If we need more data, go back to loop top to load it.
+ */
+ if (raw_buf_ptr >= copy_buf_len)
+ {
+ if (hit_eof)
+ {
+ /* backslash just before EOF, treat as data char */
+ result = true;
+ break;
+ }
+ raw_buf_ptr = prev_raw_ptr; /* undo fetch */
+ need_data = true;
+ continue;
+ }
+
+ /*
+ * In non-CSV mode, backslash quotes the following character
+ * even if it's a newline, so we always advance to next character
+ */
+ c = copy_raw_buf[raw_buf_ptr++];
+
+ if (c == '.')
{
- if (! csv_mode)
+ if (cstate->eol_type == EOL_CRNL)
+ {
+ if (raw_buf_ptr >= copy_buf_len && !hit_eof)
+ {
+ raw_buf_ptr = prev_raw_ptr; /* undo fetch */
+ need_data = true;
+ continue;
+ }
+ /* if hit_eof, c will become '\0' */
+ c = copy_raw_buf[raw_buf_ptr++];
+ if (c == '\n')
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("end-of-copy marker does not match previous newline style")));
+ if (c != '\r')
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("end-of-copy marker corrupt")));
+ }
+ if (raw_buf_ptr >= copy_buf_len && !hit_eof)
+ {
+ raw_buf_ptr = prev_raw_ptr; /* undo fetch */
+ need_data = true;
+ continue;
+ }
+ /* if hit_eof, c will become '\0' */
+ c = copy_raw_buf[raw_buf_ptr++];
+ if (c != '\r' && c != '\n')
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- errmsg("literal carriage return found in data"),
- errhint("Use \"\\r\" to represent carriage return.")));
- else
+ errmsg("end-of-copy marker corrupt")));
+ if ((cstate->eol_type == EOL_NL && c != '\n') ||
+ (cstate->eol_type == EOL_CRNL && c != '\n') ||
+ (cstate->eol_type == EOL_CR && c != '\r'))
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- errmsg("unquoted carriage return found in CSV data"),
- errhint("Use quoted CSV field to represent carriage return.")));
+ errmsg("end-of-copy marker does not match previous newline style")));
+
+ /*
+ * Transfer only the data before the \. into line_buf,
+ * then discard the data and the \. sequence.
+ */
+ if (prev_raw_ptr > cstate->raw_buf_index)
+ appendBinaryStringInfo(&cstate->line_buf,
+ cstate->raw_buf + cstate->raw_buf_index,
+ prev_raw_ptr - cstate->raw_buf_index);
+ cstate->raw_buf_index = raw_buf_ptr;
+ result = true; /* report EOF */
+ break;
+ }
+ }
+
+ /*
+ * Do we need to be careful about trailing bytes of multibyte
+ * characters? (See note above about client_only_encoding)
+ *
+ * We assume here that pg_encoding_mblen only looks at the first
+ * byte of the character!
+ */
+ if (cstate->client_only_encoding)
+ {
+ int mblen;
+
+ s[0] = c;
+ mblen = pg_encoding_mblen(cstate->client_encoding, s);
+ if (raw_buf_ptr + (mblen-1) > copy_buf_len)
+ {
+ if (hit_eof)
+ {
+ /* consume the partial character (conversion will fail) */
+ raw_buf_ptr = copy_buf_len;
+ result = true;
+ break;
+ }
+ raw_buf_ptr = prev_raw_ptr; /* undo fetch */
+ need_data = true;
+ continue;
}
+ raw_buf_ptr += mblen-1;
+ }
+ } /* end of outer loop */
+
+ /*
+ * Transfer any still-uncopied data to line_buf.
+ */
+ if (raw_buf_ptr > cstate->raw_buf_index)
+ {
+ appendBinaryStringInfo(&cstate->line_buf,
+ cstate->raw_buf + cstate->raw_buf_index,
+ raw_buf_ptr - cstate->raw_buf_index);
+ cstate->raw_buf_index = raw_buf_ptr;
+ }
+
+ return result;
+}
+
+/*
+ * CopyReadLineCSV - inner loop of CopyReadLine for CSV mode
+ *
+ * If you need to change this, better look at CopyReadLineText too
+ */
+static bool
+CopyReadLineCSV(CopyState cstate)
+{
+ bool result;
+ char *copy_raw_buf;
+ int raw_buf_ptr;
+ int copy_buf_len;
+ bool need_data;
+ bool hit_eof;
+ unsigned char s[2];
+ bool in_quote = false, last_was_esc = false;
+ char quotec = cstate->quote[0];
+ char escapec = cstate->escape[0];
+
+ /* ignore special escape processing if it's the same as quotec */
+ if (quotec == escapec)
+ escapec = '\0';
+
+ s[1] = 0;
+
+ /* set default status */
+ result = false;
+
+ /*
+ * The objective of this loop is to transfer the entire next input
+ * line into line_buf. Hence, we only care for detecting newlines
+ * (\r and/or \n) and the end-of-copy marker (\.).
+ *
+ * In CSV mode, \r and \n inside a quoted field are just part of the
+ * data value and are put in line_buf. We keep just enough state
+ * to know if we are currently in a quoted field or not.
+ *
+ * These four characters, and the CSV escape and quote characters,
+ * are assumed the same in frontend and backend encodings.
+ *
+ * For speed, we try to move data to line_buf in chunks rather than
+ * one character at a time. raw_buf_ptr points to the next character
+ * to examine; any characters from raw_buf_index to raw_buf_ptr have
+ * been determined to be part of the line, but not yet transferred
+ * to line_buf.
+ *
+ * For a little extra speed within the loop, we copy raw_buf and
+ * raw_buf_len into local variables.
+ */
+ copy_raw_buf = cstate->raw_buf;
+ raw_buf_ptr = cstate->raw_buf_index;
+ copy_buf_len = cstate->raw_buf_len;
+ need_data = false; /* flag to force reading more data */
+ hit_eof = false; /* flag indicating no more data available */
+
+ for (;;)
+ {
+ int prev_raw_ptr;
+ char c;
+
+ /* Load more data if needed */
+ if (raw_buf_ptr >= copy_buf_len || need_data)
+ {
+ /*
+ * Transfer any approved data to line_buf; must do this to
+ * be sure there is some room in raw_buf.
+ */
+ if (raw_buf_ptr > cstate->raw_buf_index)
+ {
+ appendBinaryStringInfo(&cstate->line_buf,
+ cstate->raw_buf + cstate->raw_buf_index,
+ raw_buf_ptr - cstate->raw_buf_index);
+ cstate->raw_buf_index = raw_buf_ptr;
+ }
+ /*
+ * Try to read some more data. This will certainly reset
+ * raw_buf_index to zero, and raw_buf_ptr must go with it.
+ */
+ if (!CopyLoadRawBuf(cstate))
+ hit_eof = true;
+ raw_buf_ptr = 0;
+ copy_buf_len = cstate->raw_buf_len;
+ /*
+ * If we are completely out of data, break out of the loop,
+ * reporting EOF.
+ */
+ if (copy_buf_len <= 0)
+ {
+ result = true;
+ break;
+ }
+ need_data = false;
+ }
+
+ /* OK to fetch a character */
+ prev_raw_ptr = raw_buf_ptr;
+ c = copy_raw_buf[raw_buf_ptr++];
+
+ /*
+ * If character is '\\' or '\r', we may need to look ahead below.
+ * Force fetch of the next character if we don't already have it.
+ * We need to do this before changing CSV state, in case one of
+ * these characters is also the quote or escape character.
+ *
+ * Note: old-protocol does not like forced prefetch, but it's OK
+ * here since we cannot validly be at EOF.
+ */
+ if (c == '\\' || c == '\r')
+ {
+ if (raw_buf_ptr >= copy_buf_len && !hit_eof)
+ {
+ raw_buf_ptr = prev_raw_ptr; /* undo fetch */
+ need_data = true;
+ continue;
+ }
+ }
+
+ /*
+ * Dealing with quotes and escapes here is mildly tricky. If the
+ * quote char is also the escape char, there's no problem - we
+ * just use the char as a toggle. If they are different, we need
+ * to ensure that we only take account of an escape inside a quoted
+ * field and immediately preceding a quote char, and not the
+ * second in an escape-escape sequence.
+ */
+ if (in_quote && c == escapec)
+ last_was_esc = ! last_was_esc;
+ if (c == quotec && ! last_was_esc)
+ in_quote = ! in_quote;
+ if (c != escapec)
+ last_was_esc = false;
+
+ /*
+ * Updating the line count for embedded CR and/or LF chars is
+ * necessarily a little fragile - this test is probably about
+ * the best we can do. (XXX it's arguable whether we should
+ * do this at all --- is cur_lineno a physical or logical count?)
+ */
+ if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
+ cstate->cur_lineno++;
+
+ if (c == '\r' && !in_quote)
+ {
/* Check for \r\n on first line, _and_ handle \r\n. */
- if (eol_type == EOL_UNKNOWN || eol_type == EOL_CRNL)
+ if (cstate->eol_type == EOL_UNKNOWN ||
+ cstate->eol_type == EOL_CRNL)
{
- int c2 = CopyPeekChar();
+ /*
+ * If we need more data, go back to loop top to load it.
+ *
+ * Note that if we are at EOF, c will wind up as '\0'
+ * because of the guaranteed pad of raw_buf.
+ */
+ if (raw_buf_ptr >= copy_buf_len && !hit_eof)
+ {
+ raw_buf_ptr = prev_raw_ptr; /* undo fetch */
+ need_data = true;
+ continue;
+ }
+ c = copy_raw_buf[raw_buf_ptr];
- if (c2 == '\n')
+ if (c == '\n')
{
- CopyDonePeek(c2, true); /* eat newline */
- eol_type = EOL_CRNL;
+ raw_buf_ptr++; /* eat newline */
+ cstate->eol_type = EOL_CRNL; /* in case not set yet */
}
else
{
/* found \r, but no \n */
- if (eol_type == EOL_CRNL)
- {
- if (!csv_mode)
- ereport(ERROR,
- (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- errmsg("literal carriage return found in data"),
- errhint("Use \"\\r\" to represent carriage return.")));
- else
- ereport(ERROR,
- (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- errmsg("unquoted carriage return found in data"),
- errhint("Use quoted CSV field to represent carriage return.")));
-
- }
-
+ if (cstate->eol_type == EOL_CRNL)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("unquoted carriage return found in data"),
+ errhint("Use quoted CSV field to represent carriage return.")));
/*
* if we got here, it is the first line and we didn't
- * get \n, so put it back
+ * find \n, so don't consume the peeked character
*/
- CopyDonePeek(c2, false);
- eol_type = EOL_CR;
+ cstate->eol_type = EOL_CR;
}
}
+ else if (cstate->eol_type == EOL_NL)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("unquoted carriage return found in CSV data"),
+ errhint("Use quoted CSV field to represent carriage return.")));
+ /* If we reach here, we have found the line terminator */
break;
}
- if (!in_quote && c == '\n')
+
+ if (c == '\n' && !in_quote)
{
- if (eol_type == EOL_CR || eol_type == EOL_CRNL)
- {
- if (!csv_mode)
- ereport(ERROR,
- (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- errmsg("literal newline found in data"),
- errhint("Use \"\\n\" to represent newline.")));
- else
- ereport(ERROR,
- (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
- errmsg("unquoted newline found in data"),
- errhint("Use quoted CSV field to represent newline.")));
-
- }
- eol_type = EOL_NL;
+ if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("unquoted newline found in data"),
+ errhint("Use quoted CSV field to represent newline.")));
+ cstate->eol_type = EOL_NL; /* in case not set yet */
+ /* If we reach here, we have found the line terminator */
break;
}
- if ((line_buf.len == 0 || !csv_mode) && c == '\\')
+ /*
+ * In CSV mode, we only recognize \. at start of line
+ */
+ if (c == '\\' && cstate->line_buf.len == 0)
{
- int c2;
-
- if (csv_mode)
- c2 = CopyPeekChar();
- else
- c2 = c = CopyGetChar();
+ char c2;
- if (c2 == EOF)
+ /*
+ * If we need more data, go back to loop top to load it.
+ */
+ if (raw_buf_ptr >= copy_buf_len)
{
- result = true;
- if (csv_mode)
- CopyDonePeek(c2, true);
- break;
+ if (hit_eof)
+ {
+ /* backslash just before EOF, treat as data char */
+ result = true;
+ break;
+ }
+ raw_buf_ptr = prev_raw_ptr; /* undo fetch */
+ need_data = true;
+ continue;
}
+
+ /*
+ * Note: we do not change c here since we aren't treating \
+ * as escaping the next character.
+ */
+ c2 = copy_raw_buf[raw_buf_ptr];
+
if (c2 == '.')
{
- if (csv_mode)
- CopyDonePeek(c2, true); /* allow keep calling GetChar() */
+ raw_buf_ptr++; /* consume the '.' */
- if (eol_type == EOL_CRNL)
+ /*
+ * Note: if we loop back for more data here, it does not
+ * matter that the CSV state change checks are re-executed;
+ * we will come back here with no important state changed.
+ */
+ if (cstate->eol_type == EOL_CRNL)
{
- c = CopyGetChar();
- if (c == '\n')
+ if (raw_buf_ptr >= copy_buf_len && !hit_eof)
+ {
+ raw_buf_ptr = prev_raw_ptr; /* undo fetch */
+ need_data = true;
+ continue;
+ }
+ /* if hit_eof, c2 will become '\0' */
+ c2 = copy_raw_buf[raw_buf_ptr++];
+ if (c2 == '\n')
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("end-of-copy marker does not match previous newline style")));
- if (c != '\r')
+ if (c2 != '\r')
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("end-of-copy marker corrupt")));
}
- c = CopyGetChar();
- if (c != '\r' && c != '\n')
+ if (raw_buf_ptr >= copy_buf_len && !hit_eof)
+ {
+ raw_buf_ptr = prev_raw_ptr; /* undo fetch */
+ need_data = true;
+ continue;
+ }
+ /* if hit_eof, c2 will become '\0' */
+ c2 = copy_raw_buf[raw_buf_ptr++];
+ if (c2 != '\r' && c2 != '\n')
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("end-of-copy marker corrupt")));
- if ((eol_type == EOL_NL && c != '\n') ||
- (eol_type == EOL_CRNL && c != '\n') ||
- (eol_type == EOL_CR && c != '\r'))
+ if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
+ (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
+ (cstate->eol_type == EOL_CR && c2 != '\r'))
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("end-of-copy marker does not match previous newline style")));
/*
- * In protocol version 3, we should ignore anything after
- * \. up to the protocol end of copy data. (XXX maybe
- * better not to treat \. as special?)
+ * Transfer only the data before the \. into line_buf,
+ * then discard the data and the \. sequence.
*/
- if (copy_dest == COPY_NEW_FE)
- {
- while (c != EOF)
- c = CopyGetChar();
- }
+ if (prev_raw_ptr > cstate->raw_buf_index)
+ appendBinaryStringInfo(&cstate->line_buf, cstate->raw_buf + cstate->raw_buf_index,
+ prev_raw_ptr - cstate->raw_buf_index);
+ cstate->raw_buf_index = raw_buf_ptr;
result = true; /* report EOF */
break;
}
-
- if (csv_mode)
- CopyDonePeek(c2, false); /* not a dot, so put it back */
- else
- /* not EOF mark, so emit \ and following char literally */
- appendStringInfoCharMacro(&line_buf, '\\');
}
- appendStringInfoCharMacro(&line_buf, c);
-
/*
- * When client encoding != server, must be careful to read the
- * extra bytes of a multibyte character exactly, since the
- * encoding might not ensure they don't look like ASCII. When the
- * encodings are the same, we need not do this, since no server
- * encoding we use has ASCII-like following bytes.
+ * Do we need to be careful about trailing bytes of multibyte
+ * characters? (See note above about client_only_encoding)
+ *
+ * We assume here that pg_encoding_mblen only looks at the first
+ * byte of the character!
*/
- if (change_encoding)
+ if (cstate->client_only_encoding)
{
+ int mblen;
+
s[0] = c;
- mblen = pg_encoding_mblen(client_encoding, s);
- for (j = 1; j < mblen; j++)
+ mblen = pg_encoding_mblen(cstate->client_encoding, s);
+ if (raw_buf_ptr + (mblen-1) > copy_buf_len)
{
- c = CopyGetChar();
- if (c == EOF)
+ if (hit_eof)
{
+ /* consume the partial character (conversion will fail) */
+ raw_buf_ptr = copy_buf_len;
result = true;
break;
}
- appendStringInfoCharMacro(&line_buf, c);
+ raw_buf_ptr = prev_raw_ptr; /* undo fetch */
+ need_data = true;
+ continue;
}
- if (result)
- break; /* out of outer loop */
+ raw_buf_ptr += mblen-1;
}
} /* end of outer loop */
- /* Done reading the line. Convert it to server encoding. */
- if (change_encoding)
+ /*
+ * Transfer any still-uncopied data to line_buf.
+ */
+ if (raw_buf_ptr > cstate->raw_buf_index)
{
- cvt = (char *) pg_client_to_server((unsigned char *) line_buf.data,
- line_buf.len);
- if (cvt != line_buf.data)
- {
- /* transfer converted data back to line_buf */
- line_buf.len = 0;
- line_buf.data[0] = '\0';
- appendBinaryStringInfo(&line_buf, cvt, strlen(cvt));
- }
+ appendBinaryStringInfo(&cstate->line_buf,
+ cstate->raw_buf + cstate->raw_buf_index,
+ raw_buf_ptr - cstate->raw_buf_index);
+ cstate->raw_buf_index = raw_buf_ptr;
}
- /* Now it's safe to use the buffer in error messages */
- line_buf_converted = true;
-
return result;
}
/*
* Return decimal value for a hexadecimal digit
*/
-static
-int GetDecimalFromHex(char hex)
+static int
+GetDecimalFromHex(char hex)
{
if (isdigit(hex))
return hex - '0';
@@ -2287,85 +2663,131 @@ int GetDecimalFromHex(char hex)
return tolower(hex) - 'a' + 10;
}
-/*----------
- * Read the value of a single attribute, performing de-escaping as needed.
+/*
+ * Parse the current line into separate attributes (fields),
+ * performing de-escaping as needed.
+ *
+ * The input is in line_buf. We use attribute_buf to hold the result
+ * strings. fieldvals[k] is set to point to the k'th attribute string,
+ * or NULL when the input matches the null marker string. (Note that the
+ * caller cannot check for nulls since the returned string would be the
+ * post-de-escaping equivalent, which may look the same as some valid data
+ * string.)
*
* delim is the column delimiter string (must be just one byte for now).
* null_print is the null marker string. Note that this is compared to
* the pre-de-escaped input string.
*
- * *result is set to indicate what terminated the read:
- * NORMAL_ATTR: column delimiter
- * END_OF_LINE: end of line
- * In either case, the string read up to the terminator is returned.
- *
- * *isnull is set true or false depending on whether the input matched
- * the null marker. Note that the caller cannot check this since the
- * returned string will be the post-de-escaping equivalent, which may
- * look the same as some valid data string.
- *----------
+ * The return value is the number of fields actually read. (We error out
+ * if this would exceed maxfields, which is the length of fieldvals[].)
*/
-static char *
-CopyReadAttribute(const char *delim, const char *null_print,
- CopyReadResult *result, bool *isnull)
+static int
+CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
{
- char c;
- char delimc = delim[0];
- int start_cursor = line_buf.cursor;
- int end_cursor;
- int input_len;
+ char delimc = cstate->delim[0];
+ int fieldno;
+ char *output_ptr;
+ char *cur_ptr;
+ char *line_end_ptr;
+
+ /*
+ * We need a special case for zero-column tables: check that the input
+ * line is empty, and return.
+ */
+ if (maxfields <= 0)
+ {
+ if (cstate->line_buf.len != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("extra data after last expected column")));
+ return 0;
+ }
/* reset attribute_buf to empty */
- attribute_buf.len = 0;
- attribute_buf.data[0] = '\0';
+ cstate->attribute_buf.len = 0;
+ cstate->attribute_buf.data[0] = '\0';
- /* set default status */
- *result = END_OF_LINE;
+ /*
+ * The de-escaped attributes will certainly not be longer than the input
+ * data line, so we can just force attribute_buf to be large enough and
+ * then transfer data without any checks for enough space. We need to
+ * do it this way because enlarging attribute_buf mid-stream would
+ * invalidate pointers already stored into fieldvals[].
+ */
+ if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
+ enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
+ output_ptr = cstate->attribute_buf.data;
+
+ /* set pointer variables for loop */
+ cur_ptr = cstate->line_buf.data;
+ line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
+ /* Outer loop iterates over fields */
+ fieldno = 0;
for (;;)
{
- end_cursor = line_buf.cursor;
- if (line_buf.cursor >= line_buf.len)
- break;
- c = line_buf.data[line_buf.cursor++];
- if (c == delimc)
- {
- *result = NORMAL_ATTR;
- break;
- }
- if (c == '\\')
+ bool found_delim = false;
+ char *start_ptr;
+ char *end_ptr;
+ int input_len;
+
+ /* Make sure space remains in fieldvals[] */
+ if (fieldno >= maxfields)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("extra data after last expected column")));
+
+ /* Remember start of field on both input and output sides */
+ start_ptr = cur_ptr;
+ fieldvals[fieldno] = output_ptr;
+
+ /* Scan data for field */
+ for (;;)
{
- if (line_buf.cursor >= line_buf.len)
+ char c;
+
+ end_ptr = cur_ptr;
+ if (cur_ptr >= line_end_ptr)
break;
- c = line_buf.data[line_buf.cursor++];
- switch (c)
+ c = *cur_ptr++;
+ if (c == delimc)
{
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- /* handle \013 */
+ found_delim = true;
+ break;
+ }
+ if (c == '\\')
+ {
+ if (cur_ptr >= line_end_ptr)
+ break;
+ c = *cur_ptr++;
+ switch (c)
+ {
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
{
+ /* handle \013 */
int val;
val = OCTVALUE(c);
- if (line_buf.cursor < line_buf.len)
+ if (cur_ptr < line_end_ptr)
{
- c = line_buf.data[line_buf.cursor];
+ c = *cur_ptr;
if (ISOCTAL(c))
{
- line_buf.cursor++;
+ cur_ptr++;
val = (val << 3) + OCTVALUE(c);
- if (line_buf.cursor < line_buf.len)
+ if (cur_ptr < line_end_ptr)
{
- c = line_buf.data[line_buf.cursor];
+ c = *cur_ptr;
if (ISOCTAL(c))
{
- line_buf.cursor++;
+ cur_ptr++;
val = (val << 3) + OCTVALUE(c);
}
}
@@ -2374,199 +2796,252 @@ CopyReadAttribute(const char *delim, const char *null_print,
c = val & 0377;
}
break;
- case 'x':
- /* Handle \x3F */
- if (line_buf.cursor < line_buf.len)
- {
- char hexchar = line_buf.data[line_buf.cursor];
-
- if (isxdigit(hexchar))
+ case 'x':
+ /* Handle \x3F */
+ if (cur_ptr < line_end_ptr)
{
- int val = GetDecimalFromHex(hexchar);
+ char hexchar = *cur_ptr;
- line_buf.cursor++;
- if (line_buf.cursor < line_buf.len)
+ if (isxdigit(hexchar))
{
- hexchar = line_buf.data[line_buf.cursor];
- if (isxdigit(hexchar))
+ int val = GetDecimalFromHex(hexchar);
+
+ cur_ptr++;
+ if (cur_ptr < line_end_ptr)
{
- line_buf.cursor++;
- val = (val << 4) + GetDecimalFromHex(hexchar);
+ hexchar = *cur_ptr;
+ if (isxdigit(hexchar))
+ {
+ cur_ptr++;
+ val = (val << 4) + GetDecimalFromHex(hexchar);
+ }
}
+ c = val & 0xff;
}
- c = val & 0xff;
}
- }
- break;
- case 'b':
- c = '\b';
- break;
- case 'f':
- c = '\f';
- break;
- case 'n':
- c = '\n';
- break;
- case 'r':
- c = '\r';
- break;
- case 't':
- c = '\t';
- break;
- case 'v':
- c = '\v';
- break;
-
- /*
- * in all other cases, take the char after '\'
- * literally
- */
+ break;
+ case 'b':
+ c = '\b';
+ break;
+ case 'f':
+ c = '\f';
+ break;
+ case 'n':
+ c = '\n';
+ break;
+ case 'r':
+ c = '\r';
+ break;
+ case 't':
+ c = '\t';
+ break;
+ case 'v':
+ c = '\v';
+ break;
+
+ /*
+ * in all other cases, take the char after '\'
+ * literally
+ */
+ }
}
+
+ /* Add c to output string */
+ *output_ptr++ = c;
}
- appendStringInfoCharMacro(&attribute_buf, c);
+
+ /* Terminate attribute value in output area */
+ *output_ptr++ = '\0';
+
+ /* Check whether raw input matched null marker */
+ input_len = end_ptr - start_ptr;
+ if (input_len == cstate->null_print_len &&
+ strncmp(start_ptr, cstate->null_print, input_len) == 0)
+ fieldvals[fieldno] = NULL;
+
+ fieldno++;
+ /* Done if we hit EOL instead of a delim */
+ if (!found_delim)
+ break;
}
- /* check whether raw input matched null marker */
- input_len = end_cursor - start_cursor;
- if (input_len == strlen(null_print) &&
- strncmp(&line_buf.data[start_cursor], null_print, input_len) == 0)
- *isnull = true;
- else
- *isnull = false;
+ /* Clean up state of attribute_buf */
+ output_ptr--;
+ Assert(*output_ptr == '\0');
+ cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
- return attribute_buf.data;
+ return fieldno;
}
-
/*
- * Read the value of a single attribute in CSV mode,
- * performing de-escaping as needed. Escaping does not follow the normal
- * PostgreSQL text mode, but instead "standard" (i.e. common) CSV usage.
- *
- * Quoted fields can span lines, in which case the line end is embedded
- * in the returned string.
- *
- * null_print is the null marker string. Note that this is compared to
- * the pre-de-escaped input string (thus if it is quoted it is not a NULL).
- *
- * *result is set to indicate what terminated the read:
- * NORMAL_ATTR: column delimiter
- * END_OF_LINE: end of line
- * UNTERMINATED_FIELD no quote detected at end of a quoted field
- *
- * In any case, the string read up to the terminator (or end of file)
- * is returned.
- *
- * *isnull is set true or false depending on whether the input matched
- * the null marker. Note that the caller cannot check this since the
- * returned string will be the post-de-escaping equivalent, which may
- * look the same as some valid data string.
- *----------
+ * Parse the current line into separate attributes (fields),
+ * performing de-escaping as needed. This has exactly the same API as
+ * CopyReadAttributesText, except we parse the fields according to
+ * "standard" (i.e. common) CSV usage.
+ *
+ * The result is the number of fields found. Each fieldvals[] entry is
+ * set to point at that field's de-escaped, null-terminated text inside
+ * cstate->attribute_buf, or to NULL when the field contained no quote
+ * and its raw input matched the null marker. An error is raised if the
+ * line holds more than maxfields fields, or if the line ends while still
+ * inside a quoted field.
*/
-
-static char *
-CopyReadAttributeCSV(const char *delim, const char *null_print, char *quote,
- char *escape, CopyReadResult *result, bool *isnull)
+static int
+CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals)
{
- char delimc = delim[0];
- char quotec = quote[0];
- char escapec = escape[0];
- char c;
- int start_cursor = line_buf.cursor;
- int end_cursor = start_cursor;
- int input_len;
- bool in_quote = false;
- bool saw_quote = false;
+ char delimc = cstate->delim[0];
+ char quotec = cstate->quote[0];
+ char escapec = cstate->escape[0];
+ int fieldno;
+ char *output_ptr;
+ char *cur_ptr;
+ char *line_end_ptr;
+
+ /*
+ * We need a special case for zero-column tables: check that the input
+ * line is empty, and return.
+ */
+ if (maxfields <= 0)
+ {
+ if (cstate->line_buf.len != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("extra data after last expected column")));
+ return 0;
+ }
/* reset attribute_buf to empty */
- attribute_buf.len = 0;
- attribute_buf.data[0] = '\0';
+ cstate->attribute_buf.len = 0;
+ cstate->attribute_buf.data[0] = '\0';
- /* set default status */
- *result = END_OF_LINE;
+ /*
+ * The de-escaped attributes will certainly not be longer than the input
+ * data line, so we can just force attribute_buf to be large enough and
+ * then transfer data without any checks for enough space. We need to
+ * do it this way because enlarging attribute_buf mid-stream would
+ * invalidate pointers already stored into fieldvals[].
+ */
+ if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
+ enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
+ output_ptr = cstate->attribute_buf.data;
+
+ /* set pointer variables for loop */
+ cur_ptr = cstate->line_buf.data;
+ line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
+ /* Outer loop iterates over fields */
+ fieldno = 0;
for (;;)
{
- end_cursor = line_buf.cursor;
- if (line_buf.cursor >= line_buf.len)
- break;
- c = line_buf.data[line_buf.cursor++];
+ bool found_delim = false;
+ bool in_quote = false;
+ bool saw_quote = false;
+ char *start_ptr;
+ char *end_ptr;
+ int input_len;
+
+ /* Make sure space remains in fieldvals[] */
+ if (fieldno >= maxfields)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("extra data after last expected column")));
- /* unquoted field delimiter */
- if (!in_quote && c == delimc)
- {
- *result = NORMAL_ATTR;
- break;
- }
+ /* Remember start of field on both input and output sides */
+ start_ptr = cur_ptr;
+ fieldvals[fieldno] = output_ptr;
- /* start of quoted field (or part of field) */
- if (!in_quote && c == quotec)
+ /* Scan data for field */
+ for (;;)
{
- saw_quote = true;
- in_quote = true;
- continue;
- }
+ char c;
- /* escape within a quoted field */
- if (in_quote && c == escapec)
- {
- /*
- * peek at the next char if available, and escape it if it is
- * an escape char or a quote char
- */
- if (line_buf.cursor <= line_buf.len)
+ end_ptr = cur_ptr;
+ if (cur_ptr >= line_end_ptr)
+ break;
+ c = *cur_ptr++;
+ /* unquoted field delimiter */
+ if (c == delimc && !in_quote)
{
- char nextc = line_buf.data[line_buf.cursor];
-
- if (nextc == escapec || nextc == quotec)
+ found_delim = true;
+ break;
+ }
+ /* start of quoted field (or part of field) */
+ if (c == quotec && !in_quote)
+ {
+ saw_quote = true;
+ in_quote = true;
+ continue;
+ }
+ /* escape within a quoted field */
+ if (c == escapec && in_quote)
+ {
+ /*
+ * peek at the next char if available, and escape it if it is
+ * an escape char or a quote char
+ */
+ if (cur_ptr < line_end_ptr)
{
- appendStringInfoCharMacro(&attribute_buf, nextc);
- line_buf.cursor++;
- continue;
+ char nextc = *cur_ptr;
+
+ if (nextc == escapec || nextc == quotec)
+ {
+ *output_ptr++ = nextc;
+ cur_ptr++;
+ continue;
+ }
}
}
- }
+ /*
+ * end of quoted field. Must do this test after testing for escape
+ * in case quote char and escape char are the same (which is the
+ * common case).
+ */
+ if (c == quotec && in_quote)
+ {
+ in_quote = false;
+ continue;
+ }
- /*
- * end of quoted field. Must do this test after testing for escape
- * in case quote char and escape char are the same (which is the
- * common case).
- */
- if (in_quote && c == quotec)
- {
- in_quote = false;
- continue;
+ /* Add c to output string */
+ *output_ptr++ = c;
}
- appendStringInfoCharMacro(&attribute_buf, c);
- }
- if (in_quote)
- *result = UNTERMINATED_FIELD;
+ /* Terminate attribute value in output area */
+ *output_ptr++ = '\0';
- /* check whether raw input matched null marker */
- input_len = end_cursor - start_cursor;
- if (!saw_quote && input_len == strlen(null_print) &&
- strncmp(&line_buf.data[start_cursor], null_print, input_len) == 0)
- *isnull = true;
- else
- *isnull = false;
+ /* Shouldn't still be in quote mode */
+ if (in_quote)
+ ereport(ERROR,
+ (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+ errmsg("unterminated CSV quoted field")));
- return attribute_buf.data;
+ /* Check whether raw input matched null marker; a field in which any quote was seen is never NULL */
+ input_len = end_ptr - start_ptr;
+ if (!saw_quote && input_len == cstate->null_print_len &&
+ strncmp(start_ptr, cstate->null_print, input_len) == 0)
+ fieldvals[fieldno] = NULL;
+
+ fieldno++;
+ /* Done if we hit EOL instead of a delim */
+ if (!found_delim)
+ break;
+ }
+
+ /* Clean up state of attribute_buf: back up over the last terminator so that len does not count it */
+ output_ptr--;
+ Assert(*output_ptr == '\0');
+ cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
+
+ return fieldno;
}
+
/*
* Read a binary attribute
*/
static Datum
-CopyReadBinaryAttribute(int column_no, FmgrInfo *flinfo,
+CopyReadBinaryAttribute(CopyState cstate,
+ int column_no, FmgrInfo *flinfo,
Oid typioparam, int32 typmod,
bool *isnull)
{
int32 fld_size;
Datum result;
- fld_size = CopyGetInt32();
- if (CopyGetEof())
+ if (!CopyGetInt32(cstate, &fld_size))
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("unexpected EOF in COPY data")));
@@ -2581,29 +3056,29 @@ CopyReadBinaryAttribute(int column_no, FmgrInfo *flinfo,
errmsg("invalid field size")));
/* reset attribute_buf to empty, and load raw data in it */
- attribute_buf.len = 0;
- attribute_buf.data[0] = '\0';
- attribute_buf.cursor = 0;
+ cstate->attribute_buf.len = 0;
+ cstate->attribute_buf.data[0] = '\0';
+ cstate->attribute_buf.cursor = 0;
- enlargeStringInfo(&attribute_buf, fld_size);
+ enlargeStringInfo(&cstate->attribute_buf, fld_size);
- CopyGetData(attribute_buf.data, fld_size);
- if (CopyGetEof())
+ if (CopyGetData(cstate, cstate->attribute_buf.data,
+ fld_size, fld_size) != fld_size)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("unexpected EOF in COPY data")));
- attribute_buf.len = fld_size;
- attribute_buf.data[fld_size] = '\0';
+ cstate->attribute_buf.len = fld_size;
+ cstate->attribute_buf.data[fld_size] = '\0';
/* Call the column type's binary input converter */
result = FunctionCall3(flinfo,
- PointerGetDatum(&attribute_buf),
+ PointerGetDatum(&cstate->attribute_buf),
ObjectIdGetDatum(typioparam),
Int32GetDatum(typmod));
/* Trouble if it didn't eat the whole buffer */
- if (attribute_buf.cursor != attribute_buf.len)
+ if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
ereport(ERROR,
(errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
errmsg("incorrect binary data format")));
@@ -2616,17 +3091,14 @@ CopyReadBinaryAttribute(int column_no, FmgrInfo *flinfo,
* Send text representation of one attribute, with conversion and escaping
*/
static void
-CopyAttributeOut(char *server_string, char *delim)
+CopyAttributeOutText(CopyState cstate, char *server_string)
{
char *string;
char c;
- char delimc = delim[0];
- bool same_encoding;
+ char delimc = cstate->delim[0];
int mblen;
- int i;
- same_encoding = (server_encoding == client_encoding);
- if (!same_encoding)
+ if (cstate->need_transcoding)
string = (char *) pg_server_to_client((unsigned char *) server_string,
strlen(server_string));
else
@@ -2639,43 +3111,38 @@ CopyAttributeOut(char *server_string, char *delim)
switch (c)
{
case '\b':
- CopySendString("\\b");
+ CopySendString(cstate, "\\b");
break;
case '\f':
- CopySendString("\\f");
+ CopySendString(cstate, "\\f");
break;
case '\n':
- CopySendString("\\n");
+ CopySendString(cstate, "\\n");
break;
case '\r':
- CopySendString("\\r");
+ CopySendString(cstate, "\\r");
break;
case '\t':
- CopySendString("\\t");
+ CopySendString(cstate, "\\t");
break;
case '\v':
- CopySendString("\\v");
+ CopySendString(cstate, "\\v");
break;
case '\\':
- CopySendString("\\\\");
+ CopySendString(cstate, "\\\\");
break;
default:
if (c == delimc)
- CopySendChar('\\');
- CopySendChar(c);
+ CopySendChar(cstate, '\\');
/*
* We can skip pg_encoding_mblen() overhead when encoding
- * is same, because in valid backend encodings, extra
+ * is safe, because in valid backend encodings, extra
* bytes of a multibyte character never look like ASCII.
*/
- if (!same_encoding)
- {
- /* send additional bytes of the char, if any */
- mblen = pg_encoding_mblen(client_encoding, string);
- for (i = 1; i < mblen; i++)
- CopySendChar(string[i]);
- }
+ if (cstate->client_only_encoding)
+ mblen = pg_encoding_mblen(cstate->client_encoding, string);
+ CopySendData(cstate, string, mblen);
break;
}
}
@@ -2686,21 +3153,22 @@ CopyAttributeOut(char *server_string, char *delim)
* CSV type escaping
*/
static void
-CopyAttributeOutCSV(char *server_string, char *delim, char *quote,
- char *escape, bool use_quote)
+CopyAttributeOutCSV(CopyState cstate, char *server_string,
+ bool use_quote)
{
char *string;
char c;
- char delimc = delim[0];
- char quotec = quote[0];
- char escapec = escape[0];
- char *test_string;
- bool same_encoding;
+ char delimc = cstate->delim[0];
+ char quotec = cstate->quote[0];
+ char escapec = cstate->escape[0];
+ char *tstring;
int mblen;
- int i;
- same_encoding = (server_encoding == client_encoding);
- if (!same_encoding)
+ /* force quoting if it matches null_print */
+ if (!use_quote && strcmp(server_string, cstate->null_print) == 0)
+ use_quote = true;
+
+ if (cstate->need_transcoding)
string = (char *) pg_server_to_client((unsigned char *) server_string,
strlen(server_string));
else
@@ -2710,42 +3178,38 @@ CopyAttributeOutCSV(char *server_string, char *delim, char *quote,
* have to run through the string twice, first time to see if it needs
* quoting, second to actually send it
*/
-
- for (test_string = string;
- !use_quote && (c = *test_string) != '\0';
- test_string += mblen)
+ if (!use_quote)
{
- if (c == delimc || c == quotec || c == '\n' || c == '\r')
- use_quote = true;
- if (!same_encoding)
- mblen = pg_encoding_mblen(client_encoding, test_string);
- else
- mblen = 1;
+ for (tstring = string; (c = *tstring) != '\0'; tstring += mblen)
+ {
+ if (c == delimc || c == quotec || c == '\n' || c == '\r')
+ {
+ use_quote = true;
+ break;
+ }
+ if (cstate->client_only_encoding)
+ mblen = pg_encoding_mblen(cstate->client_encoding, tstring);
+ else
+ mblen = 1;
+ }
}
if (use_quote)
- CopySendChar(quotec);
+ CopySendChar(cstate, quotec);
for (; (c = *string) != '\0'; string += mblen)
{
if (use_quote && (c == quotec || c == escapec))
- CopySendChar(escapec);
-
- CopySendChar(c);
-
- if (!same_encoding)
- {
- /* send additional bytes of the char, if any */
- mblen = pg_encoding_mblen(client_encoding, string);
- for (i = 1; i < mblen; i++)
- CopySendChar(string[i]);
- }
+ CopySendChar(cstate, escapec);
+ if (cstate->client_only_encoding)
+ mblen = pg_encoding_mblen(cstate->client_encoding, string);
else
mblen = 1;
+ CopySendData(cstate, string, mblen);
}
if (use_quote)
- CopySendChar(quotec);
+ CopySendChar(cstate, quotec);
}
/*