about | summary | refs | log | tree | commit | diff
path: root/src/include/commands/copyfrom_internal.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/commands/copyfrom_internal.h')
-rw-r--r-- src/include/commands/copyfrom_internal.h | 62
1 files changed, 35 insertions, 27 deletions
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index 705f5b615be..858af7a717b 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -52,17 +52,6 @@ typedef enum CopyInsertMethod
/*
* This struct contains all the state variables used throughout a COPY FROM
* operation.
- *
- * Multi-byte encodings: all supported client-side encodings encode multi-byte
- * characters by having the first byte's high bit set. Subsequent bytes of the
- * character can have the high bit not set. When scanning data in such an
- * encoding to look for a match to a single-byte (ie ASCII) character, we must
- * use the full pg_encoding_mblen() machinery to skip over multibyte
- * characters, else we might find a false match to a trailing byte. In
- * supported server encodings, there is no possibility of a false match, and
- * it's faster to make useless comparisons to trailing bytes than it is to
- * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
- * when we have to do it the hard way.
*/
typedef struct CopyFromStateData
{
@@ -70,13 +59,11 @@ typedef struct CopyFromStateData
CopySource copy_src; /* type of copy source */
FILE *copy_file; /* used if copy_src == COPY_FILE */
StringInfo fe_msgbuf; /* used if copy_src == COPY_NEW_FE */
- bool reached_eof; /* true if we read to end of copy data (not
- * all copy_src types maintain this) */
EolType eol_type; /* EOL type of input */
int file_encoding; /* file or remote side's character encoding */
bool need_transcoding; /* file encoding diff from server? */
- bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
+ Oid conversion_proc; /* encoding conversion function */
/* parameters from the COPY command */
Relation rel; /* relation to copy from */
@@ -131,31 +118,52 @@ typedef struct CopyFromStateData
/*
* Similarly, line_buf holds the whole input line being processed. The
- * input cycle is first to read the whole line into line_buf, convert it
- * to server encoding there, and then extract the individual attribute
- * fields into attribute_buf. line_buf is preserved unmodified so that we
- * can display it in error messages if appropriate. (In binary mode,
- * line_buf is not used.)
+ * input cycle is first to read the whole line into line_buf, and then
+ * extract the individual attribute fields into attribute_buf. line_buf
+ * is preserved unmodified so that we can display it in error messages if
+ * appropriate. (In binary mode, line_buf is not used.)
*/
StringInfoData line_buf;
- bool line_buf_converted; /* converted to server encoding? */
bool line_buf_valid; /* contains the row being processed? */
/*
- * Finally, raw_buf holds raw data read from the data source (file or
- * client connection). In text mode, CopyReadLine parses this data
- * sufficiently to locate line boundaries, then transfers the data to
- * line_buf and converts it. In binary mode, CopyReadBinaryData fetches
- * appropriate amounts of data from this buffer. In both modes, we
- * guarantee that there is a \0 at raw_buf[raw_buf_len].
+ * input_buf holds input data, already converted to database encoding.
+ *
+ * In text mode, CopyReadLine parses this data sufficiently to locate
+ * line boundaries, then transfers the data to line_buf. We guarantee
+ * that there is a \0 at input_buf[input_buf_len] at all times. (In
+ * binary mode, input_buf is not used.)
+ *
+ * If encoding conversion is not required, input_buf is not a separate
+ * buffer but points directly to raw_buf. In that case, input_buf_len
+ * tracks the number of bytes that have been verified as valid in the
+ * database encoding, and raw_buf_len is the total number of bytes
+ * stored in the buffer.
+ */
+#define INPUT_BUF_SIZE 65536 /* we palloc INPUT_BUF_SIZE+1 bytes */
+ char *input_buf;
+ int input_buf_index; /* next byte to process */
+ int input_buf_len; /* total # of bytes stored */
+ bool input_reached_eof; /* true if we reached EOF */
+ bool input_reached_error; /* true if a conversion error happened */
+ /* Shorthand for number of unconsumed bytes available in input_buf */
+#define INPUT_BUF_BYTES(cstate) ((cstate)->input_buf_len - (cstate)->input_buf_index)
+
+ /*
+ * raw_buf holds raw input data read from the data source (file or client
+ * connection), not yet converted to the database encoding. Like with
+ * 'input_buf', we guarantee that there is a \0 at raw_buf[raw_buf_len].
*/
#define RAW_BUF_SIZE 65536 /* we palloc RAW_BUF_SIZE+1 bytes */
char *raw_buf;
int raw_buf_index; /* next byte to process */
int raw_buf_len; /* total # of bytes stored */
- uint64 bytes_processed;/* number of bytes processed so far */
+ bool raw_reached_eof; /* true if we reached EOF */
+
/* Shorthand for number of unconsumed bytes available in raw_buf */
#define RAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index)
+
+ uint64 bytes_processed; /* number of bytes processed so far */
} CopyFromStateData;
extern void ReceiveCopyBegin(CopyFromState cstate);