diff options
Diffstat (limited to 'src/include/commands/copyfrom_internal.h')
-rw-r--r-- | src/include/commands/copyfrom_internal.h | 62 |
1 files changed, 35 insertions, 27 deletions
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h index 705f5b615be..858af7a717b 100644 --- a/src/include/commands/copyfrom_internal.h +++ b/src/include/commands/copyfrom_internal.h @@ -52,17 +52,6 @@ typedef enum CopyInsertMethod /* * This struct contains all the state variables used throughout a COPY FROM * operation. - * - * Multi-byte encodings: all supported client-side encodings encode multi-byte - * characters by having the first byte's high bit set. Subsequent bytes of the - * character can have the high bit not set. When scanning data in such an - * encoding to look for a match to a single-byte (ie ASCII) character, we must - * use the full pg_encoding_mblen() machinery to skip over multibyte - * characters, else we might find a false match to a trailing byte. In - * supported server encodings, there is no possibility of a false match, and - * it's faster to make useless comparisons to trailing bytes than it is to - * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true - * when we have to do it the hard way. */ typedef struct CopyFromStateData { @@ -70,13 +59,11 @@ typedef struct CopyFromStateData CopySource copy_src; /* type of copy source */ FILE *copy_file; /* used if copy_src == COPY_FILE */ StringInfo fe_msgbuf; /* used if copy_src == COPY_NEW_FE */ - bool reached_eof; /* true if we read to end of copy data (not - * all copy_src types maintain this) */ EolType eol_type; /* EOL type of input */ int file_encoding; /* file or remote side's character encoding */ bool need_transcoding; /* file encoding diff from server? */ - bool encoding_embeds_ascii; /* ASCII can be non-first byte? */ + Oid conversion_proc; /* encoding conversion function */ /* parameters from the COPY command */ Relation rel; /* relation to copy from */ @@ -131,31 +118,52 @@ typedef struct CopyFromStateData /* * Similarly, line_buf holds the whole input line being processed. The - * input cycle is first to read the whole line into line_buf, convert it - * to server encoding there, and then extract the individual attribute - * fields into attribute_buf. line_buf is preserved unmodified so that we - * can display it in error messages if appropriate. (In binary mode, - * line_buf is not used.) + * input cycle is first to read the whole line into line_buf, and then + * extract the individual attribute fields into attribute_buf. line_buf + * is preserved unmodified so that we can display it in error messages if + * appropriate. (In binary mode, line_buf is not used.) */ StringInfoData line_buf; - bool line_buf_converted; /* converted to server encoding? */ bool line_buf_valid; /* contains the row being processed? */ /* - * Finally, raw_buf holds raw data read from the data source (file or - * client connection). In text mode, CopyReadLine parses this data - * sufficiently to locate line boundaries, then transfers the data to - * line_buf and converts it. In binary mode, CopyReadBinaryData fetches - * appropriate amounts of data from this buffer. In both modes, we - * guarantee that there is a \0 at raw_buf[raw_buf_len]. + * input_buf holds input data, already converted to database encoding. + * + * In text mode, CopyReadLine parses this data sufficiently to locate + * line boundaries, then transfers the data to line_buf. We guarantee + * that there is a \0 at input_buf[input_buf_len] at all times. (In + * binary mode, input_buf is not used.) + * + * If encoding conversion is not required, input_buf is not a separate + * buffer but points directly to raw_buf. In that case, input_buf_len + * tracks the number of bytes that have been verified as valid in the + * database encoding, and raw_buf_len is the total number of bytes + * stored in the buffer. + */ +#define INPUT_BUF_SIZE 65536 /* we palloc INPUT_BUF_SIZE+1 bytes */ + char *input_buf; + int input_buf_index; /* next byte to process */ + int input_buf_len; /* total # of bytes stored */ + bool input_reached_eof; /* true if we reached EOF */ + bool input_reached_error; /* true if a conversion error happened */ + /* Shorthand for number of unconsumed bytes available in input_buf */ +#define INPUT_BUF_BYTES(cstate) ((cstate)->input_buf_len - (cstate)->input_buf_index) + + /* + * raw_buf holds raw input data read from the data source (file or client + * connection), not yet converted to the database encoding. Like with + * 'input_buf', we guarantee that there is a \0 at raw_buf[raw_buf_len]. */ #define RAW_BUF_SIZE 65536 /* we palloc RAW_BUF_SIZE+1 bytes */ char *raw_buf; int raw_buf_index; /* next byte to process */ int raw_buf_len; /* total # of bytes stored */ - uint64 bytes_processed;/* number of bytes processed so far */ + bool raw_reached_eof; /* true if we reached EOF */ + /* Shorthand for number of unconsumed bytes available in raw_buf */ #define RAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index) + + uint64 bytes_processed; /* number of bytes processed so far */ } CopyFromStateData; extern void ReceiveCopyBegin(CopyFromState cstate); |