aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author drh <drh@noemail.net> 2017-06-26 18:42:23 +0000
committer drh <drh@noemail.net> 2017-06-26 18:42:23 +0000
commit d5fbde80a2ecb0ceaadb09804e1e9af2586de9f9 (patch)
tree d2d3461e80f5ad74a521454b4ddb73a3560a820a /src
parent a22dd3860a3491d621fec0d0d507c7a0a60d9b6b (diff)
download sqlite-d5fbde80a2ecb0ceaadb09804e1e9af2586de9f9.tar.gz
sqlite-d5fbde80a2ecb0ceaadb09804e1e9af2586de9f9.zip
The ".import" command of the shell and the csv virtual table extension both
ignore a single UTF-8 BOM at the beginning of their input.
FossilOrigin-Name: 7c15d762d99c2e3e534cd35dfe25ddcd317637eb1f2655fd24c2dd5f9d5a7613
Diffstat (limited to 'src')
-rw-r--r-- src/shell.c 17
1 files changed, 17 insertions, 0 deletions
diff --git a/src/shell.c b/src/shell.c
index 490c922e6..ec255a673 100644
--- a/src/shell.c
+++ b/src/shell.c
@@ -3822,6 +3822,7 @@ struct ImportCtx {
int n; /* Number of bytes in z */
int nAlloc; /* Space allocated for z[] */
int nLine; /* Current line number */
+ int bNotFirst; /* True if one or more bytes already read */
int cTerm; /* Character that terminated the most recent field */
int cColSep; /* The column separator character. (Usually ",") */
int cRowSep; /* The row separator character. (Usually "\n") */
@@ -3901,6 +3902,21 @@ static char *SQLITE_CDECL csv_read_one_field(ImportCtx *p){
pc = c;
}
}else{
+ /* If this is the first field being parsed and it begins with the
+ ** UTF-8 BOM (0xEF BB BF) then skip the BOM */
+ if( (c&0xff)==0xef && p->bNotFirst==0 ){
+ import_append_char(p, c);
+ c = fgetc(p->in);
+ if( (c&0xff)==0xbb ){
+ import_append_char(p, c);
+ c = fgetc(p->in);
+ if( (c&0xff)==0xbf ){
+ p->bNotFirst = 1;
+ p->n = 0;
+ return csv_read_one_field(p);
+ }
+ }
+ }
while( c!=EOF && c!=cSep && c!=rSep ){
import_append_char(p, c);
c = fgetc(p->in);
@@ -3912,6 +3928,7 @@ static char *SQLITE_CDECL csv_read_one_field(ImportCtx *p){
p->cTerm = c;
}
if( p->z ) p->z[p->n] = 0;
+ p->bNotFirst = 1;
return p->z;
}