diff options
author | drh <drh@noemail.net> | 2017-06-26 18:42:23 +0000 |
---|---|---|
committer | drh <drh@noemail.net> | 2017-06-26 18:42:23 +0000 |
commit | d5fbde80a2ecb0ceaadb09804e1e9af2586de9f9 (patch) | |
tree | d2d3461e80f5ad74a521454b4ddb73a3560a820a /src | |
parent | a22dd3860a3491d621fec0d0d507c7a0a60d9b6b (diff) | |
download | sqlite-d5fbde80a2ecb0ceaadb09804e1e9af2586de9f9.tar.gz sqlite-d5fbde80a2ecb0ceaadb09804e1e9af2586de9f9.zip |
The ".import" command of the shell and the csv virtual table extension both
ignore a single UTF-8 BOM (byte-order mark, bytes 0xEF 0xBB 0xBF) at the beginning of their input.
FossilOrigin-Name: 7c15d762d99c2e3e534cd35dfe25ddcd317637eb1f2655fd24c2dd5f9d5a7613
Diffstat (limited to 'src')
-rw-r--r-- | src/shell.c | 17 |
1 files changed, 17 insertions, 0 deletions
diff --git a/src/shell.c b/src/shell.c index 490c922e6..ec255a673 100644 --- a/src/shell.c +++ b/src/shell.c @@ -3822,6 +3822,7 @@ struct ImportCtx { int n; /* Number of bytes in z */ int nAlloc; /* Space allocated for z[] */ int nLine; /* Current line number */ + int bNotFirst; /* True if one or more bytes already read */ int cTerm; /* Character that terminated the most recent field */ int cColSep; /* The column separator character. (Usually ",") */ int cRowSep; /* The row separator character. (Usually "\n") */ @@ -3901,6 +3902,21 @@ static char *SQLITE_CDECL csv_read_one_field(ImportCtx *p){ pc = c; } }else{ + /* If this is the first field being parsed and it begins with the + ** UTF-8 BOM (0xEF BB BF) then skip the BOM */ + if( (c&0xff)==0xef && p->bNotFirst==0 ){ + import_append_char(p, c); + c = fgetc(p->in); + if( (c&0xff)==0xbb ){ + import_append_char(p, c); + c = fgetc(p->in); + if( (c&0xff)==0xbf ){ + p->bNotFirst = 1; + p->n = 0; + return csv_read_one_field(p); + } + } + } while( c!=EOF && c!=cSep && c!=rSep ){ import_append_char(p, c); c = fgetc(p->in); @@ -3912,6 +3928,7 @@ static char *SQLITE_CDECL csv_read_one_field(ImportCtx *p){ p->cTerm = c; } if( p->z ) p->z[p->n] = 0; + p->bNotFirst = 1; return p->z; } |