diff options
author | drh <drh@noemail.net> | 2017-06-26 18:42:23 +0000 |
---|---|---|
committer | drh <drh@noemail.net> | 2017-06-26 18:42:23 +0000 |
commit | d5fbde80a2ecb0ceaadb09804e1e9af2586de9f9 (patch) | |
tree | d2d3461e80f5ad74a521454b4ddb73a3560a820a /ext/misc | |
parent | a22dd3860a3491d621fec0d0d507c7a0a60d9b6b (diff) | |
download | sqlite-d5fbde80a2ecb0ceaadb09804e1e9af2586de9f9.tar.gz sqlite-d5fbde80a2ecb0ceaadb09804e1e9af2586de9f9.zip |
The ".import" command of the shell and the csv virtual table extension both
ignore a single UTF-8 BOM at the beginning of their input.
FossilOrigin-Name: 7c15d762d99c2e3e534cd35dfe25ddcd317637eb1f2655fd24c2dd5f9d5a7613
Diffstat (limited to 'ext/misc')
-rw-r--r-- | ext/misc/csv.c | 18 |
1 file changed, 18 insertions, 0 deletions
diff --git a/ext/misc/csv.c b/ext/misc/csv.c
index f887784ab..bcafdc526 100644
--- a/ext/misc/csv.c
+++ b/ext/misc/csv.c
@@ -77,6 +77,7 @@ struct CsvReader {
   int n; /* Number of bytes in z */
   int nAlloc; /* Space allocated for z[] */
   int nLine; /* Current line number */
+  int bNotFirst; /* True if prior text has been seen */
   char cTerm; /* Character that terminated the most recent field */
   size_t iIn; /* Next unread character in the input buffer */
   size_t nIn; /* Number of characters in the input buffer */
@@ -91,6 +92,7 @@ static void csv_reader_init(CsvReader *p){
   p->n = 0;
   p->nAlloc = 0;
   p->nLine = 0;
+  p->bNotFirst = 0;
   p->nIn = 0;
   p->zIn = 0;
   p->zErr[0] = 0;
@@ -251,6 +253,21 @@ static char *csv_read_one_field(CsvReader *p){
       pc = c;
     }
   }else{
+    /* If this is the first field being parsed and it begins with the
+    ** UTF-8 BOM (0xEF BB BF) then skip the BOM */
+    if( (c&0xff)==0xef && p->bNotFirst==0 ){
+      csv_append(p, c);
+      c = csv_getc(p);
+      if( (c&0xff)==0xbb ){
+        csv_append(p, c);
+        c = csv_getc(p);
+        if( (c&0xff)==0xbf ){
+          p->bNotFirst = 1;
+          p->n = 0;
+          return csv_read_one_field(p);
+        }
+      }
+    }
     while( c>',' || (c!=EOF && c!=',' && c!='\n') ){
       if( csv_append(p, (char)c) ) return 0;
       c = csv_getc(p);
@@ -262,6 +279,7 @@ static char *csv_read_one_field(CsvReader *p){
     p->cTerm = (char)c;
   }
   if( p->z ) p->z[p->n] = 0;
+  p->bNotFirst = 1;
   return p->z;
 }