aboutsummaryrefslogtreecommitdiff
path: root/ext/misc/csv.c
diff options
context:
space:
mode:
author drh <drh@noemail.net> 2018-11-16 01:42:26 +0000
committer drh <drh@noemail.net> 2018-11-16 01:42:26 +0000
commit 6f147c54ef48e1aae41fac6322cfeb7481eb1dea (patch)
tree 7b59dce69b912eb1fc777d6f3d39e5f2ba736f33 /ext/misc/csv.c
parent 23bfa80c198d25ceceb257769f5f9a4d0774e22e (diff)
download sqlite-6f147c54ef48e1aae41fac6322cfeb7481eb1dea.tar.gz
sqlite-6f147c54ef48e1aae41fac6322cfeb7481eb1dea.zip
Improvements to the CSV virtual table.
FossilOrigin-Name: 0406ecbbe75513e342040b71fdd342462222dbb3820486b5f745d7865805c00b
Diffstat (limited to 'ext/misc/csv.c')
-rw-r--r-- ext/misc/csv.c 131
1 files changed, 96 insertions, 35 deletions
diff --git a/ext/misc/csv.c b/ext/misc/csv.c
index ec90f96f2..8cca8aeb4 100644
--- a/ext/misc/csv.c
+++ b/ext/misc/csv.c
@@ -19,9 +19,9 @@
** CREATE VIRTUAL TABLE temp.csv USING csv(filename=FILENAME);
** SELECT * FROM csv;
**
-** The columns are named "c1", "c2", "c3", ... by default. But the
-** application can define its own CREATE TABLE statement as an additional
-** parameter. For example:
+** The columns are named "c1", "c2", "c3", ... by default. Or the
+** application can define its own CREATE TABLE statement using the
+** schema= parameter, like this:
**
** CREATE VIRTUAL TABLE temp.csv2 USING csv(
** filename = "../http.log",
@@ -32,9 +32,9 @@
** the data= parameter.
**
** If the columns=N parameter is supplied, then the CSV file is assumed to have
-** N columns. If the columns parameter is omitted, the CSV file is opened
-** as soon as the virtual table is constructed and the first row of the CSV
-** is read in order to count the tables.
+** N columns. If both the columns= and schema= parameters are omitted, then
+** the number and names of the columns are determined by the first line of
+** the CSV input.
**
** Some extra debugging features (used for testing virtual tables) are available
** if this module is compiled with -DSQLITE_TEST.
@@ -436,6 +436,34 @@ static int csv_boolean(const char *z){
return -1;
}
+/* Check to see if the string is of the form: "TAG = BOOLEAN" or just "TAG".
+** If it is, set *pValue to the value of the boolean ("true" if there is
+** no "= BOOLEAN" component) and return non-zero. If the input string does
+** not begin with TAG, or TAG is followed by something else, return zero.
+*/
+static int csv_boolean_parameter(
+ const char *zTag, /* Tag we are looking for */
+ int nTag, /* Size of the tag in bytes */
+ const char *z, /* Input parameter */
+ int *pValue /* Write boolean value here */
+){
+ int b;
+ z = csv_skip_whitespace(z);
+ if( strncmp(zTag, z, nTag)!=0 ) return 0;
+ z = csv_skip_whitespace(z + nTag);
+ if( z[0]==0 ){
+ *pValue = 1;
+ return 1;
+ }
+ if( z[0]!='=' ) return 0;
+ z = csv_skip_whitespace(z+1);
+ b = csv_boolean(z);
+ if( b>=0 ){
+ *pValue = b;
+ return 1;
+ }
+ return 0;
+}
/*
** Parameters:
@@ -469,6 +497,7 @@ static int csvtabConnect(
#ifdef SQLITE_TEST
int tstFlags = 0; /* Value for testflags=N parameter */
#endif
+ int b; /* Value of a boolean parameter */
int nCol = -99; /* Value of the columns= parameter */
CsvReader sRdr; /* A CSV file reader used to store an error
** message and/or to count the number of columns */
@@ -493,21 +522,12 @@ static int csvtabConnect(
if( j<sizeof(azParam)/sizeof(azParam[0]) ){
if( sRdr.zErr[0] ) goto csvtab_connect_error;
}else
- if( (zValue = csv_parameter("header",6,z))!=0 ){
- int x;
+ if( csv_boolean_parameter("header",6,z,&b) ){
if( bHeader>=0 ){
csv_errmsg(&sRdr, "more than one 'header' parameter");
goto csvtab_connect_error;
}
- x = csv_boolean(zValue);
- if( x==1 ){
- bHeader = 1;
- }else if( x==0 ){
- bHeader = 0;
- }else{
- csv_errmsg(&sRdr, "unrecognized argument to 'header': %s", zValue);
- goto csvtab_connect_error;
- }
+ bHeader = b;
}else
#ifdef SQLITE_TEST
if( (zValue = csv_parameter("testflags",9,z))!=0 ){
@@ -521,53 +541,94 @@ static int csvtabConnect(
}
nCol = atoi(zValue);
if( nCol<=0 ){
- csv_errmsg(&sRdr, "must have at least one column");
+ csv_errmsg(&sRdr, "column= value must be positive");
goto csvtab_connect_error;
}
}else
{
- csv_errmsg(&sRdr, "unrecognized parameter '%s'", z);
+ csv_errmsg(&sRdr, "bad parameter: '%s'", z);
goto csvtab_connect_error;
}
}
if( (CSV_FILENAME==0)==(CSV_DATA==0) ){
- csv_errmsg(&sRdr, "must either filename= or data= but not both");
+ csv_errmsg(&sRdr, "must specify either filename= or data= but not both");
goto csvtab_connect_error;
}
- if( nCol<=0 && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA) ){
+
+ if( (nCol<=0 || bHeader==1)
+ && csv_reader_open(&sRdr, CSV_FILENAME, CSV_DATA)
+ ){
goto csvtab_connect_error;
}
pNew = sqlite3_malloc( sizeof(*pNew) );
*ppVtab = (sqlite3_vtab*)pNew;
if( pNew==0 ) goto csvtab_connect_oom;
memset(pNew, 0, sizeof(*pNew));
- if( nCol>0 ){
+ if( CSV_SCHEMA==0 ){
+ sqlite3_str *pStr = sqlite3_str_new(0);
+ char *zSep = "";
+ int iCol = 0;
+ sqlite3_str_appendf(pStr, "CREATE TABLE x(");
+ if( nCol<0 && bHeader<1 ){
+ nCol = 0;
+ do{
+ csv_read_one_field(&sRdr);
+ nCol++;
+ }while( sRdr.cTerm==',' );
+ }
+ if( nCol>0 && bHeader<1 ){
+ for(iCol=0; iCol<nCol; iCol++){
+ sqlite3_str_appendf(pStr, "%sc%d TEXT", zSep, iCol);
+ zSep = ",";
+ }
+ }else{
+ do{
+ char *z = csv_read_one_field(&sRdr);
+ if( (nCol>0 && iCol<nCol) || (nCol<0 && bHeader) ){
+ sqlite3_str_appendf(pStr,"%s\"%w\" TEXT", zSep, z);
+ zSep = ",";
+ iCol++;
+ }
+ }while( sRdr.cTerm==',' );
+ if( nCol<0 ){
+ nCol = iCol;
+ }else{
+ while( iCol<nCol ){
+ sqlite3_str_appendf(pStr,"%sc%d TEXT", zSep, ++iCol);
+ zSep = ",";
+ }
+ }
+ }
pNew->nCol = nCol;
- }else{
+ sqlite3_str_appendf(pStr, ")");
+ CSV_SCHEMA = sqlite3_str_finish(pStr);
+ if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
+ }else if( nCol<0 ){
do{
csv_read_one_field(&sRdr);
pNew->nCol++;
}while( sRdr.cTerm==',' );
+ }else{
+ pNew->nCol = nCol;
}
pNew->zFilename = CSV_FILENAME; CSV_FILENAME = 0;
pNew->zData = CSV_DATA; CSV_DATA = 0;
#ifdef SQLITE_TEST
pNew->tstFlags = tstFlags;
#endif
- pNew->iStart = bHeader==1 ? ftell(sRdr.in) : 0;
- csv_reader_reset(&sRdr);
- if( CSV_SCHEMA==0 ){
- char *zSep = "";
- CSV_SCHEMA = sqlite3_mprintf("CREATE TABLE x(");
- if( CSV_SCHEMA==0 ) goto csvtab_connect_oom;
- for(i=0; i<pNew->nCol; i++){
- CSV_SCHEMA = sqlite3_mprintf("%z%sc%d TEXT",CSV_SCHEMA, zSep, i);
- zSep = ",";
- }
- CSV_SCHEMA = sqlite3_mprintf("%z);", CSV_SCHEMA);
+ if( bHeader!=1 ){
+ pNew->iStart = 0;
+ }else if( pNew->zData ){
+ pNew->iStart = (int)sRdr.iIn;
+ }else{
+ pNew->iStart = ftell(sRdr.in);
}
+ csv_reader_reset(&sRdr);
rc = sqlite3_declare_vtab(db, CSV_SCHEMA);
- if( rc ) goto csvtab_connect_error;
+ if( rc ){
+ csv_errmsg(&sRdr, "bad schema: '%s' - %s", CSV_SCHEMA, sqlite3_errmsg(db));
+ goto csvtab_connect_error;
+ }
for(i=0; i<sizeof(azPValue)/sizeof(azPValue[0]); i++){
sqlite3_free(azPValue[i]);
}