aboutsummaryrefslogtreecommitdiff
path: root/ext/fts5/fts5_storage.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/fts5/fts5_storage.c')
-rw-r--r--ext/fts5/fts5_storage.c234
1 files changed, 173 insertions, 61 deletions
diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c
index cf25eb361..e8649c703 100644
--- a/ext/fts5/fts5_storage.c
+++ b/ext/fts5/fts5_storage.c
@@ -74,6 +74,30 @@ struct Fts5Storage {
#define FTS5_STMT_SCAN 11
/*
+** Return a pointer to a buffer obtained from sqlite3_malloc() that contains
+** nBind comma-separated question marks. e.g. if nBind is passed 5, this
+** function returns "?,?,?,?,?".
+**
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op and
+** NULL is returned immediately. Or, if the attempt to malloc a buffer
+** fails, then *pRc is set to SQLITE_NOMEM and NULL is returned. Otherwise,
+** if it is SQLITE_OK when this function is called and the malloc() succeeds,
+** *pRc is left unchanged.
+*/
+static char *fts5BindingsList(int *pRc, int nBind){
+ char *zBind = sqlite3Fts5MallocZero(pRc, 1 + nBind*2);
+ if( zBind ){
+ int ii;
+ for(ii=0; ii<nBind; ii++){
+ zBind[ii*2] = '?';
+ zBind[ii*2 + 1] = ',';
+ }
+ zBind[ii*2-1] = '\0';
+ }
+ return zBind;
+}
+
+/*
** Prepare the two insert statements - Fts5Storage.pInsertContent and
** Fts5Storage.pInsertDocsize - if they have not already been prepared.
** Return SQLITE_OK if successful, or an SQLite error code if an error
@@ -141,19 +165,20 @@ static int fts5StorageGetStmt(
);
break;
- case FTS5_STMT_INSERT_CONTENT:
- case FTS5_STMT_REPLACE_CONTENT: {
- int nCol = pC->nCol + 1;
+ case FTS5_STMT_INSERT_CONTENT: {
+ int nCol = 0;
char *zBind;
int i;
- zBind = sqlite3_malloc64(1 + nCol*2);
- if( zBind ){
- for(i=0; i<nCol; i++){
- zBind[i*2] = '?';
- zBind[i*2 + 1] = ',';
+ nCol = 1 + pC->nCol;
+ if( pC->bLocale ){
+ for(i=0; i<pC->nCol; i++){
+ if( pC->abUnindexed[i]==0 ) nCol++;
}
- zBind[i*2-1] = '\0';
+ }
+
+ zBind = fts5BindingsList(&rc, nCol);
+ if( zBind ){
zSql = sqlite3_mprintf(azStmt[eStmt], pC->zDb, pC->zName, zBind);
sqlite3_free(zBind);
}
@@ -344,7 +369,7 @@ int sqlite3Fts5StorageOpen(
if( bCreate ){
if( pConfig->eContent==FTS5_CONTENT_NORMAL ){
int nDefn = 32 + pConfig->nCol*10;
- char *zDefn = sqlite3_malloc64(32 + (sqlite3_int64)pConfig->nCol * 10);
+ char *zDefn = sqlite3_malloc64(32 + (sqlite3_int64)pConfig->nCol * 20);
if( zDefn==0 ){
rc = SQLITE_NOMEM;
}else{
@@ -356,6 +381,14 @@ int sqlite3Fts5StorageOpen(
sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", c%d", i);
iOff += (int)strlen(&zDefn[iOff]);
}
+ if( pConfig->bLocale ){
+ for(i=0; i<pConfig->nCol; i++){
+ if( pConfig->abUnindexed[i]==0 ){
+ sqlite3_snprintf(nDefn-iOff, &zDefn[iOff], ", l%d", i);
+ iOff += (int)strlen(&zDefn[iOff]);
+ }
+ }
+ }
rc = sqlite3Fts5CreateTable(pConfig, "content", zDefn, 0, pzErr);
}
sqlite3_free(zDefn);
@@ -507,7 +540,8 @@ static int fts5StorageDeleteFromIndex(
sqlite3_value *pVal = 0;
const char *pText = 0;
int nText = 0;
- int bReset = 0;
+ const char *pLoc = 0;
+ int nLoc = 0;
assert( pSeek==0 || apVal==0 );
assert( pSeek!=0 || apVal!=0 );
@@ -517,10 +551,19 @@ static int fts5StorageDeleteFromIndex(
pVal = apVal[iCol-1];
}
- rc = sqlite3Fts5ExtractText(
- pConfig, pVal, pSeek!=0, &bReset, &pText, &nText
- );
+ if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){
+ rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc);
+ }else{
+ pText = (const char*)sqlite3_value_text(pVal);
+ nText = sqlite3_value_bytes(pVal);
+ if( pConfig->bLocale && pSeek ){
+ pLoc = (const char*)sqlite3_column_text(pSeek, iCol + pConfig->nCol);
+ nLoc = sqlite3_column_bytes(pSeek, iCol + pConfig->nCol);
+ }
+ }
+
if( rc==SQLITE_OK ){
+ sqlite3Fts5SetLocale(pConfig, pLoc, nLoc);
ctx.szCol = 0;
rc = sqlite3Fts5Tokenize(pConfig, FTS5_TOKENIZE_DOCUMENT,
pText, nText, (void*)&ctx, fts5StorageInsertCallback
@@ -529,7 +572,7 @@ static int fts5StorageDeleteFromIndex(
if( rc==SQLITE_OK && p->aTotalSize[iCol-1]<0 ){
rc = FTS5_CORRUPT;
}
- if( bReset ) sqlite3Fts5ClearLocale(pConfig);
+ sqlite3Fts5ClearLocale(pConfig);
}
}
}
@@ -788,20 +831,35 @@ int sqlite3Fts5StorageRebuild(Fts5Storage *p){
for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
ctx.szCol = 0;
if( pConfig->abUnindexed[ctx.iCol]==0 ){
- int bReset = 0; /* True if tokenizer locale must be reset */
int nText = 0; /* Size of pText in bytes */
const char *pText = 0; /* Pointer to buffer containing text value */
+ int nLoc = 0; /* Size of pLoc in bytes */
+ const char *pLoc = 0; /* Pointer to buffer containing text value */
+
sqlite3_value *pVal = sqlite3_column_value(pScan, ctx.iCol+1);
+ if( pConfig->eContent==FTS5_CONTENT_EXTERNAL
+ && sqlite3Fts5IsLocaleValue(pConfig, pVal)
+ ){
+ rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc);
+ }else{
+ pText = (const char*)sqlite3_value_text(pVal);
+ nText = sqlite3_value_bytes(pVal);
+ if( pConfig->bLocale ){
+ int iCol = ctx.iCol + 1 + pConfig->nCol;
+ pLoc = (const char*)sqlite3_column_text(pScan, iCol);
+ nLoc = sqlite3_column_bytes(pScan, iCol);
+ }
+ }
- rc = sqlite3Fts5ExtractText(pConfig, pVal, 1, &bReset, &pText, &nText);
if( rc==SQLITE_OK ){
+ sqlite3Fts5SetLocale(pConfig, pLoc, nLoc);
rc = sqlite3Fts5Tokenize(pConfig,
FTS5_TOKENIZE_DOCUMENT,
pText, nText,
(void*)&ctx,
fts5StorageInsertCallback
);
- if( bReset ) sqlite3Fts5ClearLocale(pConfig);
+ sqlite3Fts5ClearLocale(pConfig);
}
}
sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
@@ -884,29 +942,45 @@ int sqlite3Fts5StorageContentInsert(
}else{
sqlite3_stmt *pInsert = 0; /* Statement to write %_content table */
int i; /* Counter variable */
+ int nIndexed = 0; /* Number indexed columns seen */
rc = fts5StorageGetStmt(p, FTS5_STMT_INSERT_CONTENT, &pInsert, 0);
- for(i=1; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){
+ if( pInsert ) sqlite3_clear_bindings(pInsert);
+
+ /* Bind the rowid value */
+ sqlite3_bind_value(pInsert, 1, apVal[1]);
+
+ /* Loop through values for user-defined columns. i=2 is the leftmost
+ ** user-defined column. As is column 1 of pSavedRow. */
+ for(i=2; rc==SQLITE_OK && i<=pConfig->nCol+1; i++){
+ int bUnindexed = pConfig->abUnindexed[i-2];
sqlite3_value *pVal = apVal[i];
+
+ nIndexed += !bUnindexed;
if( sqlite3_value_nochange(pVal) && p->pSavedRow ){
/* This is an UPDATE statement, and column (i-2) was not modified.
** Retrieve the value from Fts5Storage.pSavedRow instead. */
pVal = sqlite3_column_value(p->pSavedRow, i-1);
- }else if( sqlite3_value_subtype(pVal)==FTS5_LOCALE_SUBTYPE ){
+ if( pConfig->bLocale && bUnindexed==0 ){
+ sqlite3_bind_value(pInsert, pConfig->nCol + 1 + nIndexed,
+ sqlite3_column_value(p->pSavedRow, pConfig->nCol + i - 1)
+ );
+ }
+ }else if( sqlite3Fts5IsLocaleValue(pConfig, pVal) ){
+ const char *pText = 0;
+ const char *pLoc = 0;
+ int nText = 0;
+ int nLoc = 0;
assert( pConfig->bLocale );
- assert( i>1 );
- if( pConfig->abUnindexed[i-2] ){
- /* At attempt to insert an fts5_locale() value into an UNINDEXED
- ** column. Strip the locale away and just bind the text. */
- const char *pText = 0;
- int nText = 0;
- rc = sqlite3Fts5ExtractText(pConfig, pVal, 0, 0, &pText, &nText);
+
+ rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc);
+ if( rc==SQLITE_OK ){
sqlite3_bind_text(pInsert, i, pText, nText, SQLITE_TRANSIENT);
- }else{
- const u8 *pBlob = (const u8*)sqlite3_value_blob(pVal);
- int nBlob = sqlite3_value_bytes(pVal);
- assert( nBlob>4 );
- sqlite3_bind_blob(pInsert, i, pBlob+4, nBlob-4, SQLITE_TRANSIENT);
+ if( bUnindexed==0 ){
+ int iLoc = pConfig->nCol + 1 + nIndexed;
+ sqlite3_bind_text(pInsert, iLoc, pLoc, nLoc, SQLITE_TRANSIENT);
+ }
}
+
continue;
}
@@ -945,23 +1019,37 @@ int sqlite3Fts5StorageIndexInsert(
for(ctx.iCol=0; rc==SQLITE_OK && ctx.iCol<pConfig->nCol; ctx.iCol++){
ctx.szCol = 0;
if( pConfig->abUnindexed[ctx.iCol]==0 ){
- int bReset = 0; /* True if tokenizer locale must be reset */
int nText = 0; /* Size of pText in bytes */
const char *pText = 0; /* Pointer to buffer containing text value */
+ int nLoc = 0; /* Size of pText in bytes */
+ const char *pLoc = 0; /* Pointer to buffer containing text value */
+
sqlite3_value *pVal = apVal[ctx.iCol+2];
- int bDisk = 0;
if( p->pSavedRow && sqlite3_value_nochange(pVal) ){
pVal = sqlite3_column_value(p->pSavedRow, ctx.iCol+1);
- bDisk = 1;
+ if( pConfig->eContent==FTS5_CONTENT_NORMAL && pConfig->bLocale ){
+ int iCol = ctx.iCol + 1 + pConfig->nCol;
+ pLoc = (const char*)sqlite3_column_text(p->pSavedRow, iCol);
+ nLoc = sqlite3_column_bytes(p->pSavedRow, iCol);
+ }
+ }else{
+ pVal = apVal[ctx.iCol+2];
+ }
+
+ if( pConfig->bLocale && sqlite3Fts5IsLocaleValue(pConfig, pVal) ){
+ rc = sqlite3Fts5DecodeLocaleValue(pVal, &pText, &nText, &pLoc, &nLoc);
+ }else{
+ pText = (const char*)sqlite3_value_text(pVal);
+ nText = sqlite3_value_bytes(pVal);
}
- rc = sqlite3Fts5ExtractText(pConfig, pVal, bDisk, &bReset, &pText,&nText);
+
if( rc==SQLITE_OK ){
- assert( bReset==0 || pConfig->bLocale );
+ sqlite3Fts5SetLocale(pConfig, pLoc, nLoc);
rc = sqlite3Fts5Tokenize(pConfig,
FTS5_TOKENIZE_DOCUMENT, pText, nText, (void*)&ctx,
fts5StorageInsertCallback
);
- if( bReset ) sqlite3Fts5ClearLocale(pConfig);
+ sqlite3Fts5ClearLocale(pConfig);
}
}
sqlite3Fts5BufferAppendVarint(&rc, &buf, ctx.szCol);
@@ -1126,37 +1214,61 @@ int sqlite3Fts5StorageIntegrity(Fts5Storage *p, int iArg){
rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
}
for(i=0; rc==SQLITE_OK && i<pConfig->nCol; i++){
- if( pConfig->abUnindexed[i] ) continue;
- ctx.iCol = i;
- ctx.szCol = 0;
- if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
- rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
- }
- if( rc==SQLITE_OK ){
- int bReset = 0; /* True if tokenizer locale must be reset */
- int nText = 0; /* Size of pText in bytes */
- const char *pText = 0; /* Pointer to buffer containing text value */
-
- rc = sqlite3Fts5ExtractText(pConfig,
- sqlite3_column_value(pScan, i+1), 1, &bReset, &pText, &nText
- );
+ if( pConfig->abUnindexed[i]==0 ){
+ const char *pText = 0;
+ int nText = 0;
+ const char *pLoc = 0;
+ int nLoc = 0;
+ sqlite3_value *pVal = sqlite3_column_value(pScan, i+1);
+
+ if( pConfig->eContent==FTS5_CONTENT_EXTERNAL
+ && sqlite3Fts5IsLocaleValue(pConfig, pVal)
+ ){
+ rc = sqlite3Fts5DecodeLocaleValue(
+ pVal, &pText, &nText, &pLoc, &nLoc
+ );
+ }else{
+ if( pConfig->eContent==FTS5_CONTENT_NORMAL && pConfig->bLocale ){
+ int iCol = i + 1 + pConfig->nCol;
+ pLoc = (const char*)sqlite3_column_text(pScan, iCol);
+ nLoc = sqlite3_column_bytes(pScan, iCol);
+ }
+ pText = (const char*)sqlite3_value_text(pVal);
+ nText = sqlite3_value_bytes(pVal);
+ }
+
+ ctx.iCol = i;
+ ctx.szCol = 0;
+
+ if( rc==SQLITE_OK && pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
+ rc = sqlite3Fts5TermsetNew(&ctx.pTermset);
+ }
+
if( rc==SQLITE_OK ){
+ sqlite3Fts5SetLocale(pConfig, pLoc, nLoc);
rc = sqlite3Fts5Tokenize(pConfig,
FTS5_TOKENIZE_DOCUMENT,
pText, nText,
(void*)&ctx,
fts5StorageIntegrityCallback
);
- if( bReset ) sqlite3Fts5ClearLocale(pConfig);
+ sqlite3Fts5ClearLocale(pConfig);
+ }
+
+ /* If this is not a columnsize=0 database, check that the number
+ ** of tokens in the value matches the aColSize[] value read from
+ ** the %_docsize table. */
+ if( rc==SQLITE_OK
+ && pConfig->bColumnsize
+ && ctx.szCol!=aColSize[i]
+ ){
+ rc = FTS5_CORRUPT;
+ }
+ aTotalSize[i] += ctx.szCol;
+ if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
+ sqlite3Fts5TermsetFree(ctx.pTermset);
+ ctx.pTermset = 0;
}
- }
- if( rc==SQLITE_OK && pConfig->bColumnsize && ctx.szCol!=aColSize[i] ){
- rc = FTS5_CORRUPT;
- }
- aTotalSize[i] += ctx.szCol;
- if( pConfig->eDetail==FTS5_DETAIL_COLUMNS ){
- sqlite3Fts5TermsetFree(ctx.pTermset);
- ctx.pTermset = 0;
}
}
sqlite3Fts5TermsetFree(ctx.pTermset);