aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authordan <dan@noemail.net>2015-04-20 18:48:57 +0000
committerdan <dan@noemail.net>2015-04-20 18:48:57 +0000
commit80d3ea080a151c54f83c2e11e77fc6255c5692fd (patch)
treed61ce2f78602fdc9eee96573f8bd10b1bbc4844f
parent687c5124e07351ce9a78dbb6632f043f5ee06732 (diff)
downloadsqlite-80d3ea080a151c54f83c2e11e77fc6255c5692fd.tar.gz
sqlite-80d3ea080a151c54f83c2e11e77fc6255c5692fd.zip
Fix some fts5 problems with very large position lists.
FossilOrigin-Name: 2ea8f9cbe67dac60c1a0a661c95a03ecfa9a0b9a
-rw-r--r--ext/fts5/fts5_index.c28
-rw-r--r--ext/fts5/test/fts5bigpl.test58
-rw-r--r--manifest13
-rw-r--r--manifest.uuid2
4 files changed, 88 insertions, 13 deletions
diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c
index d407411a2..7d48d074d 100644
--- a/ext/fts5/fts5_index.c
+++ b/ext/fts5/fts5_index.c
@@ -1901,13 +1901,20 @@ static void fts5SegIterNext(
** the doclist.
*/
static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){
+ Fts5DlidxIter *pDlidx = pIter->pDlidx;
Fts5Data *pLast = 0;
int pgnoLast = 0;
- if( pIter->pDlidx ){
- int iSegid = pIter->pSeg->iSegid;
- pgnoLast = pIter->pDlidx->iLeafPgno;
- pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast));
+ if( pDlidx ){
+ /* If the doclist-index iterator is already at EOF, then the current
+ ** doclist contains no entries except those on the current page. */
+ if( fts5DlidxIterEof(p, pDlidx)==0 ){
+ int iSegid = pIter->pSeg->iSegid;
+ pgnoLast = pDlidx->iLeafPgno;
+ pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iIdx, iSegid, 0, pgnoLast));
+ }else{
+ pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
+ }
}else{
int iOff; /* Byte offset within pLeaf */
Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */
@@ -1915,7 +1922,7 @@ static void fts5SegIterReverse(Fts5Index *p, int iIdx, Fts5SegIter *pIter){
/* Currently, Fts5SegIter.iLeafOffset (and iOff) points to the first
** byte of position-list content for the current rowid. Back it up
** so that it points to the start of the position-list size field. */
- pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2 + pIter->bDel);
+ pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel);
iOff = pIter->iLeafOffset;
assert( iOff>=4 );
@@ -3285,6 +3292,9 @@ static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){
if( pSeg->pSeg==0 ){
/* no-op */
}else if( pSeg->pLeaf==0 ){
+ /* All keys from this input segment have been transferred to the output.
+ ** Set both the first and last page-numbers to 0 to indicate that the
+ ** segment is now empty. */
pSeg->pSeg->pgnoLast = 0;
pSeg->pSeg->pgnoFirst = 0;
}else{
@@ -4092,7 +4102,13 @@ static void fts5IndexIntegrityCheckSegment(
}
}
- if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
+ /* Either iter.iLeaf must be the rightmost leaf-page in the segment, or
+ ** else the segment has been completely emptied by an ongoing merge
+ ** operation. */
+ if( p->rc==SQLITE_OK
+ && iter.iLeaf!=pSeg->pgnoLast
+ && (pSeg->pgnoFirst || pSeg->pgnoLast)
+ ){
p->rc = FTS5_CORRUPT;
}
diff --git a/ext/fts5/test/fts5bigpl.test b/ext/fts5/test/fts5bigpl.test
new file mode 100644
index 000000000..172c0396b
--- /dev/null
+++ b/ext/fts5/test/fts5bigpl.test
@@ -0,0 +1,58 @@
+# 2015 April 21
+#
+# The author disclaims copyright to this source code. In place of
+# a legal notice, here is a blessing:
+#
+# May you do good and not evil.
+# May you find forgiveness for yourself and forgive others.
+# May you share freely, never taking more than you give.
+#
+#***********************************************************************
+#
+# This test is focused on really large position lists - those that require
+# 4 or 5 byte position-list size varints. Because of the amount of memory
+# required, these tests only run on 64-bit platforms.
+#
+
+source [file join [file dirname [info script]] fts5_common.tcl]
+set testprefix fts5bigpl
+
+if { $tcl_platform(wordSize)<8 } {
+ finish_test
+ return
+}
+
+do_execsql_test 1.0 { CREATE VIRTUAL TABLE t1 USING fts5(x) }
+
+do_test 1.1 {
+ foreach t {a b c d e f g h i j} {
+ set doc [string repeat "$t " 1200000]
+ execsql { INSERT INTO t1 VALUES($doc) }
+ }
+ execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
+} {}
+
+do_test 1.2 {
+ execsql { DELETE FROM t1 }
+ foreach t {"a b" "b a" "c d" "d c"} {
+ set doc [string repeat "$t " 600000]
+ execsql { INSERT INTO t1 VALUES($doc) }
+ }
+ execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
+} {}
+
+
+# 5-byte varint. This test takes 30 seconds or so on a 2014 workstation.
+# The generated database is roughly 635MiB.
+#
+do_test 2.1...slow {
+ execsql { DELETE FROM t1 }
+ foreach t {a} {
+ set doc [string repeat "$t " 150000000]
+ execsql { INSERT INTO t1 VALUES($doc) }
+ }
+ execsql { INSERT INTO t1(t1) VALUES('integrity-check') }
+} {}
+
+finish_test
+
diff --git a/manifest b/manifest
index 994b10599..cb1f4c510 100644
--- a/manifest
+++ b/manifest
@@ -1,5 +1,5 @@
-C Logically\sstore\supdates\sas\s(insert+delete)\swithin\sthe\sFTS\stree.\sThis\sallows\skeys\sto\sbe\sannihilated\smore\squickly\sunder\ssome\scircumstances.
-D 2015-04-15T18:49:20.008
+C Fix\ssome\sfts5\sproblems\swith\svery\slarge\sposition\slists.
+D 2015-04-20T18:48:57.780
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 88a3e6261286db378fdffa1124cad11b3c05f5bb
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 3ba56cc6824c9f7b1e0695159e0a9c636f6b4a23
F ext/fts5/fts5_config.c 0847facc8914f57ea4452c43ce109200dc65e894
F ext/fts5/fts5_expr.c 5215137efab527577d36bdf9e44bfc2ec3e1be98
F ext/fts5/fts5_hash.c 3cb5a3d04dd2030eb0ac8d544711dfd37c0e6529
-F ext/fts5/fts5_index.c 28f1bfadf3eb4f860c8b978f4d8d6ea0cf7c724d
+F ext/fts5/fts5_index.c f840e35cceafcd0597688467010a4d12feea9c76
F ext/fts5/fts5_storage.c ac0f0937059c8d4f38a1f13aa5f2c2cd7edf3e0d
F ext/fts5/fts5_tcl.c 617b6bb96545be8d9045de6967c688cd9cd15541
F ext/fts5/fts5_tokenize.c c07f2c2f749282c1dbbf46bde1f6d7095c740b8b
@@ -133,6 +133,7 @@ F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8
F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592
F ext/fts5/test/fts5al.test 6a5717faaf7f1e0e866360022d284903f3a4eede
F ext/fts5/test/fts5auxdata.test c69b86092bf1a157172de5f9169731af3403179b
+F ext/fts5/test/fts5bigpl.test b1cfd00561350ab04994ba7dd9d48468e5e0ec3b
F ext/fts5/test/fts5content.test 8dc302fccdff834d946497e9d862750ea87d4517
F ext/fts5/test/fts5corrupt.test dbdcfe75749ed2f2eb3915cf68fd55d3dc3b058d
F ext/fts5/test/fts5ea.test 04695560a444fcc00c3c4f27783bdcfbf71f030c
@@ -1292,7 +1293,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
-P b29109a083e5cd442cdd19f29d7be45b09c4f661
-R 3f3d372bbefa9c63dd6c3e57000a1fa8
+P 50fae1f0006c0e946b5214e73eedf2687a0016f9
+R e3e266d7af429931ea61b2ad868bf28e
U dan
-Z ff9a464604d7627b634f349ee851852e
+Z 88f214a9049d68201f885f825375d535
diff --git a/manifest.uuid b/manifest.uuid
index ab0ef8daa..6b9e7e2cd 100644
--- a/manifest.uuid
+++ b/manifest.uuid
@@ -1 +1 @@
-50fae1f0006c0e946b5214e73eedf2687a0016f9 \ No newline at end of file
+2ea8f9cbe67dac60c1a0a661c95a03ecfa9a0b9a \ No newline at end of file