aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2016-06-20 10:49:19 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2016-06-20 10:49:19 -0400
commit9c852566a3cf4ede40e22e4ca216d12cd4a27117 (patch)
tree9cae7919739b245f16d5bbda6cf2ac0e75fe59c0
parent47981a4665dfc9c3808366dff9971daedba32e98 (diff)
downloadpostgresql-9c852566a3cf4ede40e22e4ca216d12cd4a27117.tar.gz
postgresql-9c852566a3cf4ede40e22e4ca216d12cd4a27117.zip
Fix comparison of similarity to threshold in GIST trigram searches.
There was some very strange code here, dating to commit b525bf77, that purported to work around an ancient gcc bug by forcing a float4 comparison to be done as int instead. Commit 5871b8848 broke that when it changed one side of the comparison to "double" but left the comparison code alone. Commit f576b17cd doubled down on the weirdness by introducing a "volatile" marker, which had nothing to do with the actual problem. Guess that the gcc bug, even if it's still present in the wild, was triggered by comparison of float4's and can be avoided if we store the result of cnt_sml() into a double before comparing to the double "nlimit". This will at least work correctly on non-broken compilers, and it's way more readable. Per bug #14202 from Greg Navis. Add a regression test based on his example. Report: <20160620115321.5792.10766@wrigleys.postgresql.org>
-rw-r--r--contrib/pg_trgm/expected/pg_trgm.out53
-rw-r--r--contrib/pg_trgm/sql/pg_trgm.sql21
-rw-r--r--contrib/pg_trgm/trgm_gist.c11
3 files changed, 76 insertions, 9 deletions
diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out
index 78e8174f866..9bc55f1d0d3 100644
--- a/contrib/pg_trgm/expected/pg_trgm.out
+++ b/contrib/pg_trgm/expected/pg_trgm.out
@@ -3838,3 +3838,56 @@ select * from test2 where t ~ ' z foo';
z foo bar
(1 row)
+-- Check similarity threshold (bug #14202)
+CREATE TEMP TABLE restaurants (city text);
+INSERT INTO restaurants SELECT 'Warsaw' FROM generate_series(1, 10000);
+INSERT INTO restaurants SELECT 'Szczecin' FROM generate_series(1, 10000);
+CREATE INDEX ON restaurants USING gist(city gist_trgm_ops);
+-- Similarity of the two names (for reference).
+SELECT similarity('Szczecin', 'Warsaw');
+ similarity
+------------
+ 0
+(1 row)
+
+-- Should get only 'Warsaw' for either setting of set_limit.
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
+ FROM restaurants WHERE city % 'Warsaw';
+ QUERY PLAN
+-------------------------------------------------------------
+ Unique
+ -> Sort
+ Sort Key: city, (similarity(city, 'Warsaw'::text))
+ -> Bitmap Heap Scan on restaurants
+ Recheck Cond: (city % 'Warsaw'::text)
+ -> Bitmap Index Scan on restaurants_city_idx
+ Index Cond: (city % 'Warsaw'::text)
+(7 rows)
+
+SELECT set_limit(0.3);
+ set_limit
+-----------
+ 0.3
+(1 row)
+
+SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
+ FROM restaurants WHERE city % 'Warsaw';
+ city | similarity | show_limit
+--------+------------+------------
+ Warsaw | 1 | 0.3
+(1 row)
+
+SELECT set_limit(0.5);
+ set_limit
+-----------
+ 0.5
+(1 row)
+
+SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
+ FROM restaurants WHERE city % 'Warsaw';
+ city | similarity | show_limit
+--------+------------+------------
+ Warsaw | 1 | 0.5
+(1 row)
+
diff --git a/contrib/pg_trgm/sql/pg_trgm.sql b/contrib/pg_trgm/sql/pg_trgm.sql
index 6cefc8e643a..244946bb8db 100644
--- a/contrib/pg_trgm/sql/pg_trgm.sql
+++ b/contrib/pg_trgm/sql/pg_trgm.sql
@@ -113,3 +113,24 @@ select * from test2 where t ~ 'z foo bar';
select * from test2 where t ~ ' z foo bar';
select * from test2 where t ~ ' z foo bar';
select * from test2 where t ~ ' z foo';
+
+-- Check similarity threshold (bug #14202)
+
+CREATE TEMP TABLE restaurants (city text);
+INSERT INTO restaurants SELECT 'Warsaw' FROM generate_series(1, 10000);
+INSERT INTO restaurants SELECT 'Szczecin' FROM generate_series(1, 10000);
+CREATE INDEX ON restaurants USING gist(city gist_trgm_ops);
+
+-- Similarity of the two names (for reference).
+SELECT similarity('Szczecin', 'Warsaw');
+
+-- Should get only 'Warsaw' for either setting of set_limit.
+EXPLAIN (COSTS OFF)
+SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
+ FROM restaurants WHERE city % 'Warsaw';
+SELECT set_limit(0.3);
+SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
+ FROM restaurants WHERE city % 'Warsaw';
+SELECT set_limit(0.5);
+SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
+ FROM restaurants WHERE city % 'Warsaw';
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index 3a5aff9edee..f52867df324 100644
--- a/contrib/pg_trgm/trgm_gist.c
+++ b/contrib/pg_trgm/trgm_gist.c
@@ -296,16 +296,9 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
if (GIST_LEAF(entry))
{ /* all leafs contains orig trgm */
+ double tmpsml = cnt_sml(qtrg, key, *recheck);
- /*
- * Prevent gcc optimizing the tmpsml variable using volatile
- * keyword. Otherwise comparison of nlimit and tmpsml may give
- * wrong results.
- */
- float4 volatile tmpsml = cnt_sml(qtrg, key, *recheck);
-
- /* strange bug at freebsd 5.2.1 and gcc 3.3.3 */
- res = (*(int *) &tmpsml == *(int *) &nlimit || tmpsml > nlimit);
+ res = (tmpsml >= nlimit);
}
else if (ISALLTRUE(key))
{ /* non-leaf contains signature */