aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTeodor Sigaev <teodor@sigaev.ru>2015-12-25 13:05:13 +0300
committerTeodor Sigaev <teodor@sigaev.ru>2015-12-25 13:05:13 +0300
commit25bfa7efd037a3c44d6a2989d18f55758090e8a9 (patch)
tree6f670a31c83c3f77a9d3bf28dbcec31a58211cb9
parenta9246fbf665327870370d1088bfc9efdfd2719ec (diff)
downloadpostgresql-25bfa7efd037a3c44d6a2989d18f55758090e8a9.tar.gz
postgresql-25bfa7efd037a3c44d6a2989d18f55758090e8a9.zip
Improve the gin index scan performance in pg_trgm.
Previous coding assumes too pessimistic upper bound of similarity in consistent method of GIN. Author: Fornaroli Christophe with comments by me.
-rw-r--r--contrib/pg_trgm/trgm_gin.c28
1 files changed, 20 insertions, 8 deletions
diff --git a/contrib/pg_trgm/trgm_gin.c b/contrib/pg_trgm/trgm_gin.c
index 6a0731d44ea..baa4cc70a23 100644
--- a/contrib/pg_trgm/trgm_gin.c
+++ b/contrib/pg_trgm/trgm_gin.c
@@ -190,11 +190,23 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
if (check[i])
ntrue++;
}
-#ifdef DIVUNION
- res = (nkeys == ntrue) ? true : ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >= trgm_limit) ? true : false);
-#else
+
+ /*--------------------
+ * If DIVUNION is defined then similarity formula is:
+ * c / (len1 + len2 - c)
+ * where c is number of common trigrams and it stands as ntrue in
+ * this code. Here we don't know value of len2 but we can assume
+ * that c (ntrue) is a lower bound of len2, so upper bound of
+ * similarity is:
+ * c / (len1 + c - c) => c / len1
+ * If DIVUNION is not defined then similarity formula is:
+ * c / max(len1, len2)
+ * And again, c (ntrue) is a lower bound of len2, but c <= len1
+ * just by definition and, consequently, upper bound of
+ * similarity is just c / len1.
+ * So, independly on DIVUNION the upper bound formula is the same.
+ */
res = (nkeys == 0) ? false : ((((((float4) ntrue) / ((float4) nkeys))) >= trgm_limit) ? true : false);
-#endif
break;
case ILikeStrategyNumber:
#ifndef IGNORECASE
@@ -267,11 +279,11 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
if (check[i] != GIN_FALSE)
ntrue++;
}
-#ifdef DIVUNION
- res = (nkeys == ntrue) ? GIN_MAYBE : (((((float4) ntrue) / ((float4) (nkeys - ntrue))) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE);
-#else
+
+ /*
+ * See comment in gin_trgm_consistent() about * upper bound formula
+ */
res = (nkeys == 0) ? GIN_FALSE : (((((float4) ntrue) / ((float4) nkeys)) >= trgm_limit) ? GIN_MAYBE : GIN_FALSE);
-#endif
break;
case ILikeStrategyNumber:
#ifndef IGNORECASE