about summary refs log tree commit diff
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2020-04-01 10:32:33 -0400
committer Tom Lane <tgl@sss.pgh.pa.us> 2020-04-01 10:32:33 -0400
commit a80818605e5447b9b846590c3d3fab99060cb53e (patch)
tree 87b0877f0893bcc5b6a11455d6edc4a7f7644e01
parent d8653f468789a75627c2fc82e73e2755ad8d1fb4 (diff)
download postgresql-a80818605e5447b9b846590c3d3fab99060cb53e.tar.gz
postgresql-a80818605e5447b9b846590c3d3fab99060cb53e.zip
Improve selectivity estimation for assorted match-style operators.

Quite a few matching operators such as JSONB's @> used "contsel" and "contjoinsel" as their selectivity estimators. That was a bad idea, because (a) contsel is only a stub, yielding a fixed default estimate, and (b) that default is 0.001, meaning we estimate these operators as five times more selective than equality, which is surely pretty silly.

There's a good model for improving this in ltree's ltreeparentsel(): for any "var OP constant" query, we can try applying the operator to all of the column's MCV and histogram values, taking the latter as being a random sample of the non-MCV values. That code is actually 100% generic, except for the question of exactly what default selectivity ought to be plugged in when we don't have stats.

Hence, migrate the guts of ltreeparentsel() into the core code, provide wrappers "matchingsel" and "matchingjoinsel" with a more-appropriate default estimate, and use those for the non-geometric operators that formerly used contsel (mostly JSONB containment operators and tsquery matching).

Also apply this code to some match-like operators in hstore, ltree, and pg_trgm, including the former users of ltreeparentsel as well as ones that improperly used contsel. Since commit 911e70207 just created new versions of those extensions that we haven't released yet, we can sneak this change into those new versions instead of having to create an additional generation of update scripts.

Patch by me, reviewed by Alexey Bashtanov

Discussion: https://postgr.es/m/12237.1582833074@sss.pgh.pa.us
-rw-r--r-- contrib/hstore/hstore--1.6--1.7.sql 15
-rw-r--r-- contrib/ltree/ltree--1.1--1.2.sql 81
-rw-r--r-- contrib/ltree/ltree_op.c 102
-rw-r--r-- contrib/pg_trgm/expected/pg_trgm.out 19
-rw-r--r-- contrib/pg_trgm/pg_trgm--1.4--1.5.sql 11
-rw-r--r-- doc/src/sgml/xoper.sgml 13
-rw-r--r-- src/backend/utils/adt/selfuncs.c 160
-rw-r--r-- src/include/catalog/catversion.h 2
-rw-r--r-- src/include/catalog/pg_operator.dat 35
-rw-r--r-- src/include/catalog/pg_proc.dat 9
-rw-r--r-- src/include/utils/selfuncs.h 6
11 files changed, 332 insertions, 121 deletions
diff --git a/contrib/hstore/hstore--1.6--1.7.sql b/contrib/hstore/hstore--1.6--1.7.sql
index 0d126ef8a9c..3e5cb67c933 100644
--- a/contrib/hstore/hstore--1.6--1.7.sql
+++ b/contrib/hstore/hstore--1.6--1.7.sql
@@ -10,3 +10,18 @@ LANGUAGE C IMMUTABLE PARALLEL SAFE;
ALTER OPERATOR FAMILY gist_hstore_ops USING gist
ADD FUNCTION 10 (hstore) ghstore_options (internal);
+
+ALTER OPERATOR ? (hstore, text)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ?| (hstore, text[])
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ?& (hstore, text[])
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR @> (hstore, hstore)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR <@ (hstore, hstore)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR @ (hstore, hstore)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ~ (hstore, hstore)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
diff --git a/contrib/ltree/ltree--1.1--1.2.sql b/contrib/ltree/ltree--1.1--1.2.sql
index 7b4ea998679..186381e61d8 100644
--- a/contrib/ltree/ltree--1.1--1.2.sql
+++ b/contrib/ltree/ltree--1.1--1.2.sql
@@ -19,3 +19,84 @@ ADD FUNCTION 10 (ltree) ltree_gist_options (internal);
ALTER OPERATOR FAMILY gist__ltree_ops USING gist
ADD FUNCTION 10 (_ltree) _ltree_gist_options (internal);
+ALTER OPERATOR < (ltree, ltree)
+ SET (RESTRICT = scalarltsel, JOIN = scalarltjoinsel);
+ALTER OPERATOR <= (ltree, ltree)
+ SET (RESTRICT = scalarlesel, JOIN = scalarlejoinsel);
+ALTER OPERATOR >= (ltree, ltree)
+ SET (RESTRICT = scalargesel, JOIN = scalargejoinsel);
+ALTER OPERATOR > (ltree, ltree)
+ SET (RESTRICT = scalargtsel, JOIN = scalargtjoinsel);
+
+ALTER OPERATOR @> (ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^@> (ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR <@ (ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^<@ (ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ~ (ltree, lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ~ (lquery, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^~ (ltree, lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^~ (lquery, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ? (ltree, _lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ? (_lquery, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^? (ltree, _lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^? (_lquery, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR @ (ltree, ltxtquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR @ (ltxtquery, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^@ (ltree, ltxtquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^@ (ltxtquery, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR @> (_ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR <@ (ltree, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR <@ (_ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR @> (ltree, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ~ (_ltree, lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ~ (lquery, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ? (_ltree, _lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ? (_lquery, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR @ (_ltree, ltxtquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR @ (ltxtquery, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^@> (_ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^<@ (ltree, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^<@ (_ltree, ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^@> (ltree, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^~ (_ltree, lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^~ (lquery, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^? (_ltree, _lquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^? (_lquery, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^@ (_ltree, ltxtquery)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR ^@ (ltxtquery, _ltree)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
diff --git a/contrib/ltree/ltree_op.c b/contrib/ltree/ltree_op.c
index 34e6e4b2aaa..4ac2ed5e54d 100644
--- a/contrib/ltree/ltree_op.c
+++ b/contrib/ltree/ltree_op.c
@@ -566,10 +566,11 @@ ltree2text(PG_FUNCTION_ARGS)
}
-#define DEFAULT_PARENT_SEL 0.001
-
/*
* ltreeparentsel - Selectivity of parent relationship for ltree data types.
+ *
+ * This function is not used anymore, if the ltree extension has been
+ * updated to 1.2 or later.
*/
Datum
ltreeparentsel(PG_FUNCTION_ARGS)
@@ -578,101 +579,12 @@ ltreeparentsel(PG_FUNCTION_ARGS)
Oid operator = PG_GETARG_OID(1);
List *args = (List *) PG_GETARG_POINTER(2);
int varRelid = PG_GETARG_INT32(3);
- VariableStatData vardata;
- Node *other;
- bool varonleft;
double selec;
- /*
- * If expression is not variable <@ something or something <@ variable,
- * then punt and return a default estimate.
- */
- if (!get_restriction_variable(root, args, varRelid,
- &vardata, &other, &varonleft))
- PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL);
-
- /*
- * If the something is a NULL constant, assume operator is strict and
- * return zero, ie, operator will never return TRUE.
- */
- if (IsA(other, Const) &&
- ((Const *) other)->constisnull)
- {
- ReleaseVariableStats(vardata);
- PG_RETURN_FLOAT8(0.0);
- }
-
- if (IsA(other, Const))
- {
- /* Variable is being compared to a known non-null constant */
- Datum constval = ((Const *) other)->constvalue;
- FmgrInfo contproc;
- double mcvsum;
- double mcvsel;
- double nullfrac;
- int hist_size;
-
- fmgr_info(get_opcode(operator), &contproc);
-
- /*
- * Is the constant "<@" to any of the column's most common values?
- */
- mcvsel = mcv_selectivity(&vardata, &contproc, constval, varonleft,
- &mcvsum);
-
- /*
- * If the histogram is large enough, see what fraction of it the
- * constant is "<@" to, and assume that's representative of the
- * non-MCV population. Otherwise use the default selectivity for the
- * non-MCV population.
- */
- selec = histogram_selectivity(&vardata, &contproc,
- constval, varonleft,
- 10, 1, &hist_size);
- if (selec < 0)
- {
- /* Nope, fall back on default */
- selec = DEFAULT_PARENT_SEL;
- }
- else if (hist_size < 100)
- {
- /*
- * For histogram sizes from 10 to 100, we combine the histogram
- * and default selectivities, putting increasingly more trust in
- * the histogram for larger sizes.
- */
- double hist_weight = hist_size / 100.0;
-
- selec = selec * hist_weight +
- DEFAULT_PARENT_SEL * (1.0 - hist_weight);
- }
-
- /* In any case, don't believe extremely small or large estimates. */
- if (selec < 0.0001)
- selec = 0.0001;
- else if (selec > 0.9999)
- selec = 0.9999;
-
- if (HeapTupleIsValid(vardata.statsTuple))
- nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
- else
- nullfrac = 0.0;
-
- /*
- * Now merge the results from the MCV and histogram calculations,
- * realizing that the histogram covers only the non-null values that
- * are not listed in MCV.
- */
- selec *= 1.0 - nullfrac - mcvsum;
- selec += mcvsel;
- }
- else
- selec = DEFAULT_PARENT_SEL;
-
- ReleaseVariableStats(vardata);
-
- /* result should be in range, but make sure... */
- CLAMP_PROBABILITY(selec);
+ /* Use generic restriction selectivity logic, with default 0.001. */
+ selec = generic_restriction_selectivity(root, operator,
+ args, varRelid,
+ 0.001);
PG_RETURN_FLOAT8((float8) selec);
}
diff --git a/contrib/pg_trgm/expected/pg_trgm.out b/contrib/pg_trgm/expected/pg_trgm.out
index 5746be0dc41..923c326c7bd 100644
--- a/contrib/pg_trgm/expected/pg_trgm.out
+++ b/contrib/pg_trgm/expected/pg_trgm.out
@@ -5170,16 +5170,15 @@ SELECT similarity('Szczecin', 'Warsaw');
EXPLAIN (COSTS OFF)
SELECT DISTINCT city, similarity(city, 'Warsaw'), show_limit()
FROM restaurants WHERE city % 'Warsaw';
- QUERY PLAN
--------------------------------------------------------------
- Unique
- -> Sort
- Sort Key: city, (similarity(city, 'Warsaw'::text))
- -> Bitmap Heap Scan on restaurants
- Recheck Cond: (city % 'Warsaw'::text)
- -> Bitmap Index Scan on restaurants_city_idx
- Index Cond: (city % 'Warsaw'::text)
-(7 rows)
+ QUERY PLAN
+-------------------------------------------------------------------
+ HashAggregate
+ Group Key: city, similarity(city, 'Warsaw'::text), show_limit()
+ -> Bitmap Heap Scan on restaurants
+ Recheck Cond: (city % 'Warsaw'::text)
+ -> Bitmap Index Scan on restaurants_city_idx
+ Index Cond: (city % 'Warsaw'::text)
+(6 rows)
SELECT set_limit(0.3);
set_limit
diff --git a/contrib/pg_trgm/pg_trgm--1.4--1.5.sql b/contrib/pg_trgm/pg_trgm--1.4--1.5.sql
index 3804c3bc692..284f88d3252 100644
--- a/contrib/pg_trgm/pg_trgm--1.4--1.5.sql
+++ b/contrib/pg_trgm/pg_trgm--1.4--1.5.sql
@@ -10,3 +10,14 @@ LANGUAGE C IMMUTABLE PARALLEL SAFE;
ALTER OPERATOR FAMILY gist_trgm_ops USING gist
ADD FUNCTION 10 (text) gtrgm_options (internal);
+
+ALTER OPERATOR % (text, text)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR <% (text, text)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR %> (text, text)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR <<% (text, text)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
+ALTER OPERATOR %>> (text, text)
+ SET (RESTRICT = matchingsel, JOIN = matchingjoinsel);
diff --git a/doc/src/sgml/xoper.sgml b/doc/src/sgml/xoper.sgml
index 132056f869a..56b08491c96 100644
--- a/doc/src/sgml/xoper.sgml
+++ b/doc/src/sgml/xoper.sgml
@@ -283,6 +283,18 @@ column OP constant
</para>
<para>
+ Another useful built-in selectivity estimation function
+ is <function>matchingsel</function>, which will work for almost any
+ binary operator, if standard MCV and/or histogram statistics are
+ collected for the input data type(s). Its default estimate is set to
+ twice the default estimate used in <function>eqsel</function>, making
+ it most suitable for comparison operators that are somewhat less
+ strict than equality. (Or you could call the
+ underlying <function>generic_restriction_selectivity</function>
+ function, providing a different default estimate.)
+ </para>
+
+ <para>
There are additional selectivity estimation functions designed for geometric
operators in <filename>src/backend/utils/adt/geo_selfuncs.c</filename>: <function>areasel</function>, <function>positionsel</function>,
and <function>contsel</function>. At this writing these are just stubs, but you might want
@@ -319,6 +331,7 @@ table1.column1 OP table2.column2
<member><function>scalarlejoinsel</function> for <literal>&lt;=</literal></member>
<member><function>scalargtjoinsel</function> for <literal>&gt;</literal></member>
<member><function>scalargejoinsel</function> for <literal>&gt;=</literal></member>
+ <member><function>matchingjoinsel</function> for generic matching operators</member>
<member><function>areajoinsel</function> for 2D area-based comparisons</member>
<member><function>positionjoinsel</function> for 2D position-based comparisons</member>
<member><function>contjoinsel</function> for 2D containment-based comparisons</member>
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index e62b69d6f26..4fdcb07d97b 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -830,6 +830,132 @@ histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
}
/*
+ * generic_restriction_selectivity - Selectivity for almost anything
+ *
+ * This function estimates selectivity for operators that we don't have any
+ * special knowledge about, but are on data types that we collect standard
+ * MCV and/or histogram statistics for. (Additional assumptions are that
+ * the operator is strict and immutable, or at least stable.)
+ *
+ * If we have "VAR OP CONST" or "CONST OP VAR", selectivity is estimated by
+ * applying the operator to each element of the column's MCV and/or histogram
+ * stats, and merging the results using the assumption that the histogram is
+ * a reasonable random sample of the column's non-MCV population. Note that
+ * if the operator's semantics are related to the histogram ordering, this
+ * might not be such a great assumption; other functions such as
+ * scalarineqsel() are probably a better match in such cases.
+ *
+ * Otherwise, fall back to the default selectivity provided by the caller.
+ */
+double
+generic_restriction_selectivity(PlannerInfo *root, Oid operator,
+ List *args, int varRelid,
+ double default_selectivity)
+{
+ double selec;
+ VariableStatData vardata;
+ Node *other;
+ bool varonleft;
+
+ /*
+ * If expression is not variable OP something or something OP variable,
+ * then punt and return the default estimate.
+ */
+ if (!get_restriction_variable(root, args, varRelid,
+ &vardata, &other, &varonleft))
+ return default_selectivity;
+
+ /*
+ * If the something is a NULL constant, assume operator is strict and
+ * return zero, ie, operator will never return TRUE.
+ */
+ if (IsA(other, Const) &&
+ ((Const *) other)->constisnull)
+ {
+ ReleaseVariableStats(vardata);
+ return 0.0;
+ }
+
+ if (IsA(other, Const))
+ {
+ /* Variable is being compared to a known non-null constant */
+ Datum constval = ((Const *) other)->constvalue;
+ FmgrInfo opproc;
+ double mcvsum;
+ double mcvsel;
+ double nullfrac;
+ int hist_size;
+
+ fmgr_info(get_opcode(operator), &opproc);
+
+ /*
+ * Calculate the selectivity for the column's most common values.
+ */
+ mcvsel = mcv_selectivity(&vardata, &opproc, constval, varonleft,
+ &mcvsum);
+
+ /*
+ * If the histogram is large enough, see what fraction of it matches
+ * the query, and assume that's representative of the non-MCV
+ * population. Otherwise use the default selectivity for the non-MCV
+ * population.
+ */
+ selec = histogram_selectivity(&vardata, &opproc,
+ constval, varonleft,
+ 10, 1, &hist_size);
+ if (selec < 0)
+ {
+ /* Nope, fall back on default */
+ selec = default_selectivity;
+ }
+ else if (hist_size < 100)
+ {
+ /*
+ * For histogram sizes from 10 to 100, we combine the histogram
+ * and default selectivities, putting increasingly more trust in
+ * the histogram for larger sizes.
+ */
+ double hist_weight = hist_size / 100.0;
+
+ selec = selec * hist_weight +
+ default_selectivity * (1.0 - hist_weight);
+ }
+
+ /* In any case, don't believe extremely small or large estimates. */
+ if (selec < 0.0001)
+ selec = 0.0001;
+ else if (selec > 0.9999)
+ selec = 0.9999;
+
+ /* Don't forget to account for nulls. */
+ if (HeapTupleIsValid(vardata.statsTuple))
+ nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
+ else
+ nullfrac = 0.0;
+
+ /*
+ * Now merge the results from the MCV and histogram calculations,
+ * realizing that the histogram covers only the non-null values that
+ * are not listed in MCV.
+ */
+ selec *= 1.0 - nullfrac - mcvsum;
+ selec += mcvsel;
+ }
+ else
+ {
+ /* Comparison value is not constant, so we can't do anything */
+ selec = default_selectivity;
+ }
+
+ ReleaseVariableStats(vardata);
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(selec);
+
+ return selec;
+}
+
+/*
* ineq_histogram_selectivity - Examine the histogram for scalarineqsel
*
* Determine the fraction of the variable's histogram population that
@@ -2917,6 +3043,40 @@ fail:
/*
+ * matchingsel -- generic matching-operator selectivity support
+ *
+ * Use these for any operators that (a) are on data types for which we collect
+ * standard statistics, and (b) have behavior for which the default estimate
+ * (twice DEFAULT_EQ_SEL) is sane. Typically that is good for match-like
+ * operators.
+ */
+
+Datum
+matchingsel(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ int varRelid = PG_GETARG_INT32(3);
+ double selec;
+
+ /* Use generic restriction selectivity logic. */
+ selec = generic_restriction_selectivity(root, operator,
+ args, varRelid,
+ DEFAULT_MATCHING_SEL);
+
+ PG_RETURN_FLOAT8((float8) selec);
+}
+
+Datum
+matchingjoinsel(PG_FUNCTION_ARGS)
+{
+ /* Just punt, for the moment. */
+ PG_RETURN_FLOAT8(DEFAULT_MATCHING_SEL);
+}
+
+
+/*
* Helper routine for estimate_num_groups: add an item to a list of
* GroupVarInfos, but only if it's not known equal to any of the existing
* entries.
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index eaca0570fdd..bb07aebdbed 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202003301
+#define CATALOG_VERSION_NO 202004011
#endif
diff --git a/src/include/catalog/pg_operator.dat b/src/include/catalog/pg_operator.dat
index 7c135da3b1f..65c7fedf237 100644
--- a/src/include/catalog/pg_operator.dat
+++ b/src/include/catalog/pg_operator.dat
@@ -3016,18 +3016,21 @@
{ oid => '3693', descr => 'contains',
oprname => '@>', oprleft => 'tsquery', oprright => 'tsquery',
oprresult => 'bool', oprcom => '<@(tsquery,tsquery)',
- oprcode => 'tsq_mcontains', oprrest => 'contsel', oprjoin => 'contjoinsel' },
+ oprcode => 'tsq_mcontains', oprrest => 'matchingsel',
+ oprjoin => 'matchingjoinsel' },
{ oid => '3694', descr => 'is contained by',
oprname => '<@', oprleft => 'tsquery', oprright => 'tsquery',
oprresult => 'bool', oprcom => '@>(tsquery,tsquery)',
- oprcode => 'tsq_mcontained', oprrest => 'contsel', oprjoin => 'contjoinsel' },
+ oprcode => 'tsq_mcontained', oprrest => 'matchingsel',
+ oprjoin => 'matchingjoinsel' },
{ oid => '3762', descr => 'text search match',
oprname => '@@', oprleft => 'text', oprright => 'text', oprresult => 'bool',
- oprcode => 'ts_match_tt', oprrest => 'contsel', oprjoin => 'contjoinsel' },
+ oprcode => 'ts_match_tt', oprrest => 'matchingsel',
+ oprjoin => 'matchingjoinsel' },
{ oid => '3763', descr => 'text search match',
oprname => '@@', oprleft => 'text', oprright => 'tsquery',
- oprresult => 'bool', oprcode => 'ts_match_tq', oprrest => 'contsel',
- oprjoin => 'contjoinsel' },
+ oprresult => 'bool', oprcode => 'ts_match_tq', oprrest => 'matchingsel',
+ oprjoin => 'matchingjoinsel' },
# generic record comparison operators
{ oid => '2988', oid_symbol => 'RECORD_EQ_OP', descr => 'equal',
@@ -3178,7 +3181,8 @@
{ oid => '3897', descr => 'is adjacent to',
oprname => '-|-', oprleft => 'anyrange', oprright => 'anyrange',
oprresult => 'bool', oprcom => '-|-(anyrange,anyrange)',
- oprcode => 'range_adjacent', oprrest => 'contsel', oprjoin => 'contjoinsel' },
+ oprcode => 'range_adjacent', oprrest => 'matchingsel',
+ oprjoin => 'matchingjoinsel' },
{ oid => '3898', descr => 'range union',
oprname => '+', oprleft => 'anyrange', oprright => 'anyrange',
oprresult => 'anyrange', oprcom => '+(anyrange,anyrange)',
@@ -3258,22 +3262,23 @@
{ oid => '3246', descr => 'contains',
oprname => '@>', oprleft => 'jsonb', oprright => 'jsonb', oprresult => 'bool',
oprcom => '<@(jsonb,jsonb)', oprcode => 'jsonb_contains',
- oprrest => 'contsel', oprjoin => 'contjoinsel' },
+ oprrest => 'matchingsel', oprjoin => 'matchingjoinsel' },
{ oid => '3247', descr => 'key exists',
oprname => '?', oprleft => 'jsonb', oprright => 'text', oprresult => 'bool',
- oprcode => 'jsonb_exists', oprrest => 'contsel', oprjoin => 'contjoinsel' },
+ oprcode => 'jsonb_exists', oprrest => 'matchingsel',
+ oprjoin => 'matchingjoinsel' },
{ oid => '3248', descr => 'any key exists',
oprname => '?|', oprleft => 'jsonb', oprright => '_text', oprresult => 'bool',
- oprcode => 'jsonb_exists_any', oprrest => 'contsel',
- oprjoin => 'contjoinsel' },
+ oprcode => 'jsonb_exists_any', oprrest => 'matchingsel',
+ oprjoin => 'matchingjoinsel' },
{ oid => '3249', descr => 'all keys exist',
oprname => '?&', oprleft => 'jsonb', oprright => '_text', oprresult => 'bool',
- oprcode => 'jsonb_exists_all', oprrest => 'contsel',
- oprjoin => 'contjoinsel' },
+ oprcode => 'jsonb_exists_all', oprrest => 'matchingsel',
+ oprjoin => 'matchingjoinsel' },
{ oid => '3250', descr => 'is contained by',
oprname => '<@', oprleft => 'jsonb', oprright => 'jsonb', oprresult => 'bool',
oprcom => '@>(jsonb,jsonb)', oprcode => 'jsonb_contained',
- oprrest => 'contsel', oprjoin => 'contjoinsel' },
+ oprrest => 'matchingsel', oprjoin => 'matchingjoinsel' },
{ oid => '3284', descr => 'concatenate',
oprname => '||', oprleft => 'jsonb', oprright => 'jsonb',
oprresult => 'jsonb', oprcode => 'jsonb_concat' },
@@ -3292,10 +3297,10 @@
{ oid => '4012', descr => 'jsonpath exists',
oprname => '@?', oprleft => 'jsonb', oprright => 'jsonpath',
oprresult => 'bool', oprcode => 'jsonb_path_exists_opr(jsonb,jsonpath)',
- oprrest => 'contsel', oprjoin => 'contjoinsel' },
+ oprrest => 'matchingsel', oprjoin => 'matchingjoinsel' },
{ oid => '4013', descr => 'jsonpath match',
oprname => '@@', oprleft => 'jsonb', oprright => 'jsonpath',
oprresult => 'bool', oprcode => 'jsonb_path_match_opr(jsonb,jsonpath)',
- oprrest => 'contsel', oprjoin => 'contjoinsel' },
+ oprrest => 'matchingsel', oprjoin => 'matchingjoinsel' },
]
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index a6a708cca92..fe3df4436d7 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -10628,6 +10628,15 @@
prosrc => 'shift_jis_2004_to_euc_jis_2004',
probin => '$libdir/euc2004_sjis2004' },
+{ oid => '8387',
+ descr => 'restriction selectivity for generic matching operators',
+ proname => 'matchingsel', provolatile => 's', prorettype => 'float8',
+ proargtypes => 'internal oid internal int4', prosrc => 'matchingsel' },
+{ oid => '8388', descr => 'join selectivity for generic matching operators',
+ proname => 'matchingjoinsel', provolatile => 's', prorettype => 'float8',
+ proargtypes => 'internal oid internal int2 internal',
+ prosrc => 'matchingjoinsel' },
+
# replication/origin.h
{ oid => '6003', descr => 'create a replication origin',
proname => 'pg_replication_origin_create', provolatile => 'v',
diff --git a/src/include/utils/selfuncs.h b/src/include/utils/selfuncs.h
index 1c9570f1951..1dd3ac12f8b 100644
--- a/src/include/utils/selfuncs.h
+++ b/src/include/utils/selfuncs.h
@@ -42,6 +42,9 @@
/* default selectivity estimate for pattern-match operators such as LIKE */
#define DEFAULT_MATCH_SEL 0.005
+/* default selectivity estimate for other matching operators */
+#define DEFAULT_MATCHING_SEL 0.010
+
/* default number of distinct values in a table */
#define DEFAULT_NUM_DISTINCT 200
@@ -148,6 +151,9 @@ extern double histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
Datum constval, bool varonleft,
int min_hist_size, int n_skip,
int *hist_size);
+extern double generic_restriction_selectivity(PlannerInfo *root, Oid operator,
+ List *args, int varRelid,
+ double default_selectivity);
extern double ineq_histogram_selectivity(PlannerInfo *root,
VariableStatData *vardata,
FmgrInfo *opproc, bool isgt, bool iseq,