aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/optimizer/path/clausesel.c245
-rw-r--r--src/backend/utils/adt/selfuncs.c174
2 files changed, 345 insertions, 74 deletions
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index d3a494f9bc9..edce3d21291 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.28 2000/01/23 02:06:58 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.29 2000/01/24 07:16:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -23,6 +23,23 @@
#include "utils/lsyscache.h"
+/*
+ * Data structure for accumulating info about possible range-query
+ * clause pairs in clauselist_selectivity.
+ */
+typedef struct RangeQueryClause {
+ struct RangeQueryClause *next; /* next in linked list */
+ Node *var; /* The common variable of the clauses */
+ bool have_lobound; /* found a low-bound clause yet? */
+ bool have_hibound; /* found a high-bound clause yet? */
+ Selectivity lobound; /* Selectivity of a var > something clause */
+ Selectivity hibound; /* Selectivity of a var < something clause */
+} RangeQueryClause;
+
+static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
+ int flag, bool isLTsel, Selectivity s2);
+
+
/****************************************************************************
* ROUTINES TO COMPUTE SELECTIVITIES
****************************************************************************/
@@ -55,30 +72,238 @@ restrictlist_selectivity(Query *root,
* must be returned.
*
* See clause_selectivity() for the meaning of the varRelid parameter.
+ *
+ * Our basic approach is to take the product of the selectivities of the
+ * subclauses. However, that's only right if the subclauses have independent
+ * probabilities, and in reality they are often NOT independent. So,
+ * we want to be smarter where we can.
+
+ * Currently, the only extra smarts we have is to recognize "range queries",
+ * such as "x > 34 AND x < 42". Clauses are recognized as possible range
+ * query components if they are restriction opclauses whose operators have
+ * scalarltsel() or scalargtsel() as their restriction selectivity estimator.
+ * We pair up clauses of this form that refer to the same variable. An
+ * unpairable clause of this kind is simply multiplied into the selectivity
+ * product in the normal way. But when we find a pair, we know that the
+ * selectivities represent the relative positions of the low and high bounds
+ * within the column's range, so instead of figuring the selectivity as
+ * hisel * losel, we can figure it as hisel + losel - 1. (To visualize this,
+ * see that hisel is the fraction of the range below the high bound, while
+ * losel is the fraction above the low bound; so hisel can be interpreted
+ * directly as a 0..1 value but we need to convert losel to 1-losel before
+ * interpreting it as a value. Then the available range is 1-losel to hisel.)
+ * If the calculation yields zero or negative, however, we chicken out and
+ * use the default interpretation; that probably means that one or both
+ * selectivities is a default estimate rather than an actual range value.
+ * Of course this is all very dependent on the behavior of
+ * scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
*/
Selectivity
clauselist_selectivity(Query *root,
List *clauses,
int varRelid)
{
- Selectivity s1 = 1.0;
- List *clause;
+ Selectivity s1 = 1.0;
+ RangeQueryClause *rqlist = NULL;
+ List *clist;
- /* Use the product of the selectivities of the subclauses.
- * XXX this is too optimistic, since the subclauses
- * are very likely not independent...
+ /*
+ * Initial scan over clauses. Anything that doesn't look like a
+ * potential rangequery clause gets multiplied into s1 and forgotten.
+ * Anything that does gets inserted into an rqlist entry.
*/
- foreach(clause, clauses)
+ foreach(clist, clauses)
{
- Selectivity s2 = clause_selectivity(root,
- (Node *) lfirst(clause),
- varRelid);
+ Node *clause = (Node *) lfirst(clist);
+ Selectivity s2;
+
+ /*
+ * See if it looks like a restriction clause with a constant.
+ * (If it's not a constant we can't really trust the selectivity!)
+ * NB: for consistency of results, this fragment of code had
+ * better match what clause_selectivity() would do.
+ */
+ if (varRelid != 0 || NumRelids(clause) == 1)
+ {
+ int relidx;
+ AttrNumber attno;
+ Datum constval;
+ int flag;
+
+ get_relattval(clause, varRelid,
+ &relidx, &attno, &constval, &flag);
+ if (relidx != 0 && (flag & SEL_CONSTANT))
+ {
+ /* if get_relattval succeeded, it must be an opclause */
+ Oid opno = ((Oper *) ((Expr *) clause)->oper)->opno;
+ RegProcedure oprrest = get_oprrest(opno);
+
+ if (!oprrest)
+ s2 = (Selectivity) 0.5;
+ else
+ s2 = restriction_selectivity(oprrest, opno,
+ getrelid(relidx,
+ root->rtable),
+ attno,
+ constval, flag);
+ /*
+ * If we reach here, we have computed the same result
+ * that clause_selectivity would, so we can just use s2
+ * if it's the wrong oprrest. But if it's the right
+ * oprrest, add the clause to rqlist for later processing.
+ */
+ switch (oprrest)
+ {
+ case F_SCALARLTSEL:
+ addRangeClause(&rqlist, clause, flag, true, s2);
+ break;
+ case F_SCALARGTSEL:
+ addRangeClause(&rqlist, clause, flag, false, s2);
+ break;
+ default:
+ /* Just merge the selectivity in generically */
+ s1 = s1 * s2;
+ break;
+ }
+ continue; /* drop to loop bottom */
+ }
+ }
+ /* Not the right form, so treat it generically. */
+ s2 = clause_selectivity(root, clause, varRelid);
s1 = s1 * s2;
}
+
+ /*
+ * Now scan the rangequery pair list.
+ */
+ while (rqlist != NULL)
+ {
+ RangeQueryClause *rqnext;
+
+ if (rqlist->have_lobound && rqlist->have_hibound)
+ {
+ /* Successfully matched a pair of range clauses */
+ Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0;
+
+ if (s2 > 0.0)
+ {
+ /* All our hard work has paid off! */
+ s1 *= s2;
+ }
+ else
+ {
+ /* One or both is probably a default estimate,
+ * so punt and just merge them in generically.
+ */
+ s1 *= rqlist->hibound * rqlist->lobound;
+ }
+ }
+ else
+ {
+ /* Only found one of a pair, merge it in generically */
+ if (rqlist->have_lobound)
+ s1 *= rqlist->lobound;
+ else
+ s1 *= rqlist->hibound;
+ }
+ /* release storage and advance */
+ rqnext = rqlist->next;
+ pfree(rqlist);
+ rqlist = rqnext;
+ }
+
return s1;
}
/*
+ * addRangeClause --- add a new range clause for clauselist_selectivity
+ *
+ * Here is where we try to match up pairs of range-query clauses
+ */
+static void
+addRangeClause(RangeQueryClause **rqlist, Node *clause,
+ int flag, bool isLTsel, Selectivity s2)
+{
+ RangeQueryClause *rqelem;
+ Node *var;
+ bool is_lobound;
+
+ /* get_relattval sets flag&SEL_RIGHT if the var is on the LEFT. */
+ if (flag & SEL_RIGHT)
+ {
+ var = (Node *) get_leftop((Expr *) clause);
+ is_lobound = ! isLTsel; /* x < something is high bound */
+ }
+ else
+ {
+ var = (Node *) get_rightop((Expr *) clause);
+ is_lobound = isLTsel; /* something < x is low bound */
+ }
+
+ for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
+ {
+ /* We use full equal() here because the "var" might be a function
+ * of one or more attributes of the same relation...
+ */
+ if (! equal(var, rqelem->var))
+ continue;
+ /* Found the right group to put this clause in */
+ if (is_lobound)
+ {
+ if (! rqelem->have_lobound)
+ {
+ rqelem->have_lobound = true;
+ rqelem->lobound = s2;
+ }
+ else
+ {
+ /* We have found two similar clauses, such as
+ * x < y AND x < z. Keep only the more restrictive one.
+ */
+ if (rqelem->lobound > s2)
+ rqelem->lobound = s2;
+ }
+ }
+ else
+ {
+ if (! rqelem->have_hibound)
+ {
+ rqelem->have_hibound = true;
+ rqelem->hibound = s2;
+ }
+ else
+ {
+ /* We have found two similar clauses, such as
+ * x > y AND x > z. Keep only the more restrictive one.
+ */
+ if (rqelem->hibound > s2)
+ rqelem->hibound = s2;
+ }
+ }
+ return;
+ }
+
+ /* No matching var found, so make a new clause-pair data structure */
+ rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
+ rqelem->var = var;
+ if (is_lobound)
+ {
+ rqelem->have_lobound = true;
+ rqelem->have_hibound = false;
+ rqelem->lobound = s2;
+ }
+ else
+ {
+ rqelem->have_lobound = false;
+ rqelem->have_hibound = true;
+ rqelem->hibound = s2;
+ }
+ rqelem->next = *rqlist;
+ *rqlist = rqelem;
+}
+
+
+/*
* clause_selectivity -
* Compute the selectivity of a general boolean expression clause.
*
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index c6b9520a263..346bda3bc7d 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -14,7 +14,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.52 2000/01/24 02:12:55 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.53 2000/01/24 07:16:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -47,15 +47,13 @@
/* default selectivity estimate for inequalities such as "A < b" */
#define DEFAULT_INEQ_SEL (1.0 / 3.0)
-static bool convert_to_scale(Datum value, Oid typid,
- double *scaleval);
static void getattproperties(Oid relid, AttrNumber attnum,
Oid *typid,
int *typlen,
bool *typbyval,
int32 *typmod);
static bool getattstatistics(Oid relid, AttrNumber attnum,
- Oid opid, Oid typid, int32 typmod,
+ Oid typid, int32 typmod,
double *nullfrac,
double *commonfrac,
Datum *commonval,
@@ -100,7 +98,7 @@ eqsel(Oid opid,
&typid, &typlen, &typbyval, &typmod);
/* get stats for the attribute, if available */
- if (getattstatistics(relid, attno, opid, typid, typmod,
+ if (getattstatistics(relid, attno, typid, typmod,
&nullfrac, &commonfrac, &commonval,
NULL, NULL))
{
@@ -212,19 +210,18 @@ neqsel(Oid opid,
}
/*
- * intltsel - Selectivity of "<" (also "<=") for integers.
+ * scalarltsel - Selectivity of "<" (also "<=") for scalars.
*
- * Actually, this works and is used for all numeric types, so it should
- * be renamed. In fact, it is also currently called for all manner of
- * non-numeric types, for which it is NOT very helpful. That needs
- * to be fixed.
+ * This routine works for any datatype (or pair of datatypes) known to
+ * convert_to_scalar(). If it is applied to some other datatype,
+ * it will return a default estimate.
*/
float64
-intltsel(Oid opid,
- Oid relid,
- AttrNumber attno,
- Datum value,
- int32 flag)
+scalarltsel(Oid opid,
+ Oid relid,
+ AttrNumber attno,
+ Datum value,
+ int32 flag)
{
float64 result;
@@ -253,19 +250,19 @@ intltsel(Oid opid,
*/
oprtuple = get_operator_tuple(opid);
if (! HeapTupleIsValid(oprtuple))
- elog(ERROR, "intltsel: no tuple for operator %u", opid);
+ elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
/* Convert the constant to a uniform comparison scale. */
- if (! convert_to_scale(value,
- ((flag & SEL_RIGHT) ? rtype : ltype),
- &val))
+ if (! convert_to_scalar(value,
+ ((flag & SEL_RIGHT) ? rtype : ltype),
+ &val))
{
- /* Ideally we'd produce an error here, on the grounds that
- * the given operator shouldn't have intltsel registered as its
+ /* Ideally we'd produce an error here, on the grounds that the
+ * given operator shouldn't have scalarltsel registered as its
* selectivity func unless we can deal with its operand types.
- * But currently, all manner of stuff is invoking intltsel,
+ * But currently, all manner of stuff is invoking scalarltsel,
* so give a default estimate until that can be fixed.
*/
*result = DEFAULT_INEQ_SEL;
@@ -276,7 +273,7 @@ intltsel(Oid opid,
getattproperties(relid, attno,
&typid, &typlen, &typbyval, &typmod);
- if (! getattstatistics(relid, attno, opid, typid, typmod,
+ if (! getattstatistics(relid, attno, typid, typmod,
NULL, NULL, NULL,
&loval, &hival))
{
@@ -286,8 +283,8 @@ intltsel(Oid opid,
}
/* Convert the attribute's loval/hival to common scale. */
- if (! convert_to_scale(loval, typid, &low) ||
- ! convert_to_scale(hival, typid, &high))
+ if (! convert_to_scalar(loval, typid, &low) ||
+ ! convert_to_scalar(hival, typid, &high))
{
/* See above comments... */
if (! typbyval)
@@ -341,23 +338,23 @@ intltsel(Oid opid,
}
/*
- * intgtsel - Selectivity of ">" (also ">=") for integers.
+ * scalargtsel - Selectivity of ">" (also ">=") for integers.
*
- * See above comments for intltsel.
+ * See above comments for scalarltsel.
*/
float64
-intgtsel(Oid opid,
- Oid relid,
- AttrNumber attno,
- Datum value,
- int32 flag)
+scalargtsel(Oid opid,
+ Oid relid,
+ AttrNumber attno,
+ Datum value,
+ int32 flag)
{
float64 result;
/* Compute selectivity of "<", then invert --- but only if we
* were able to produce a non-default estimate.
*/
- result = intltsel(opid, relid, attno, value, flag);
+ result = scalarltsel(opid, relid, attno, value, flag);
if (*result != DEFAULT_INEQ_SEL)
*result = 1.0 - *result;
return result;
@@ -429,14 +426,14 @@ neqjoinsel(Oid opid,
}
/*
- * intltjoinsel - Join selectivity of "<" and "<="
+ * scalarltjoinsel - Join selectivity of "<" and "<=" for scalars
*/
float64
-intltjoinsel(Oid opid,
- Oid relid1,
- AttrNumber attno1,
- Oid relid2,
- AttrNumber attno2)
+scalarltjoinsel(Oid opid,
+ Oid relid1,
+ AttrNumber attno1,
+ Oid relid2,
+ AttrNumber attno2)
{
float64 result;
@@ -446,14 +443,14 @@ intltjoinsel(Oid opid,
}
/*
- * intgtjoinsel - Join selectivity of ">" and ">="
+ * scalargtjoinsel - Join selectivity of ">" and ">=" for scalars
*/
float64
-intgtjoinsel(Oid opid,
- Oid relid1,
- AttrNumber attno1,
- Oid relid2,
- AttrNumber attno2)
+scalargtjoinsel(Oid opid,
+ Oid relid1,
+ AttrNumber attno1,
+ Oid relid2,
+ AttrNumber attno2)
{
float64 result;
@@ -463,21 +460,25 @@ intgtjoinsel(Oid opid,
}
/*
- * convert_to_scale
- * Convert a given value of the indicated type to the comparison
- * scale needed by intltsel(). Returns "true" if successful.
+ * convert_to_scalar
+ * Convert a non-NULL value of the indicated type to the comparison
+ * scale needed by scalarltsel()/scalargtsel().
+ * Returns "true" if successful.
*
* All numeric datatypes are simply converted to their equivalent
- * "double" values.
- * Future extension: convert string-like types to some suitable scale.
+ * "double" values. String datatypes are converted to a crude scale
+ * using their first character (only if it is in the ASCII range,
+ * to try to avoid problems with non-ASCII collating sequences).
*/
-static bool
-convert_to_scale(Datum value, Oid typid,
- double *scaleval)
+bool
+convert_to_scalar(Datum value, Oid typid,
+ double *scaleval)
{
- /* Fast-path conversions for some built-in types */
switch (typid)
{
+ /*
+ * Built-in numeric types
+ */
case BOOLOID:
*scaleval = (double) DatumGetUInt8(value);
return true;
@@ -504,18 +505,54 @@ convert_to_scale(Datum value, Oid typid,
/* we can treat OIDs as integers... */
*scaleval = (double) DatumGetObjectId(value);
return true;
+
+ /*
+ * Built-in string types
+ */
+ case CHAROID:
+ {
+ char ch = DatumGetChar(value);
+
+ if (ch >= 0 && ch < 127)
+ {
+ *scaleval = (double) ch;
+ return true;
+ }
+ break;
+ }
+ case BPCHAROID:
+ case VARCHAROID:
case TEXTOID:
- /*
- * Eventually this should get handled by somehow scaling as a
- * string value. For now, we need to call it out to avoid
- * falling into the default case, because there is a float8(text)
- * function declared in pg_proc that will do the wrong thing :-(
- */
+ if (VARSIZE(DatumGetPointer(value)) > VARHDRSZ)
+ {
+ char ch = * (char *) VARDATA(DatumGetPointer(value));
+
+ if (ch >= 0 && ch < 127)
+ {
+ *scaleval = (double) ch;
+ return true;
+ }
+ }
+ break;
+ case NAMEOID:
+ {
+ NameData *nm = (NameData *) DatumGetPointer(value);
+ char ch = NameStr(*nm)[0];
+
+ if (ch >= 0 && ch < 127)
+ {
+ *scaleval = (double) ch;
+ return true;
+ }
break;
+ }
+
default:
{
- /* See whether there is a registered type-conversion function,
+ /*
+ * See whether there is a registered type-conversion function,
* namely a procedure named "float8" with the right signature.
+ * If so, assume we can convert the value to the numeric scale.
*/
Oid oid_array[FUNC_MAX_ARGS];
HeapTuple ftup;
@@ -589,7 +626,9 @@ getattproperties(Oid relid, AttrNumber attnum,
* after use if the data type is not by-value.)
*/
static bool
-getattstatistics(Oid relid, AttrNumber attnum, Oid opid, Oid typid,
+getattstatistics(Oid relid,
+ AttrNumber attnum,
+ Oid typid,
int32 typmod,
double *nullfrac,
double *commonfrac,
@@ -603,8 +642,15 @@ getattstatistics(Oid relid, AttrNumber attnum, Oid opid, Oid typid,
Oid typelem;
bool isnull;
- /* We assume that there will only be one entry in pg_statistic
- * for the given rel/att. Someday, VACUUM might store more than one...
+ /*
+ * We assume that there will only be one entry in pg_statistic for
+ * the given rel/att, so we search WITHOUT considering the staop
+ * column. Someday, VACUUM might store more than one entry per rel/att,
+ * corresponding to more than one possible sort ordering defined for
+ * the column type. However, to make that work we will need to figure
+ * out which staop to search for --- it's not necessarily the one we
+ * have at hand! (For example, we might have a '>' operator rather than
+ * the '<' operator that will appear in staop.)
*/
tuple = SearchSysCacheTuple(STATRELID,
ObjectIdGetDatum(relid),