aboutsummaryrefslogtreecommitdiff
path: root/src/backend/parser/parse_expr.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2005-12-28 01:30:02 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2005-12-28 01:30:02 +0000
commit6e07709760a29d8dbfb93b9846c905bd40689082 (patch)
tree9bf0084587d7e313ba087ce53c24bc748c63a456 /src/backend/parser/parse_expr.c
parenta37422e042a6114ab0e513f50dac4a47fab22313 (diff)
downloadpostgresql-6e07709760a29d8dbfb93b9846c905bd40689082.tar.gz
postgresql-6e07709760a29d8dbfb93b9846c905bd40689082.zip
Implement SQL-compliant treatment of row comparisons for < <= > >= cases
(previously we only did = and <> correctly). Also, allow row comparisons with any operators that are in btree opclasses, not only those with these specific names. This gets rid of a whole lot of indefensible assumptions about the behavior of particular operators based on their names ... though it's still true that IN and NOT IN expand to "= ANY". The patch adds a RowCompareExpr expression node type, and makes some changes in the representation of ANY/ALL/ROWCOMPARE SubLinks so that they can share code with RowCompareExpr. I have not yet done anything about making RowCompareExpr an indexable operator, but will look at that soon. initdb forced due to changes in stored rules.
Diffstat (limited to 'src/backend/parser/parse_expr.c')
-rw-r--r--src/backend/parser/parse_expr.c450
1 files changed, 288 insertions, 162 deletions
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index ece78b21820..923d357ee17 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -8,21 +8,20 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.188 2005/11/28 04:35:31 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.189 2005/12/28 01:30:00 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
-#include "catalog/pg_operator.h"
-#include "catalog/pg_proc.h"
#include "commands/dbcommands.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/params.h"
#include "nodes/plannodes.h"
+#include "optimizer/clauses.h"
#include "parser/analyze.h"
#include "parser/gramparse.h"
#include "parser/parse_coerce.h"
@@ -33,7 +32,7 @@
#include "parser/parse_type.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
-#include "utils/syscache.h"
+
bool Transform_null_equals = false;
@@ -64,8 +63,8 @@ static Node *transformIndirection(ParseState *pstate, Node *basenode,
List *indirection);
static Node *typecast_expression(ParseState *pstate, Node *expr,
TypeName *typename);
-static Node *make_row_op(ParseState *pstate, List *opname,
- RowExpr *lrow, RowExpr *rrow);
+static Node *make_row_comparison_op(ParseState *pstate, List *opname,
+ List *largs, List *rargs);
static Node *make_row_distinct_op(ParseState *pstate, List *opname,
RowExpr *lrow, RowExpr *rrow);
static Expr *make_distinct_op(ParseState *pstate, List *opname,
@@ -592,14 +591,14 @@ transformAExprOp(ParseState *pstate, A_Expr *a)
((SubLink *) rexpr)->subLinkType == EXPR_SUBLINK)
{
/*
- * Convert "row op subselect" into a MULTIEXPR sublink. Formerly the
+ * Convert "row op subselect" into a ROWCOMPARE sublink. Formerly the
* grammar did this, but now that a row construct is allowed anywhere
* in expressions, it's easier to do it here.
*/
SubLink *s = (SubLink *) rexpr;
- s->subLinkType = MULTIEXPR_SUBLINK;
- s->lefthand = ((RowExpr *) lexpr)->args;
+ s->subLinkType = ROWCOMPARE_SUBLINK;
+ s->testexpr = lexpr;
s->operName = a->name;
result = transformExpr(pstate, (Node *) s);
}
@@ -612,10 +611,10 @@ transformAExprOp(ParseState *pstate, A_Expr *a)
Assert(IsA(lexpr, RowExpr));
Assert(IsA(rexpr, RowExpr));
- result = make_row_op(pstate,
- a->name,
- (RowExpr *) lexpr,
- (RowExpr *) rexpr);
+ result = make_row_comparison_op(pstate,
+ a->name,
+ ((RowExpr *) lexpr)->args,
+ ((RowExpr *) rexpr)->args);
}
else
{
@@ -885,10 +884,10 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("arguments of row IN must all be row expressions")));
- cmp = make_row_op(pstate,
- a->name,
- (RowExpr *) copyObject(lexpr),
- (RowExpr *) rexpr);
+ cmp = make_row_comparison_op(pstate,
+ a->name,
+ (List *) copyObject(((RowExpr *) lexpr)->args),
+ ((RowExpr *) rexpr)->args);
}
else
cmp = (Node *) make_op(pstate,
@@ -1080,13 +1079,11 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
if (sublink->subLinkType == EXISTS_SUBLINK)
{
/*
- * EXISTS needs no lefthand or combining operator. These fields
- * should be NIL already, but make sure.
+ * EXISTS needs no test expression or combining operator.
+ * These fields should be null already, but make sure.
*/
- sublink->lefthand = NIL;
+ sublink->testexpr = NULL;
sublink->operName = NIL;
- sublink->operOids = NIL;
- sublink->useOr = FALSE;
}
else if (sublink->subLinkType == EXPR_SUBLINK ||
sublink->subLinkType == ARRAY_SUBLINK)
@@ -1111,128 +1108,72 @@ transformSubLink(ParseState *pstate, SubLink *sublink)
}
/*
- * EXPR and ARRAY need no lefthand or combining operator. These fields
- * should be NIL already, but make sure.
+ * EXPR and ARRAY need no test expression or combining operator.
+ * These fields should be null already, but make sure.
*/
- sublink->lefthand = NIL;
+ sublink->testexpr = NULL;
sublink->operName = NIL;
- sublink->operOids = NIL;
- sublink->useOr = FALSE;
}
else
{
- /* ALL, ANY, or MULTIEXPR: generate operator list */
- List *left_list = sublink->lefthand;
- List *right_list = qtree->targetList;
- int row_length = list_length(left_list);
- bool needNot = false;
- List *op = sublink->operName;
- char *opname = strVal(llast(op));
+ /* ALL, ANY, or ROWCOMPARE: generate row-comparing expression */
+ Node *lefthand;
+ List *left_list;
+ List *right_list;
ListCell *l;
- ListCell *ll_item;
-
- /* transform lefthand expressions */
- foreach(l, left_list)
- lfirst(l) = transformExpr(pstate, lfirst(l));
/*
- * If the expression is "<> ALL" (with unqualified opname) then
- * convert it to "NOT IN". This is a hack to improve efficiency of
- * expressions output by pre-7.4 Postgres.
+ * Transform lefthand expression, and convert to a list
*/
- if (sublink->subLinkType == ALL_SUBLINK &&
- list_length(op) == 1 && strcmp(opname, "<>") == 0)
- {
- sublink->subLinkType = ANY_SUBLINK;
- opname = pstrdup("=");
- op = list_make1(makeString(opname));
- sublink->operName = op;
- needNot = true;
- }
-
- /* Set useOr if op is "<>" (possibly qualified) */
- if (strcmp(opname, "<>") == 0)
- sublink->useOr = TRUE;
+ lefthand = transformExpr(pstate, sublink->testexpr);
+ if (lefthand && IsA(lefthand, RowExpr))
+ left_list = ((RowExpr *) lefthand)->args;
else
- sublink->useOr = FALSE;
-
- /* Combining operators other than =/<> is dubious... */
- if (row_length != 1 &&
- strcmp(opname, "=") != 0 &&
- strcmp(opname, "<>") != 0)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("row comparison cannot use operator %s",
- opname)));
+ left_list = list_make1(lefthand);
/*
- * To build the list of combining operator OIDs, we must scan
- * subquery's targetlist to find values that will be matched against
- * lefthand values. We need to ignore resjunk targets, so doing the
- * outer iteration over right_list is easier than doing it over
- * left_list.
+ * Build a list of PARAM_SUBLINK nodes representing the
+ * output columns of the subquery.
*/
- sublink->operOids = NIL;
-
- ll_item = list_head(left_list);
- foreach(l, right_list)
+ right_list = NIL;
+ foreach(l, qtree->targetList)
{
TargetEntry *tent = (TargetEntry *) lfirst(l);
- Node *lexpr;
- Operator optup;
- Form_pg_operator opform;
+ Param *param;
if (tent->resjunk)
continue;
- if (ll_item == NULL)
- ereport(ERROR,
- (errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("subquery has too many columns")));
- lexpr = lfirst(ll_item);
- ll_item = lnext(ll_item);
+ param = makeNode(Param);
+ param->paramkind = PARAM_SUBLINK;
+ param->paramid = (AttrNumber) tent->resno;
+ param->paramtype = exprType((Node *) tent->expr);
- /*
- * It's OK to use oper() not compatible_oper() here, because
- * make_subplan() will insert type coercion calls if needed.
- */
- optup = oper(op,
- exprType(lexpr),
- exprType((Node *) tent->expr),
- false);
- opform = (Form_pg_operator) GETSTRUCT(optup);
-
- if (opform->oprresult != BOOLOID)
- ereport(ERROR,
- (errcode(ERRCODE_DATATYPE_MISMATCH),
- errmsg("operator %s must return type boolean, not type %s",
- opname,
- format_type_be(opform->oprresult)),
- errhint("The operator of a quantified predicate subquery must return type boolean.")));
-
- if (get_func_retset(opform->oprcode))
- ereport(ERROR,
- (errcode(ERRCODE_DATATYPE_MISMATCH),
- errmsg("operator %s must not return a set",
- opname),
- errhint("The operator of a quantified predicate subquery must return type boolean.")));
-
- sublink->operOids = lappend_oid(sublink->operOids,
- oprid(optup));
-
- ReleaseSysCache(optup);
+ right_list = lappend(right_list, param);
}
- if (ll_item != NULL)
+
+ /*
+ * We could rely on make_row_comparison_op to complain if the
+ * list lengths differ, but we prefer to generate a more specific
+ * error message.
+ */
+ if (list_length(left_list) < list_length(right_list))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("subquery has too many columns")));
+ if (list_length(left_list) > list_length(right_list))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("subquery has too few columns")));
- if (needNot)
- {
- result = coerce_to_boolean(pstate, result, "NOT");
- result = (Node *) makeBoolExpr(NOT_EXPR,
- list_make1(result));
- }
+ /*
+ * Identify the combining operator(s) and generate a suitable
+ * row-comparison expression.
+ */
+ sublink->testexpr = make_row_comparison_op(pstate,
+ sublink->operName,
+ left_list,
+ right_list);
}
return result;
@@ -1673,6 +1614,9 @@ exprType(Node *expr)
case T_RowExpr:
type = ((RowExpr *) expr)->row_typeid;
break;
+ case T_RowCompareExpr:
+ type = BOOLOID;
+ break;
case T_CoalesceExpr:
type = ((CoalesceExpr *) expr)->coalescetype;
break;
@@ -1953,76 +1897,258 @@ typecast_expression(ParseState *pstate, Node *expr, TypeName *typename)
}
/*
- * Transform a "row op row" construct
+ * Transform a "row compare-op row" construct
*
- * The input RowExprs are already transformed
+ * The inputs are lists of already-transformed expressions.
+ * As with coerce_type, pstate may be NULL if no special unknown-Param
+ * processing is wanted.
+ *
+ * The output may be a single OpExpr, an AND or OR combination of OpExprs,
+ * or a RowCompareExpr. In all cases it is guaranteed to return boolean.
+ * The AND, OR, and RowCompareExpr cases further imply things about the
+ * behavior of the operators (ie, they behave as =, <>, or < <= > >=).
*/
static Node *
-make_row_op(ParseState *pstate, List *opname,
- RowExpr *lrow, RowExpr *rrow)
+make_row_comparison_op(ParseState *pstate, List *opname,
+ List *largs, List *rargs)
{
- Node *result = NULL;
- List *largs = lrow->args;
- List *rargs = rrow->args;
+ RowCompareExpr *rcexpr;
+ RowCompareType rctype;
+ List *opexprs;
+ List *opnos;
+ List *opclasses;
ListCell *l,
*r;
- char *oprname;
- BoolExprType boolop;
-
- if (list_length(largs) != list_length(rargs))
+ List **opclass_lists;
+ List **opstrat_lists;
+ Bitmapset *strats;
+ int nopers;
+ int i;
+
+ nopers = list_length(largs);
+ if (nopers != list_length(rargs))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unequal number of entries in row expressions")));
/*
- * XXX it's really wrong to generate a simple AND combination for < <= >
- * >=. We probably need to invent a new runtime node type to handle those
- * correctly. For the moment, though, keep on doing this ...
+ * We can't compare zero-length rows because there is no principled
+ * basis for figuring out what the operator is.
*/
- oprname = strVal(llast(opname));
-
- if ((strcmp(oprname, "=") == 0) ||
- (strcmp(oprname, "<") == 0) ||
- (strcmp(oprname, "<=") == 0) ||
- (strcmp(oprname, ">") == 0) ||
- (strcmp(oprname, ">=") == 0))
- boolop = AND_EXPR;
- else if (strcmp(oprname, "<>") == 0)
- boolop = OR_EXPR;
- else
- {
+ if (nopers == 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("operator %s is not supported for row expressions",
- oprname)));
- boolop = 0; /* keep compiler quiet */
- }
+ errmsg("cannot compare rows of zero length")));
+ /*
+ * Identify all the pairwise operators, using make_op so that
+ * behavior is the same as in the simple scalar case.
+ */
+ opexprs = NIL;
forboth(l, largs, r, rargs)
{
Node *larg = (Node *) lfirst(l);
Node *rarg = (Node *) lfirst(r);
- Node *cmp;
+ OpExpr *cmp;
- cmp = (Node *) make_op(pstate, opname, larg, rarg);
- cmp = coerce_to_boolean(pstate, cmp, "row comparison");
- if (result == NULL)
- result = cmp;
+ cmp = (OpExpr *) make_op(pstate, opname, larg, rarg);
+ Assert(IsA(cmp, OpExpr));
+
+ /*
+ * We don't use coerce_to_boolean here because we insist on the
+ * operator yielding boolean directly, not via coercion. If it
+ * doesn't yield bool it won't be in any index opclasses...
+ */
+ if (cmp->opresulttype != BOOLOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("row comparison operator must yield type boolean, "
+ "not type %s",
+ format_type_be(cmp->opresulttype))));
+ if (expression_returns_set((Node *) cmp))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("row comparison operator must not return a set")));
+ opexprs = lappend(opexprs, cmp);
+ }
+
+ /*
+ * If rows are length 1, just return the single operator. In this
+ * case we don't insist on identifying btree semantics for the operator
+ * (but we still require it to return boolean).
+ */
+ if (nopers == 1)
+ return (Node *) linitial(opexprs);
+
+ /*
+ * Now we must determine which row comparison semantics (= <> < <= > >=)
+ * apply to this set of operators. We look for btree opclasses containing
+ * the operators, and see which interpretations (strategy numbers) exist
+ * for each operator.
+ */
+ opclass_lists = (List **) palloc(nopers * sizeof(List *));
+ opstrat_lists = (List **) palloc(nopers * sizeof(List *));
+ strats = NULL;
+ i = 0;
+ foreach(l, opexprs)
+ {
+ Bitmapset *this_strats;
+ ListCell *j;
+
+ get_op_btree_interpretation(((OpExpr *) lfirst(l))->opno,
+ &opclass_lists[i], &opstrat_lists[i]);
+ /*
+ * convert strategy number list to a Bitmapset to make the intersection
+ * calculation easy.
+ */
+ this_strats = NULL;
+ foreach(j, opstrat_lists[i])
+ {
+ this_strats = bms_add_member(this_strats, lfirst_int(j));
+ }
+ if (i == 0)
+ strats = this_strats;
else
- result = (Node *) makeBoolExpr(boolop,
- list_make2(result, cmp));
+ strats = bms_int_members(strats, this_strats);
+ i++;
}
- if (result == NULL)
+ switch (bms_membership(strats))
+ {
+ case BMS_EMPTY_SET:
+ /* No common interpretation, so fail */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not determine interpretation of row comparison operator %s",
+ strVal(llast(opname))),
+ errhint("Row comparison operators must be associated with btree operator classes.")));
+ rctype = 0; /* keep compiler quiet */
+ break;
+ case BMS_SINGLETON:
+ /* Simple case: just one possible interpretation */
+ rctype = bms_singleton_member(strats);
+ break;
+ case BMS_MULTIPLE:
+ default: /* keep compiler quiet */
+ {
+ /*
+ * Prefer the interpretation with the most default opclasses.
+ */
+ int best_defaults = 0;
+ bool multiple_best = false;
+ int this_rctype;
+
+ rctype = 0; /* keep compiler quiet */
+ while ((this_rctype = bms_first_member(strats)) >= 0)
+ {
+ int ndefaults = 0;
+
+ for (i = 0; i < nopers; i++)
+ {
+ forboth(l, opclass_lists[i], r, opstrat_lists[i])
+ {
+ Oid opclass = lfirst_oid(l);
+ int opstrat = lfirst_int(r);
+
+ if (opstrat == this_rctype &&
+ opclass_is_default(opclass))
+ ndefaults++;
+ }
+ }
+ if (ndefaults > best_defaults)
+ {
+ best_defaults = ndefaults;
+ rctype = this_rctype;
+ multiple_best = false;
+ }
+ else if (ndefaults == best_defaults)
+ multiple_best = true;
+ }
+ if (best_defaults == 0 || multiple_best)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not determine interpretation of row comparison operator %s",
+ strVal(llast(opname))),
+ errdetail("There are multiple equally-plausible candidates.")));
+ break;
+ }
+ }
+
+ /*
+ * For = and <> cases, we just combine the pairwise operators with
+ * AND or OR respectively.
+ *
+ * Note: this is presently the only place where the parser generates
+ * BoolExpr with more than two arguments. Should be OK since the
+ * rest of the system thinks BoolExpr is N-argument anyway.
+ */
+ if (rctype == ROWCOMPARE_EQ)
+ return (Node *) makeBoolExpr(AND_EXPR, opexprs);
+ if (rctype == ROWCOMPARE_NE)
+ return (Node *) makeBoolExpr(OR_EXPR, opexprs);
+
+ /*
+ * Otherwise we need to determine exactly which opclass to associate
+ * with each operator.
+ */
+ opclasses = NIL;
+ for (i = 0; i < nopers; i++)
{
- /* zero-length rows? Generate constant TRUE or FALSE */
- if (boolop == AND_EXPR)
- result = makeBoolConst(true, false);
+ Oid best_opclass = 0;
+ int ndefault = 0;
+ int nmatch = 0;
+
+ forboth(l, opclass_lists[i], r, opstrat_lists[i])
+ {
+ Oid opclass = lfirst_oid(l);
+ int opstrat = lfirst_int(r);
+
+ if (opstrat == rctype)
+ {
+ if (ndefault == 0)
+ best_opclass = opclass;
+ if (opclass_is_default(opclass))
+ ndefault++;
+ else
+ nmatch++;
+ }
+ }
+ if (ndefault == 1 || (ndefault == 0 && nmatch == 1))
+ opclasses = lappend_oid(opclasses, best_opclass);
else
- result = makeBoolConst(false, false);
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not determine interpretation of row comparison operator %s",
+ strVal(llast(opname))),
+ errdetail("There are multiple equally-plausible candidates.")));
}
- return result;
+ /*
+ * Now deconstruct the OpExprs and create a RowCompareExpr.
+ *
+ * Note: can't just reuse the passed largs/rargs lists, because of
+ * possibility that make_op inserted coercion operations.
+ */
+ opnos = NIL;
+ largs = NIL;
+ rargs = NIL;
+ foreach(l, opexprs)
+ {
+ OpExpr *cmp = (OpExpr *) lfirst(l);
+
+ opnos = lappend_oid(opnos, cmp->opno);
+ largs = lappend(largs, linitial(cmp->args));
+ rargs = lappend(rargs, lsecond(cmp->args));
+ }
+
+ rcexpr = makeNode(RowCompareExpr);
+ rcexpr->rctype = rctype;
+ rcexpr->opnos = opnos;
+ rcexpr->opclasses = opclasses;
+ rcexpr->largs = largs;
+ rcexpr->rargs = rargs;
+
+ return (Node *) rcexpr;
}
/*