diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2010-10-30 21:55:20 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2010-10-30 21:56:11 -0400 |
commit | 186cbbda8f8dc5e42f68fc7892f206a76d56a20f (patch) | |
tree | 2c909d8365726683a61515b639c02a9ac00682f4 /src/backend/optimizer | |
parent | bd1ff9713369c2f54391112b92e0c22ab5c99180 (diff) | |
download | postgresql-186cbbda8f8dc5e42f68fc7892f206a76d56a20f.tar.gz postgresql-186cbbda8f8dc5e42f68fc7892f206a76d56a20f.zip |
Provide hashing support for arrays.
The core of this patch is hash_array() and associated typcache
infrastructure, which works just about exactly like the existing support
for array comparison.
In addition I did some work to ensure that the planner won't think that an
array type is hashable unless its element type is hashable, and similarly
for sorting. This includes adding a datatype parameter to op_hashjoinable
and op_mergejoinable, and adding an explicit "hashable" flag to
SortGroupClause. The lack of a cross-check on the element type was a
pre-existing bug in mergejoin support --- but it didn't matter so much
before, because if you couldn't sort the element type there wasn't any good
alternative to failing anyhow. Now that we have the alternative of hashing
the array type, there are cases where we can avoid a failure by being picky
at the planner stage, so it's time to be picky.
The issue of exactly how to combine the per-element hash values to produce
an array hash is still open for discussion, but the rest of this is pretty
solid, so I'll commit it as-is.
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r-- | src/backend/optimizer/path/equivclass.c | 4 | ||||
-rw-r--r-- | src/backend/optimizer/plan/createplan.c | 1 | ||||
-rw-r--r-- | src/backend/optimizer/plan/initsplan.c | 9 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planagg.c | 1 | ||||
-rw-r--r-- | src/backend/optimizer/plan/subselect.c | 39 | ||||
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 7 | ||||
-rw-r--r-- | src/backend/optimizer/util/tlist.c | 8 |
7 files changed, 47 insertions, 22 deletions
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index b646aa4c9e6..7dffe18c973 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -925,7 +925,9 @@ generate_join_implied_equalities_normal(PlannerInfo *root, (IsA(inner_em->em_expr, RelabelType) && IsA(((RelabelType *) inner_em->em_expr)->arg, Var))) score++; - if (!enable_hashjoin || op_hashjoinable(eq_op)) + if (!enable_hashjoin || + op_hashjoinable(eq_op, + exprType((Node *) outer_em->em_expr))) score++; if (score > best_score) { diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 52dd27b1583..56647c93ab3 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -958,6 +958,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) sortcl->eqop = eqop; sortcl->sortop = sortop; sortcl->nulls_first = false; + sortcl->hashable = false; /* no need to make this accurate */ sortList = lappend(sortList, sortcl); groupColPos++; } diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c index 77ebadd5cc6..6efb6dc4bce 100644 --- a/src/backend/optimizer/plan/initsplan.c +++ b/src/backend/optimizer/plan/initsplan.c @@ -16,6 +16,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_type.h" +#include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/joininfo.h" @@ -1486,6 +1487,7 @@ check_mergejoinable(RestrictInfo *restrictinfo) { Expr *clause = restrictinfo->clause; Oid opno; + Node *leftarg; if (restrictinfo->pseudoconstant) return; @@ -1495,8 +1497,9 @@ check_mergejoinable(RestrictInfo *restrictinfo) return; opno = ((OpExpr *) clause)->opno; + leftarg = linitial(((OpExpr *) clause)->args); - if (op_mergejoinable(opno) && + if (op_mergejoinable(opno, exprType(leftarg)) && !contain_volatile_functions((Node *) clause)) restrictinfo->mergeopfamilies = get_mergejoin_opfamilies(opno); @@ -1521,6 +1524,7 @@ check_hashjoinable(RestrictInfo *restrictinfo) { Expr *clause = restrictinfo->clause; Oid opno; + Node *leftarg; if (restrictinfo->pseudoconstant) return; @@ -1530,8 +1534,9 @@ check_hashjoinable(RestrictInfo *restrictinfo) return; opno = ((OpExpr *) clause)->opno; + leftarg = linitial(((OpExpr *) clause)->args); - if (op_hashjoinable(opno) && + if (op_hashjoinable(opno, exprType(leftarg)) && !contain_volatile_functions((Node *) clause)) restrictinfo->hashjoinoperator = opno; } diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index e590ee048fe..b2765613a4f 100644 --- a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -518,6 +518,7 @@ make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info) info->aggsortop); sortcl->sortop = info->aggsortop; sortcl->nulls_first = info->nulls_first; + sortcl->hashable = false; /* no need to make this accurate */ subparse->sortClause = list_make1(sortcl); /* set up LIMIT 1 */ diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 137e2ce0491..754753cc12d 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -861,28 +861,45 @@ testexpr_is_hashable(Node *testexpr) return false; } +/* + * Check expression is hashable + strict + * + * We could use op_hashjoinable() and op_strict(), but do it like this to + * avoid a redundant cache lookup. + */ static bool hash_ok_operator(OpExpr *expr) { Oid opid = expr->opno; - HeapTuple tup; - Form_pg_operator optup; /* quick out if not a binary operator */ if (list_length(expr->args) != 2) return false; - /* else must look up the operator properties */ - tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opid)); - if (!HeapTupleIsValid(tup)) - elog(ERROR, "cache lookup failed for operator %u", opid); - optup = (Form_pg_operator) GETSTRUCT(tup); - if (!optup->oprcanhash || !func_strict(optup->oprcode)) + if (opid == ARRAY_EQ_OP) + { + /* array_eq is strict, but must check input type to ensure hashable */ + Node *leftarg = linitial(expr->args); + + return op_hashjoinable(opid, exprType(leftarg)); + } + else { + /* else must look up the operator properties */ + HeapTuple tup; + Form_pg_operator optup; + + tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for operator %u", opid); + optup = (Form_pg_operator) GETSTRUCT(tup); + if (!optup->oprcanhash || !func_strict(optup->oprcode)) + { + ReleaseSysCache(tup); + return false; + } ReleaseSysCache(tup); - return false; + return true; } - ReleaseSysCache(tup); - return true; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 8214acc5713..20d8074bce4 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -18,6 +18,7 @@ #include "catalog/pg_operator.h" #include "miscadmin.h" +#include "nodes/nodeFuncs.h" #include "optimizer/clauses.h" #include "optimizer/cost.h" #include "optimizer/pathnode.h" @@ -878,6 +879,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, Relids left_varnos; Relids right_varnos; Relids all_varnos; + Oid opinputtype; /* Is it a binary opclause? */ if (!IsA(op, OpExpr) || @@ -908,6 +910,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, left_varnos = pull_varnos(left_expr); right_varnos = pull_varnos(right_expr); all_varnos = bms_union(left_varnos, right_varnos); + opinputtype = exprType(left_expr); /* Does it reference both sides? */ if (!bms_overlap(all_varnos, sjinfo->syn_righthand) || @@ -946,14 +949,14 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, if (all_btree) { /* oprcanmerge is considered a hint... */ - if (!op_mergejoinable(opno) || + if (!op_mergejoinable(opno, opinputtype) || get_mergejoin_opfamilies(opno) == NIL) all_btree = false; } if (all_hash) { /* ... but oprcanhash had better be correct */ - if (!op_hashjoinable(opno)) + if (!op_hashjoinable(opno, opinputtype)) all_hash = false; } if (!(all_btree || all_hash)) diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 0220aa0e5f8..8096fcbe613 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -18,7 +18,6 @@ #include "nodes/nodeFuncs.h" #include "optimizer/tlist.h" #include "optimizer/var.h" -#include "utils/lsyscache.h" /***************************************************************************** @@ -348,10 +347,7 @@ grouping_is_sortable(List *groupClause) /* * grouping_is_hashable - is it possible to implement grouping list by hashing? * - * We assume hashing is OK if the equality operators are marked oprcanhash. - * (If there isn't actually a supporting hash function, the executor will - * complain at runtime; but this is a misdeclaration of the operator, not - * a system bug.) + * We rely on the parser to have set the hashable flag correctly. */ bool grouping_is_hashable(List *groupClause) @@ -362,7 +358,7 @@ grouping_is_hashable(List *groupClause) { SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem); - if (!op_hashjoinable(groupcl->eqop)) + if (!groupcl->hashable) return false; } return true; |