diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/executor/execExpr.c | 99 | ||||
-rw-r--r-- | src/backend/executor/execExprInterp.c | 262 | ||||
-rw-r--r-- | src/backend/jit/llvm/llvmjit_expr.c | 6 | ||||
-rw-r--r-- | src/backend/jit/llvm/llvmjit_types.c | 1 | ||||
-rw-r--r-- | src/backend/nodes/copyfuncs.c | 1 | ||||
-rw-r--r-- | src/backend/nodes/equalfuncs.c | 6 | ||||
-rw-r--r-- | src/backend/nodes/outfuncs.c | 1 | ||||
-rw-r--r-- | src/backend/nodes/readfuncs.c | 1 | ||||
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 43 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 10 | ||||
-rw-r--r-- | src/backend/optimizer/plan/setrefs.c | 10 | ||||
-rw-r--r-- | src/backend/optimizer/prep/prepqual.c | 1 | ||||
-rw-r--r-- | src/backend/optimizer/util/clauses.c | 64 | ||||
-rw-r--r-- | src/backend/parser/parse_oper.c | 1 | ||||
-rw-r--r-- | src/backend/partitioning/partbounds.c | 1 | ||||
-rw-r--r-- | src/include/catalog/catversion.h | 2 | ||||
-rw-r--r-- | src/include/executor/execExpr.h | 19 | ||||
-rw-r--r-- | src/include/nodes/primnodes.h | 9 | ||||
-rw-r--r-- | src/include/optimizer/optimizer.h | 2 | ||||
-rw-r--r-- | src/test/regress/expected/expressions.out | 118 | ||||
-rw-r--r-- | src/test/regress/sql/expressions.sql | 85 |
21 files changed, 712 insertions, 30 deletions
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c index 23c0fb9379b..74fcfbea566 100644 --- a/src/backend/executor/execExpr.c +++ b/src/backend/executor/execExpr.c @@ -1149,6 +1149,8 @@ ExecInitExprRec(Expr *node, ExprState *state, FmgrInfo *finfo; FunctionCallInfo fcinfo; AclResult aclresult; + FmgrInfo *hash_finfo; + FunctionCallInfo hash_fcinfo; Assert(list_length(opexpr->args) == 2); scalararg = (Expr *) linitial(opexpr->args); @@ -1163,6 +1165,17 @@ ExecInitExprRec(Expr *node, ExprState *state, get_func_name(opexpr->opfuncid)); InvokeFunctionExecuteHook(opexpr->opfuncid); + if (OidIsValid(opexpr->hashfuncid)) + { + aclresult = pg_proc_aclcheck(opexpr->hashfuncid, + GetUserId(), + ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, + get_func_name(opexpr->hashfuncid)); + InvokeFunctionExecuteHook(opexpr->hashfuncid); + } + /* Set up the primary fmgr lookup information */ finfo = palloc0(sizeof(FmgrInfo)); fcinfo = palloc0(SizeForFunctionCallInfo(2)); @@ -1171,26 +1184,76 @@ ExecInitExprRec(Expr *node, ExprState *state, InitFunctionCallInfoData(*fcinfo, finfo, 2, opexpr->inputcollid, NULL, NULL); - /* Evaluate scalar directly into left function argument */ - ExecInitExprRec(scalararg, state, - &fcinfo->args[0].value, &fcinfo->args[0].isnull); - /* - * Evaluate array argument into our return value. There's no - * danger in that, because the return value is guaranteed to - * be overwritten by EEOP_SCALARARRAYOP, and will not be - * passed to any other expression. + * If hashfuncid is set, we create a EEOP_HASHED_SCALARARRAYOP + * step instead of a EEOP_SCALARARRAYOP. This provides much + * faster lookup performance than the normal linear search + * when the number of items in the array is anything but very + * small. */ - ExecInitExprRec(arrayarg, state, resv, resnull); - - /* And perform the operation */ - scratch.opcode = EEOP_SCALARARRAYOP; - scratch.d.scalararrayop.element_type = InvalidOid; - scratch.d.scalararrayop.useOr = opexpr->useOr; - scratch.d.scalararrayop.finfo = finfo; - scratch.d.scalararrayop.fcinfo_data = fcinfo; - scratch.d.scalararrayop.fn_addr = finfo->fn_addr; - ExprEvalPushStep(state, &scratch); + if (OidIsValid(opexpr->hashfuncid)) + { + hash_finfo = palloc0(sizeof(FmgrInfo)); + hash_fcinfo = palloc0(SizeForFunctionCallInfo(1)); + fmgr_info(opexpr->hashfuncid, hash_finfo); + fmgr_info_set_expr((Node *) node, hash_finfo); + InitFunctionCallInfoData(*hash_fcinfo, hash_finfo, + 1, opexpr->inputcollid, NULL, + NULL); + + scratch.d.hashedscalararrayop.hash_finfo = hash_finfo; + scratch.d.hashedscalararrayop.hash_fcinfo_data = hash_fcinfo; + scratch.d.hashedscalararrayop.hash_fn_addr = hash_finfo->fn_addr; + + /* Evaluate scalar directly into left function argument */ + ExecInitExprRec(scalararg, state, + &fcinfo->args[0].value, &fcinfo->args[0].isnull); + + /* + * Evaluate array argument into our return value. There's + * no danger in that, because the return value is + * guaranteed to be overwritten by + * EEOP_HASHED_SCALARARRAYOP, and will not be passed to + * any other expression. + */ + ExecInitExprRec(arrayarg, state, resv, resnull); + + /* And perform the operation */ + scratch.opcode = EEOP_HASHED_SCALARARRAYOP; + scratch.d.hashedscalararrayop.finfo = finfo; + scratch.d.hashedscalararrayop.fcinfo_data = fcinfo; + scratch.d.hashedscalararrayop.fn_addr = finfo->fn_addr; + + scratch.d.hashedscalararrayop.hash_finfo = hash_finfo; + scratch.d.hashedscalararrayop.hash_fcinfo_data = hash_fcinfo; + scratch.d.hashedscalararrayop.hash_fn_addr = hash_finfo->fn_addr; + + ExprEvalPushStep(state, &scratch); + } + else + { + /* Evaluate scalar directly into left function argument */ + ExecInitExprRec(scalararg, state, + &fcinfo->args[0].value, + &fcinfo->args[0].isnull); + + /* + * Evaluate array argument into our return value. There's + * no danger in that, because the return value is + * guaranteed to be overwritten by EEOP_SCALARARRAYOP, and + * will not be passed to any other expression. + */ + ExecInitExprRec(arrayarg, state, resv, resnull); + + /* And perform the operation */ + scratch.opcode = EEOP_SCALARARRAYOP; + scratch.d.scalararrayop.element_type = InvalidOid; + scratch.d.scalararrayop.useOr = opexpr->useOr; + scratch.d.scalararrayop.finfo = finfo; + scratch.d.scalararrayop.fcinfo_data = fcinfo; + scratch.d.scalararrayop.fn_addr = finfo->fn_addr; + ExprEvalPushStep(state, &scratch); + } break; } diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c index 6308286d8c3..07948c1b131 100644 --- a/src/backend/executor/execExprInterp.c +++ b/src/backend/executor/execExprInterp.c @@ -179,6 +179,51 @@ static pg_attribute_always_inline void ExecAggPlainTransByRef(AggState *aggstate int setno); /* + * ScalarArrayOpExprHashEntry + * Hash table entry type used during EEOP_HASHED_SCALARARRAYOP + */ +typedef struct ScalarArrayOpExprHashEntry +{ + Datum key; + uint32 status; /* hash status */ + uint32 hash; /* hash value (cached) */ +} ScalarArrayOpExprHashEntry; + +#define SH_PREFIX saophash +#define SH_ELEMENT_TYPE ScalarArrayOpExprHashEntry +#define SH_KEY_TYPE Datum +#define SH_SCOPE static inline +#define SH_DECLARE +#include "lib/simplehash.h" + +static bool saop_hash_element_match(struct saophash_hash *tb, Datum key1, + Datum key2); +static uint32 saop_element_hash(struct saophash_hash *tb, Datum key); + +/* + * ScalarArrayOpExprHashTable + * Hash table for EEOP_HASHED_SCALARARRAYOP + */ +typedef struct ScalarArrayOpExprHashTable +{ + saophash_hash *hashtab; /* underlying hash table */ + struct ExprEvalStep *op; +} ScalarArrayOpExprHashTable; + +/* Define parameters for ScalarArrayOpExpr hash table code generation. */ +#define SH_PREFIX saophash +#define SH_ELEMENT_TYPE ScalarArrayOpExprHashEntry +#define SH_KEY_TYPE Datum +#define SH_KEY key +#define SH_HASH_KEY(tb, key) saop_element_hash(tb, key) +#define SH_EQUAL(tb, a, b) saop_hash_element_match(tb, a, b) +#define SH_SCOPE static inline +#define SH_STORE_HASH +#define SH_GET_HASH(tb, a) a->hash +#define SH_DEFINE +#include "lib/simplehash.h" + +/* * Prepare ExprState for interpreted execution. */ void @@ -426,6 +471,7 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) &&CASE_EEOP_DOMAIN_CHECK, &&CASE_EEOP_CONVERT_ROWTYPE, &&CASE_EEOP_SCALARARRAYOP, + &&CASE_EEOP_HASHED_SCALARARRAYOP, &&CASE_EEOP_XMLEXPR, &&CASE_EEOP_AGGREF, &&CASE_EEOP_GROUPING_FUNC, @@ -1436,6 +1482,14 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull) EEO_NEXT(); } + EEO_CASE(EEOP_HASHED_SCALARARRAYOP) + { + /* too complex for an inline implementation */ + ExecEvalHashedScalarArrayOp(state, op, econtext); + + EEO_NEXT(); + } + EEO_CASE(EEOP_DOMAIN_NOTNULL) { /* too complex for an inline implementation */ @@ -3346,6 +3400,214 @@ ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op) } /* + * Hash function for scalar array hash op elements. + * + * We use the element type's default hash opclass, and the column collation + * if the type is collation-sensitive. + */ +static uint32 +saop_element_hash(struct saophash_hash *tb, Datum key) +{ + ScalarArrayOpExprHashTable *elements_tab = (ScalarArrayOpExprHashTable *) tb->private_data; + FunctionCallInfo fcinfo = elements_tab->op->d.hashedscalararrayop.hash_fcinfo_data; + Datum hash; + + fcinfo->args[0].value = key; + fcinfo->args[0].isnull = false; + + hash = elements_tab->op->d.hashedscalararrayop.hash_fn_addr(fcinfo); + + return DatumGetUInt32(hash); +} + +/* + * Matching function for scalar array hash op elements, to be used in hashtable + * lookups. + */ +static bool +saop_hash_element_match(struct saophash_hash *tb, Datum key1, Datum key2) +{ + Datum result; + + ScalarArrayOpExprHashTable *elements_tab = (ScalarArrayOpExprHashTable *) tb->private_data; + FunctionCallInfo fcinfo = elements_tab->op->d.hashedscalararrayop.fcinfo_data; + + fcinfo->args[0].value = key1; + fcinfo->args[0].isnull = false; + fcinfo->args[1].value = key2; + fcinfo->args[1].isnull = false; + + result = elements_tab->op->d.hashedscalararrayop.fn_addr(fcinfo); + + return DatumGetBool(result); +} + +/* + * Evaluate "scalar op ANY (const array)". + * + * Similar to ExecEvalScalarArrayOp, but optimized for faster repeat lookups + * by building a hashtable on the first lookup. This hashtable will be reused + * by subsequent lookups. Unlike ExecEvalScalarArrayOp, this version only + * supports OR semantics. + * + * Source array is in our result area, scalar arg is already evaluated into + * fcinfo->args[0]. + * + * The operator always yields boolean. + */ +void +ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, ExprContext *econtext) +{ + ScalarArrayOpExprHashTable *elements_tab = op->d.hashedscalararrayop.elements_tab; + FunctionCallInfo fcinfo = op->d.hashedscalararrayop.fcinfo_data; + bool strictfunc = op->d.hashedscalararrayop.finfo->fn_strict; + Datum scalar = fcinfo->args[0].value; + bool scalar_isnull = fcinfo->args[0].isnull; + Datum result; + bool resultnull; + bool hashfound; + + /* We don't setup a hashed scalar array op if the array const is null. */ + Assert(!*op->resnull); + + /* + * If the scalar is NULL, and the function is strict, return NULL; no + * point in executing the search. + */ + if (fcinfo->args[0].isnull && strictfunc) + { + *op->resnull = true; + return; + } + + /* Build the hash table on first evaluation */ + if (elements_tab == NULL) + { + int16 typlen; + bool typbyval; + char typalign; + int nitems; + bool has_nulls = false; + char *s; + bits8 *bitmap; + int bitmask; + MemoryContext oldcontext; + ArrayType *arr; + + arr = DatumGetArrayTypeP(*op->resvalue); + nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); + + get_typlenbyvalalign(ARR_ELEMTYPE(arr), + &typlen, + &typbyval, + &typalign); + + oldcontext = MemoryContextSwitchTo(econtext->ecxt_per_query_memory); + + elements_tab = (ScalarArrayOpExprHashTable *) + palloc(sizeof(ScalarArrayOpExprHashTable)); + op->d.hashedscalararrayop.elements_tab = elements_tab; + elements_tab->op = op; + + /* + * Create the hash table sizing it according to the number of elements + * in the array. This does assume that the array has no duplicates. + * If the array happens to contain many duplicate values then it'll + * just mean that we sized the table a bit on the large side. + */ + elements_tab->hashtab = saophash_create(CurrentMemoryContext, nitems, + elements_tab); + + MemoryContextSwitchTo(oldcontext); + + s = (char *) ARR_DATA_PTR(arr); + bitmap = ARR_NULLBITMAP(arr); + bitmask = 1; + for (int i = 0; i < nitems; i++) + { + /* Get array element, checking for NULL. */ + if (bitmap && (*bitmap & bitmask) == 0) + { + has_nulls = true; + } + else + { + Datum element; + + element = fetch_att(s, typbyval, typlen); + s = att_addlength_pointer(s, typlen, s); + s = (char *) att_align_nominal(s, typalign); + + saophash_insert(elements_tab->hashtab, element, &hashfound); + } + + /* Advance bitmap pointer if any. */ + if (bitmap) + { + bitmask <<= 1; + if (bitmask == 0x100) + { + bitmap++; + bitmask = 1; + } + } + } + + /* + * Remember if we had any nulls so that we know if we need to execute + * non-strict functions with a null lhs value if no match is found. + */ + op->d.hashedscalararrayop.has_nulls = has_nulls; + } + + /* Check the hash to see if we have a match. */ + hashfound = NULL != saophash_lookup(elements_tab->hashtab, scalar); + + result = BoolGetDatum(hashfound); + resultnull = false; + + /* + * If we didn't find a match in the array, we still might need to handle + * the possibility of null values. We didn't put any NULLs into the + * hashtable, but instead marked if we found any when building the table + * in has_nulls. + */ + if (!DatumGetBool(result) && op->d.hashedscalararrayop.has_nulls) + { + if (strictfunc) + { + + /* + * We have nulls in the array so a non-null lhs and no match must + * yield NULL. + */ + result = (Datum) 0; + resultnull = true; + } + else + { + /* + * Execute function will null rhs just once. + * + * The hash lookup path will have scribbled on the lhs argument so + * we need to set it up also (even though we entered this function + * with it already set). + */ + fcinfo->args[0].value = scalar; + fcinfo->args[0].isnull = scalar_isnull; + fcinfo->args[1].value = (Datum) 0; + fcinfo->args[1].isnull = true; + + result = op->d.hashedscalararrayop.fn_addr(fcinfo); + resultnull = fcinfo->isnull; + } + } + + *op->resvalue = result; + *op->resnull = resultnull; +} + +/* * Evaluate a NOT NULL domain constraint. */ void diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c index 42bf4754c52..0f9cc790c7d 100644 --- a/src/backend/jit/llvm/llvmjit_expr.c +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -1836,6 +1836,12 @@ llvm_compile_expr(ExprState *state) LLVMBuildBr(b, opblocks[opno + 1]); break; + case EEOP_HASHED_SCALARARRAYOP: + build_EvalXFunc(b, mod, "ExecEvalHashedScalarArrayOp", + v_state, op, v_econtext); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + case EEOP_XMLEXPR: build_EvalXFunc(b, mod, "ExecEvalXmlExpr", v_state, op); diff --git a/src/backend/jit/llvm/llvmjit_types.c b/src/backend/jit/llvm/llvmjit_types.c index 8bc58b641cf..2deb65c5b5c 100644 --- a/src/backend/jit/llvm/llvmjit_types.c +++ b/src/backend/jit/llvm/llvmjit_types.c @@ -126,6 +126,7 @@ void *referenced_functions[] = ExecEvalRowNull, ExecEvalSQLValueFunction, ExecEvalScalarArrayOp, + ExecEvalHashedScalarArrayOp, ExecEvalSubPlan, ExecEvalSysVar, ExecEvalWholeRowVar, diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index fcc5ebb206f..632cc31a045 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1716,6 +1716,7 @@ _copyScalarArrayOpExpr(const ScalarArrayOpExpr *from) COPY_SCALAR_FIELD(opno); COPY_SCALAR_FIELD(opfuncid); + COPY_SCALAR_FIELD(hashfuncid); COPY_SCALAR_FIELD(useOr); COPY_SCALAR_FIELD(inputcollid); COPY_NODE_FIELD(args); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 936365e09a8..a410a29a178 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -408,6 +408,12 @@ _equalScalarArrayOpExpr(const ScalarArrayOpExpr *a, const ScalarArrayOpExpr *b) b->opfuncid != 0) return false; + /* As above, hashfuncid may differ too */ + if (a->hashfuncid != b->hashfuncid && + a->hashfuncid != 0 && + b->hashfuncid != 0) + return false; + COMPARE_SCALAR_FIELD(useOr); COMPARE_SCALAR_FIELD(inputcollid); COMPARE_NODE_FIELD(args); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 4a8dc2d86dc..c723f6d635f 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1309,6 +1309,7 @@ _outScalarArrayOpExpr(StringInfo str, const ScalarArrayOpExpr *node) WRITE_OID_FIELD(opno); WRITE_OID_FIELD(opfuncid); + WRITE_OID_FIELD(hashfuncid); WRITE_BOOL_FIELD(useOr); WRITE_OID_FIELD(inputcollid); WRITE_NODE_FIELD(args); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 99247278513..3746668f526 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -831,6 +831,7 @@ _readScalarArrayOpExpr(void) READ_OID_FIELD(opno); READ_OID_FIELD(opfuncid); + READ_OID_FIELD(hashfuncid); READ_BOOL_FIELD(useOr); READ_OID_FIELD(inputcollid); READ_NODE_FIELD(args); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 05686d01942..8577c7b1389 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -4436,21 +4436,50 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context) } else if (IsA(node, ScalarArrayOpExpr)) { - /* - * Estimate that the operator will be applied to about half of the - * array elements before the answer is determined. - */ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node; Node *arraynode = (Node *) lsecond(saop->args); QualCost sacosts; + QualCost hcosts; + int estarraylen = estimate_array_length(arraynode); set_sa_opfuncid(saop); sacosts.startup = sacosts.per_tuple = 0; add_function_cost(context->root, saop->opfuncid, NULL, &sacosts); - context->total.startup += sacosts.startup; - context->total.per_tuple += sacosts.per_tuple * - estimate_array_length(arraynode) * 0.5; + + if (OidIsValid(saop->hashfuncid)) + { + /* Handle costs for hashed ScalarArrayOpExpr */ + hcosts.startup = hcosts.per_tuple = 0; + + add_function_cost(context->root, saop->hashfuncid, NULL, &hcosts); + context->total.startup += sacosts.startup + hcosts.startup; + + /* Estimate the cost of building the hashtable. */ + context->total.startup += estarraylen * hcosts.per_tuple; + + /* + * XXX should we charge a little bit for sacosts.per_tuple when + * building the table, or is it ok to assume there will be zero + * hash collision? + */ + + /* + * Charge for hashtable lookups. Charge a single hash and a + * single comparison. + */ + context->total.per_tuple += hcosts.per_tuple + sacosts.per_tuple; + } + else + { + /* + * Estimate that the operator will be applied to about half of the + * array elements before the answer is determined. + */ + context->total.startup += sacosts.startup; + context->total.per_tuple += sacosts.per_tuple * + estimate_array_length(arraynode) * 0.5; + } } else if (IsA(node, Aggref) || IsA(node, WindowFunc)) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 898d7fcb0bd..1868c4eff47 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -1110,6 +1110,16 @@ preprocess_expression(PlannerInfo *root, Node *expr, int kind) #endif } + /* + * Check for ANY ScalarArrayOpExpr with Const arrays and set the + * hashfuncid of any that might execute more quickly by using hash lookups + * instead of a linear search. + */ + if (kind == EXPRKIND_QUAL || kind == EXPRKIND_TARGET) + { + convert_saop_to_hashed_saop(expr); + } + /* Expand SubLinks to SubPlans */ if (root->parse->hasSubLinks) expr = SS_process_sublinks(root, expr, (kind == EXPRKIND_QUAL)); diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 70c0fa07e6e..9f40ed77e64 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -1674,9 +1674,13 @@ fix_expr_common(PlannerInfo *root, Node *node) } else if (IsA(node, ScalarArrayOpExpr)) { - set_sa_opfuncid((ScalarArrayOpExpr *) node); - record_plan_function_dependency(root, - ((ScalarArrayOpExpr *) node)->opfuncid); + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node; + + set_sa_opfuncid(saop); + record_plan_function_dependency(root, saop->opfuncid); + + if (!OidIsValid(saop->hashfuncid)) + record_plan_function_dependency(root, saop->hashfuncid); } else if (IsA(node, Const)) { diff --git a/src/backend/optimizer/prep/prepqual.c b/src/backend/optimizer/prep/prepqual.c index 8d4dc9cd105..42c3e4dc046 100644 --- a/src/backend/optimizer/prep/prepqual.c +++ b/src/backend/optimizer/prep/prepqual.c @@ -127,6 +127,7 @@ negate_clause(Node *node) newopexpr->opno = negator; newopexpr->opfuncid = InvalidOid; + newopexpr->hashfuncid = InvalidOid; newopexpr->useOr = !saopexpr->useOr; newopexpr->inputcollid = saopexpr->inputcollid; newopexpr->args = saopexpr->args; diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 9a6e3dab834..526997327c6 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -106,6 +106,7 @@ static bool contain_leaked_vars_walker(Node *node, void *context); static Relids find_nonnullable_rels_walker(Node *node, bool top_level); static List *find_nonnullable_vars_walker(Node *node, bool top_level); static bool is_strict_saop(ScalarArrayOpExpr *expr, bool falseOK); +static bool convert_saop_to_hashed_saop_walker(Node *node, void *context); static Node *eval_const_expressions_mutator(Node *node, eval_const_expressions_context *context); static bool contain_non_const_walker(Node *node, void *context); @@ -2101,6 +2102,69 @@ eval_const_expressions(PlannerInfo *root, Node *node) return eval_const_expressions_mutator(node, &context); } +#define MIN_ARRAY_SIZE_FOR_HASHED_SAOP 9 +/*-------------------- + * convert_saop_to_hashed_saop + * + * Recursively search 'node' for ScalarArrayOpExprs and fill in the hash + * function for any ScalarArrayOpExpr that looks like it would be useful to + * evaluate using a hash table rather than a linear search. + * + * We'll use a hash table if all of the following conditions are met: + * 1. The 2nd argument of the array contain only Consts. + * 2. useOr is true. + * 3. There's valid hash function for both left and righthand operands and + * these hash functions are the same. + * 4. If the array contains enough elements for us to consider it to be + * worthwhile using a hash table rather than a linear search. + */ +void +convert_saop_to_hashed_saop(Node *node) +{ + (void) convert_saop_to_hashed_saop_walker(node, NULL); +} + +static bool +convert_saop_to_hashed_saop_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + + if (IsA(node, ScalarArrayOpExpr)) + { + ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node; + Expr *arrayarg = (Expr *) lsecond(saop->args); + Oid lefthashfunc; + Oid righthashfunc; + + if (saop->useOr && arrayarg && IsA(arrayarg, Const) && + !((Const *) arrayarg)->constisnull && + get_op_hash_functions(saop->opno, &lefthashfunc, &righthashfunc) && + lefthashfunc == righthashfunc) + { + Datum arrdatum = ((Const *) arrayarg)->constvalue; + ArrayType *arr = (ArrayType *) DatumGetPointer(arrdatum); + int nitems; + + /* + * Only fill in the hash functions if the array looks large enough + * for it to be worth hashing instead of doing a linear search. + */ + nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); + + if (nitems >= MIN_ARRAY_SIZE_FOR_HASHED_SAOP) + { + /* Looks good. Fill in the hash functions */ + saop->hashfuncid = lefthashfunc; + } + return true; + } + } + + return expression_tree_walker(node, convert_saop_to_hashed_saop_walker, NULL); +} + + /*-------------------- * estimate_expression_value * diff --git a/src/backend/parser/parse_oper.c b/src/backend/parser/parse_oper.c index 24013bcac9c..c379d72fcec 100644 --- a/src/backend/parser/parse_oper.c +++ b/src/backend/parser/parse_oper.c @@ -894,6 +894,7 @@ make_scalar_array_op(ParseState *pstate, List *opname, result = makeNode(ScalarArrayOpExpr); result->opno = oprid(tup); result->opfuncid = opform->oprcode; + result->hashfuncid = InvalidOid; result->useOr = useOr; /* inputcollid will be set by parse_collate.c */ result->args = args; diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index bfa6e27e9bb..1290d45963a 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -3812,6 +3812,7 @@ make_partition_op_expr(PartitionKey key, int keynum, saopexpr = makeNode(ScalarArrayOpExpr); saopexpr->opno = operoid; saopexpr->opfuncid = get_opcode(operoid); + saopexpr->hashfuncid = InvalidOid; saopexpr->useOr = true; saopexpr->inputcollid = key->partcollation[keynum]; saopexpr->args = list_make2(arg1, arrexpr); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index abd4bffff5f..45722fdbb13 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202104072 +#define CATALOG_VERSION_NO 202104081 #endif diff --git a/src/include/executor/execExpr.h b/src/include/executor/execExpr.h index 1b7f9865b0a..2449cde7ad3 100644 --- a/src/include/executor/execExpr.h +++ b/src/include/executor/execExpr.h @@ -20,6 +20,7 @@ /* forward references to avoid circularity */ struct ExprEvalStep; struct SubscriptingRefState; +struct ScalarArrayOpExprHashTable; /* Bits in ExprState->flags (see also execnodes.h for public flag bits): */ /* expression's interpreter has been initialized */ @@ -218,6 +219,7 @@ typedef enum ExprEvalOp /* evaluate assorted special-purpose expression types */ EEOP_CONVERT_ROWTYPE, EEOP_SCALARARRAYOP, + EEOP_HASHED_SCALARARRAYOP, EEOP_XMLEXPR, EEOP_AGGREF, EEOP_GROUPING_FUNC, @@ -554,6 +556,21 @@ typedef struct ExprEvalStep PGFunction fn_addr; /* actual call address */ } scalararrayop; + /* for EEOP_HASHED_SCALARARRAYOP */ + struct + { + bool has_nulls; + struct ScalarArrayOpExprHashTable *elements_tab; + FmgrInfo *finfo; /* function's lookup data */ + FunctionCallInfo fcinfo_data; /* arguments etc */ + /* faster to access without additional indirection: */ + PGFunction fn_addr; /* actual call address */ + FmgrInfo *hash_finfo; /* function's lookup data */ + FunctionCallInfo hash_fcinfo_data; /* arguments etc */ + /* faster to access without additional indirection: */ + PGFunction hash_fn_addr; /* actual call address */ + } hashedscalararrayop; + /* for EEOP_XMLEXPR */ struct { @@ -725,6 +742,8 @@ extern void ExecEvalFieldStoreForm(ExprState *state, ExprEvalStep *op, extern void ExecEvalConvertRowtype(ExprState *state, ExprEvalStep *op, ExprContext *econtext); extern void ExecEvalScalarArrayOp(ExprState *state, ExprEvalStep *op); +extern void ExecEvalHashedScalarArrayOp(ExprState *state, ExprEvalStep *op, + ExprContext *econtext); extern void ExecEvalConstraintNotNull(ExprState *state, ExprEvalStep *op); extern void ExecEvalConstraintCheck(ExprState *state, ExprEvalStep *op); extern void ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op); diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index f2ac4e51f16..9ae851d8477 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -578,12 +578,19 @@ typedef OpExpr NullIfExpr; * is almost the same as for the underlying operator, but we need a useOr * flag to remember whether it's ANY or ALL, and we don't have to store * the result type (or the collation) because it must be boolean. + * + * A ScalarArrayOpExpr with a valid hashfuncid is evaluated during execution + * by building a hash table containing the Const values from the rhs arg. + * This table is probed during expression evaluation. Only useOr=true + * ScalarArrayOpExpr with Const arrays on the rhs can have the hashfuncid + * field set. See convert_saop_to_hashed_saop(). */ typedef struct ScalarArrayOpExpr { Expr xpr; Oid opno; /* PG_OPERATOR OID of the operator */ - Oid opfuncid; /* PG_PROC OID of underlying function */ + Oid opfuncid; /* PG_PROC OID of comparison function */ + Oid hashfuncid; /* PG_PROC OID of hash func or InvalidOid */ bool useOr; /* true for ANY, false for ALL */ Oid inputcollid; /* OID of collation that operator should use */ List *args; /* the scalar and array operands */ diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index d587952b7d6..68ebb84bf5d 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -146,6 +146,8 @@ extern bool contain_volatile_functions_not_nextval(Node *clause); extern Node *eval_const_expressions(PlannerInfo *root, Node *node); +extern void convert_saop_to_hashed_saop(Node *node); + extern Node *estimate_expression_value(PlannerInfo *root, Node *node); extern Expr *evaluate_expr(Expr *expr, Oid result_type, int32 result_typmod, diff --git a/src/test/regress/expected/expressions.out b/src/test/regress/expected/expressions.out index 05a6eb07b2e..5944dfd5e1a 100644 --- a/src/test/regress/expected/expressions.out +++ b/src/test/regress/expected/expressions.out @@ -158,3 +158,121 @@ select count(*) from date_tbl 13 (1 row) +-- +-- Tests for ScalarArrayOpExpr with a hashfn +-- +-- create a stable function so that the tests below are not +-- evaluated using the planner's constant folding. +begin; +create function return_int_input(int) returns int as $$ +begin + return $1; +end; +$$ language plpgsql stable; +create function return_text_input(text) returns text as $$ +begin + return $1; +end; +$$ language plpgsql stable; +select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); + ?column? +---------- + t +(1 row) + +select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, null); + ?column? +---------- + +(1 row) + +select return_int_input(1) in (null, null, null, null, null, null, null, null, null, null, null); + ?column? +---------- + +(1 row) + +select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null); + ?column? +---------- + t +(1 row) + +select return_int_input(null::int) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); + ?column? +---------- + +(1 row) + +select return_int_input(null::int) in (10, 9, 2, 8, 3, 7, 4, 6, 5, null); + ?column? +---------- + +(1 row) + +select return_text_input('a') in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'); + ?column? +---------- + t +(1 row) + +rollback; +-- Test with non-strict equality function. +-- We need to create our own type for this. +begin; +create type myint; +create function myintin(cstring) returns myint strict immutable language + internal as 'int4in'; +NOTICE: return type myint is only a shell +create function myintout(myint) returns cstring strict immutable language + internal as 'int4out'; +NOTICE: argument type myint is only a shell +create function myinthash(myint) returns integer strict immutable language + internal as 'hashint4'; +NOTICE: argument type myint is only a shell +create type myint (input = myintin, output = myintout, like = int4); +create cast (int4 as myint) without function; +create cast (myint as int4) without function; +create function myinteq(myint, myint) returns bool as $$ +begin + if $1 is null and $2 is null then + return true; + else + return $1::int = $2::int; + end if; +end; +$$ language plpgsql immutable; +create operator = ( + leftarg = myint, + rightarg = myint, + commutator = =, + negator = <>, + procedure = myinteq, + restrict = eqsel, + join = eqjoinsel, + merges +); +create operator class myint_ops +default for type myint using hash as + operator 1 = (myint, myint), + function 1 myinthash(myint); +create table inttest (a myint); +insert into inttest values(1::myint),(null); +-- try an array with enough elements to cause hashing +select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); + a +--- + 1 + +(2 rows) + +-- ensure the result matched with the non-hashed version. We simply remove +-- some array elements so that we don't reach the hashing threshold. +select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, null); + a +--- + 1 + +(2 rows) + +rollback; diff --git a/src/test/regress/sql/expressions.sql b/src/test/regress/sql/expressions.sql index 1ca8bb151c8..b3fd1b5ecba 100644 --- a/src/test/regress/sql/expressions.sql +++ b/src/test/regress/sql/expressions.sql @@ -65,3 +65,88 @@ select count(*) from date_tbl where f1 not between symmetric '1997-01-01' and '1998-01-01'; select count(*) from date_tbl where f1 not between symmetric '1997-01-01' and '1998-01-01'; + +-- +-- Tests for ScalarArrayOpExpr with a hashfn +-- + +-- create a stable function so that the tests below are not +-- evaluated using the planner's constant folding. +begin; + +create function return_int_input(int) returns int as $$ +begin + return $1; +end; +$$ language plpgsql stable; + +create function return_text_input(text) returns text as $$ +begin + return $1; +end; +$$ language plpgsql stable; + +select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); +select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, null); +select return_int_input(1) in (null, null, null, null, null, null, null, null, null, null, null); +select return_int_input(1) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1, null); +select return_int_input(null::int) in (10, 9, 2, 8, 3, 7, 4, 6, 5, 1); +select return_int_input(null::int) in (10, 9, 2, 8, 3, 7, 4, 6, 5, null); +select return_text_input('a') in ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'); + +rollback; + +-- Test with non-strict equality function. +-- We need to create our own type for this. + +begin; + +create type myint; +create function myintin(cstring) returns myint strict immutable language + internal as 'int4in'; +create function myintout(myint) returns cstring strict immutable language + internal as 'int4out'; +create function myinthash(myint) returns integer strict immutable language + internal as 'hashint4'; + +create type myint (input = myintin, output = myintout, like = int4); + +create cast (int4 as myint) without function; +create cast (myint as int4) without function; + +create function myinteq(myint, myint) returns bool as $$ +begin + if $1 is null and $2 is null then + return true; + else + return $1::int = $2::int; + end if; +end; +$$ language plpgsql immutable; + +create operator = ( + leftarg = myint, + rightarg = myint, + commutator = =, + negator = <>, + procedure = myinteq, + restrict = eqsel, + join = eqjoinsel, + merges +); + +create operator class myint_ops +default for type myint using hash as + operator 1 = (myint, myint), + function 1 myinthash(myint); + +create table inttest (a myint); +insert into inttest values(1::myint),(null); + +-- try an array with enough elements to cause hashing +select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint,6::myint,7::myint,8::myint,9::myint, null); +-- ensure the result matched with the non-hashed version. We simply remove +-- some array elements so that we don't reach the hashing threshold. +select * from inttest where a in (1::myint,2::myint,3::myint,4::myint,5::myint, null); + +rollback; |