diff options
Diffstat (limited to 'src')
53 files changed, 3934 insertions, 615 deletions
diff --git a/src/backend/catalog/pg_aggregate.c b/src/backend/catalog/pg_aggregate.c index 9dbec508a0d..7248e6789ca 100644 --- a/src/backend/catalog/pg_aggregate.c +++ b/src/backend/catalog/pg_aggregate.c @@ -36,6 +36,7 @@ static Oid lookup_agg_function(List *fnName, int nargs, Oid *input_types, + Oid variadicArgType, Oid *rettype); @@ -45,12 +46,15 @@ static Oid lookup_agg_function(List *fnName, int nargs, Oid *input_types, Oid AggregateCreate(const char *aggName, Oid aggNamespace, + char aggKind, int numArgs, + int numDirectArgs, oidvector *parameterTypes, Datum allParameterTypes, Datum parameterModes, Datum parameterNames, List *parameterDefaults, + Oid variadicArgType, List *aggtransfnName, List *aggfinalfnName, List *aggsortopName, @@ -71,7 +75,7 @@ AggregateCreate(const char *aggName, bool hasInternalArg; Oid rettype; Oid finaltype; - Oid *fnArgs; + Oid fnArgs[FUNC_MAX_ARGS]; int nargs_transfn; Oid procOid; TupleDesc tupDesc; @@ -87,6 +91,22 @@ AggregateCreate(const char *aggName, if (!aggtransfnName) elog(ERROR, "aggregate must have a transition function"); + if (numDirectArgs < 0 || numDirectArgs > numArgs) + elog(ERROR, "incorrect number of direct args for aggregate"); + + /* + * Aggregates can have at most FUNC_MAX_ARGS-1 args, else the transfn + * and/or finalfn will be unrepresentable in pg_proc. We must check now + * to protect fixed-size arrays here and possibly in called functions. 
+ */ + if (numArgs < 0 || numArgs > FUNC_MAX_ARGS - 1) + ereport(ERROR, + (errcode(ERRCODE_TOO_MANY_ARGUMENTS), + errmsg_plural("aggregates cannot have more than %d argument", + "aggregates cannot have more than %d arguments", + FUNC_MAX_ARGS - 1, + FUNC_MAX_ARGS - 1))); + /* check for polymorphic and INTERNAL arguments */ hasPolyArg = false; hasInternalArg = false; @@ -108,12 +128,75 @@ AggregateCreate(const char *aggName, errmsg("cannot determine transition data type"), errdetail("An aggregate using a polymorphic transition type must have at least one polymorphic argument."))); - /* find the transfn */ - nargs_transfn = numArgs + 1; - fnArgs = (Oid *) palloc(nargs_transfn * sizeof(Oid)); - fnArgs[0] = aggTransType; - memcpy(fnArgs + 1, aggArgTypes, numArgs * sizeof(Oid)); - transfn = lookup_agg_function(aggtransfnName, nargs_transfn, fnArgs, + /* + * An ordered-set aggregate that is VARIADIC must be VARIADIC ANY. In + * principle we could support regular variadic types, but it would make + * things much more complicated because we'd have to assemble the correct + * subsets of arguments into array values. Since no standard aggregates + * have use for such a case, we aren't bothering for now. + */ + if (AGGKIND_IS_ORDERED_SET(aggKind) && OidIsValid(variadicArgType) && + variadicArgType != ANYOID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("a variadic ordered-set aggregate must use VARIADIC type ANY"))); + + /* + * If it's a hypothetical-set aggregate, there must be at least as many + * direct arguments as aggregated ones, and the last N direct arguments + * must match the aggregated ones in type. (We have to check this again + * when the aggregate is called, in case ANY is involved, but it makes + * sense to reject the aggregate definition now if the declared arg types + * don't match up.) It's unconditionally OK if numDirectArgs == numArgs, + * indicating that the grammar merged identical VARIADIC entries from both + * lists. 
Otherwise, if the agg is VARIADIC, then we had VARIADIC only on + * the aggregated side, which is not OK. Otherwise, insist on the last N + * parameter types on each side matching exactly. + */ + if (aggKind == AGGKIND_HYPOTHETICAL && + numDirectArgs < numArgs) + { + int numAggregatedArgs = numArgs - numDirectArgs; + + if (OidIsValid(variadicArgType) || + numDirectArgs < numAggregatedArgs || + memcmp(aggArgTypes + (numDirectArgs - numAggregatedArgs), + aggArgTypes + numDirectArgs, + numAggregatedArgs * sizeof(Oid)) != 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("a hypothetical-set aggregate must have direct arguments matching its aggregated arguments"))); + } + + /* + * Find the transfn. For ordinary aggs, it takes the transtype plus all + * aggregate arguments. For ordered-set aggs, it takes the transtype plus + * all aggregated args, but not direct args. However, we have to treat + * specially the case where a trailing VARIADIC item is considered to + * cover both direct and aggregated args. + */ + if (AGGKIND_IS_ORDERED_SET(aggKind)) + { + if (numDirectArgs < numArgs) + nargs_transfn = numArgs - numDirectArgs + 1; + else + { + /* special case with VARIADIC last arg */ + Assert(variadicArgType != InvalidOid); + nargs_transfn = 2; + } + fnArgs[0] = aggTransType; + memcpy(fnArgs + 1, aggArgTypes + (numArgs - (nargs_transfn - 1)), + (nargs_transfn - 1) * sizeof(Oid)); + } + else + { + nargs_transfn = numArgs + 1; + fnArgs[0] = aggTransType; + memcpy(fnArgs + 1, aggArgTypes, numArgs * sizeof(Oid)); + } + transfn = lookup_agg_function(aggtransfnName, nargs_transfn, + fnArgs, variadicArgType, &rettype); /* @@ -156,9 +239,44 @@ AggregateCreate(const char *aggName, /* handle finalfn, if supplied */ if (aggfinalfnName) { + int nargs_finalfn; + + /* + * For ordinary aggs, the finalfn just takes the transtype. For + * ordered-set aggs, it takes the transtype plus all args. 
(The + * aggregated args are useless at runtime, and are actually passed as + * NULLs, but we may need them in the function signature to allow + * resolution of a polymorphic agg's result type.) + */ fnArgs[0] = aggTransType; - finalfn = lookup_agg_function(aggfinalfnName, 1, fnArgs, + if (AGGKIND_IS_ORDERED_SET(aggKind)) + { + nargs_finalfn = numArgs + 1; + memcpy(fnArgs + 1, aggArgTypes, numArgs * sizeof(Oid)); + } + else + { + nargs_finalfn = 1; + /* variadic-ness of the aggregate doesn't affect finalfn */ + variadicArgType = InvalidOid; + } + finalfn = lookup_agg_function(aggfinalfnName, nargs_finalfn, + fnArgs, variadicArgType, &finaltype); + + /* + * The finalfn of an ordered-set agg will certainly be passed at least + * one null argument, so complain if it's strict. Nothing bad would + * happen at runtime (you'd just get a null result), but it's surely + * not what the user wants, so let's complain now. + * + * Note: it's likely that a strict transfn would also be a mistake, + * but the case isn't quite so airtight, so we let that pass. 
+ */ + if (AGGKIND_IS_ORDERED_SET(aggKind) && func_strict(finalfn)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("final function of an ordered-set aggregate must not be declared STRICT"))); } else { @@ -270,6 +388,8 @@ AggregateCreate(const char *aggName, values[i] = (Datum) NULL; } values[Anum_pg_aggregate_aggfnoid - 1] = ObjectIdGetDatum(procOid); + values[Anum_pg_aggregate_aggkind - 1] = CharGetDatum(aggKind); + values[Anum_pg_aggregate_aggnumdirectargs - 1] = Int16GetDatum(numDirectArgs); values[Anum_pg_aggregate_aggtransfn - 1] = ObjectIdGetDatum(transfn); values[Anum_pg_aggregate_aggfinalfn - 1] = ObjectIdGetDatum(finalfn); values[Anum_pg_aggregate_aggsortop - 1] = ObjectIdGetDatum(sortop); @@ -333,6 +453,7 @@ static Oid lookup_agg_function(List *fnName, int nargs, Oid *input_types, + Oid variadicArgType, Oid *rettype) { Oid fnOid; @@ -372,6 +493,21 @@ lookup_agg_function(List *fnName, NIL, input_types)))); /* + * If the agg is declared to take VARIADIC ANY, the underlying functions + * had better be declared that way too, else they may receive too many + * parameters; but func_get_detail would have been happy with plain ANY. + * (Probably nothing very bad would happen, but it wouldn't work as the + * user expects.) Other combinations should work without any special + * pushups, given that we told func_get_detail not to expand VARIADIC. + */ + if (variadicArgType == ANYOID && vatype != ANYOID) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("function %s must accept VARIADIC ANY to be used in this aggregate", + func_signature_string(fnName, nargs, + NIL, input_types)))); + + /* * If there are any polymorphic types involved, enforce consistency, and * possibly refine the result type. It's OK if the result is still * polymorphic at this point, though. 
@@ -388,8 +524,7 @@ lookup_agg_function(List *fnName, */ for (i = 0; i < nargs; i++) { - if (!IsPolymorphicType(true_oid_array[i]) && - !IsBinaryCoercible(input_types[i], true_oid_array[i])) + if (!IsBinaryCoercible(input_types[i], true_oid_array[i])) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), errmsg("function %s requires run-time type coercion", diff --git a/src/backend/commands/aggregatecmds.c b/src/backend/commands/aggregatecmds.c index 6fc3e045492..b570d3e5fa5 100644 --- a/src/backend/commands/aggregatecmds.c +++ b/src/backend/commands/aggregatecmds.c @@ -45,7 +45,10 @@ * * "oldstyle" signals the old (pre-8.2) style where the aggregate input type * is specified by a BASETYPE element in the parameters. Otherwise, - * "args" is a list of FunctionParameter structs defining the agg's arguments. + * "args" is a pair, whose first element is a list of FunctionParameter structs + * defining the agg's arguments (both direct and aggregated), and whose second + * element is an Integer node with the number of direct args, or -1 if this + * isn't an ordered-set aggregate. * "parameters" is a list of DefElem representing the agg's definition clauses. 
*/ Oid @@ -55,6 +58,7 @@ DefineAggregate(List *name, List *args, bool oldstyle, List *parameters, char *aggName; Oid aggNamespace; AclResult aclresult; + char aggKind = AGGKIND_NORMAL; List *transfuncName = NIL; List *finalfuncName = NIL; List *sortoperatorName = NIL; @@ -63,11 +67,13 @@ DefineAggregate(List *name, List *args, bool oldstyle, List *parameters, int32 transSpace = 0; char *initval = NULL; int numArgs; + int numDirectArgs = 0; oidvector *parameterTypes; ArrayType *allParameterTypes; ArrayType *parameterModes; ArrayType *parameterNames; List *parameterDefaults; + Oid variadicArgType; Oid transTypeId; char transTypeType; ListCell *pl; @@ -81,6 +87,19 @@ DefineAggregate(List *name, List *args, bool oldstyle, List *parameters, aclcheck_error(aclresult, ACL_KIND_NAMESPACE, get_namespace_name(aggNamespace)); + /* Deconstruct the output of the aggr_args grammar production */ + if (!oldstyle) + { + Assert(list_length(args) == 2); + numDirectArgs = intVal(lsecond(args)); + if (numDirectArgs >= 0) + aggKind = AGGKIND_ORDERED_SET; + else + numDirectArgs = 0; + args = (List *) linitial(args); + } + + /* Examine aggregate's definition clauses */ foreach(pl, parameters) { DefElem *defel = (DefElem *) lfirst(pl); @@ -99,6 +118,17 @@ DefineAggregate(List *name, List *args, bool oldstyle, List *parameters, sortoperatorName = defGetQualifiedName(defel); else if (pg_strcasecmp(defel->defname, "basetype") == 0) baseType = defGetTypeName(defel); + else if (pg_strcasecmp(defel->defname, "hypothetical") == 0) + { + if (defGetBoolean(defel)) + { + if (aggKind == AGGKIND_NORMAL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), + errmsg("only ordered-set aggregates can be hypothetical"))); + aggKind = AGGKIND_HYPOTHETICAL; + } + } else if (pg_strcasecmp(defel->defname, "stype") == 0) transType = defGetTypeName(defel); else if (pg_strcasecmp(defel->defname, "stype1") == 0) @@ -162,6 +192,7 @@ DefineAggregate(List *name, List *args, bool oldstyle, List 
*parameters, parameterModes = NULL; parameterNames = NULL; parameterDefaults = NIL; + variadicArgType = InvalidOid; } else { @@ -186,6 +217,7 @@ DefineAggregate(List *name, List *args, bool oldstyle, List *parameters, &parameterModes, &parameterNames, &parameterDefaults, + &variadicArgType, &requiredResultType); /* Parameter defaults are not currently allowed by the grammar */ Assert(parameterDefaults == NIL); @@ -241,12 +273,15 @@ DefineAggregate(List *name, List *args, bool oldstyle, List *parameters, */ return AggregateCreate(aggName, /* aggregate name */ aggNamespace, /* namespace */ + aggKind, numArgs, + numDirectArgs, parameterTypes, PointerGetDatum(allParameterTypes), PointerGetDatum(parameterModes), PointerGetDatum(parameterNames), parameterDefaults, + variadicArgType, transfuncName, /* step function name */ finalfuncName, /* final function name */ sortoperatorName, /* sort operator name */ diff --git a/src/backend/commands/functioncmds.c b/src/backend/commands/functioncmds.c index ca754b47ff9..49b046597cf 100644 --- a/src/backend/commands/functioncmds.c +++ b/src/backend/commands/functioncmds.c @@ -168,6 +168,8 @@ compute_return_type(TypeName *returnType, Oid languageOid, * * Results are stored into output parameters. parameterTypes must always * be created, but the other arrays are set to NULL if not needed. + * variadicArgType is set to the variadic array type if there's a VARIADIC + * parameter (there can be only one); or to InvalidOid if not. * requiredResultType is set to InvalidOid if there are no OUT parameters, * else it is set to the OID of the implied result type. 
*/ @@ -181,6 +183,7 @@ interpret_function_parameter_list(List *parameters, ArrayType **parameterModes, ArrayType **parameterNames, List **parameterDefaults, + Oid *variadicArgType, Oid *requiredResultType) { int parameterCount = list_length(parameters); @@ -197,6 +200,7 @@ interpret_function_parameter_list(List *parameters, int i; ParseState *pstate; + *variadicArgType = InvalidOid; /* default result */ *requiredResultType = InvalidOid; /* default result */ inTypes = (Oid *) palloc(parameterCount * sizeof(Oid)); @@ -293,6 +297,7 @@ interpret_function_parameter_list(List *parameters, if (fp->mode == FUNC_PARAM_VARIADIC) { + *variadicArgType = toid; varCount++; /* validate variadic parameter type */ switch (toid) @@ -823,6 +828,7 @@ CreateFunction(CreateFunctionStmt *stmt, const char *queryString) ArrayType *parameterModes; ArrayType *parameterNames; List *parameterDefaults; + Oid variadicArgType; Oid requiredResultType; bool isWindowFunc, isStrict, @@ -920,6 +926,7 @@ CreateFunction(CreateFunctionStmt *stmt, const char *queryString) &parameterModes, &parameterNames, &parameterDefaults, + &variadicArgType, &requiredResultType); if (stmt->returnType) diff --git a/src/backend/executor/execQual.c b/src/backend/executor/execQual.c index 90c27530e9c..67dca78b771 100644 --- a/src/backend/executor/execQual.c +++ b/src/backend/executor/execQual.c @@ -4408,6 +4408,8 @@ ExecInitExpr(Expr *node, PlanState *parent) aggstate->aggs = lcons(astate, aggstate->aggs); naggs = ++aggstate->numaggs; + astate->aggdirectargs = (List *) ExecInitExpr((Expr *) aggref->aggdirectargs, + parent); astate->args = (List *) ExecInitExpr((Expr *) aggref->args, parent); astate->aggfilter = ExecInitExpr(aggref->aggfilter, diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c index ff6a123bc40..893a54b21bb 100644 --- a/src/backend/executor/functions.c +++ b/src/backend/executor/functions.c @@ -380,8 +380,8 @@ sql_fn_post_column_ref(ParseState *pstate, ColumnRef *cref, Node *var) param = 
ParseFuncOrColumn(pstate, list_make1(subfield), list_make1(param), - NIL, NULL, false, false, false, - NULL, true, cref->location); + NULL, + cref->location); } return param; diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index e02a6ffa8c3..0e2160d2f1c 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -8,14 +8,16 @@ * transvalue = initcond * foreach input_tuple do * transvalue = transfunc(transvalue, input_value(s)) - * result = finalfunc(transvalue) + * result = finalfunc(transvalue, direct_argument(s)) * * If a finalfunc is not supplied then the result is just the ending * value of transvalue. * - * If an aggregate call specifies DISTINCT or ORDER BY, we sort the input - * tuples and eliminate duplicates (if required) before performing the - * above-depicted process. + * If a normal aggregate call specifies DISTINCT or ORDER BY, we sort the + * input tuples and eliminate duplicates (if required) before performing + * the above-depicted process. (However, we don't do that for ordered-set + * aggregates; their "ORDER BY" inputs are ordinary aggregate arguments + * so far as this module is concerned.) * * If transfunc is marked "strict" in pg_proc and initcond is NULL, * then the first non-NULL input_value is assigned directly to transvalue, @@ -33,6 +35,14 @@ * of course). A non-strict finalfunc can make its own choice of * what to return for a NULL ending transvalue. * + * Ordered-set aggregates are treated specially in one other way: we + * evaluate any "direct" arguments and pass them to the finalfunc along + * with the transition value. In addition, NULL placeholders are + * provided to match the remaining finalfunc arguments, which correspond + * to the aggregated expressions. (These arguments have no use at + * runtime, but may be needed to allow resolution of a polymorphic + * aggregate's result type.) 
+ * * We compute aggregate input expressions and run the transition functions * in a temporary econtext (aggstate->tmpcontext). This is reset at * least once per input tuple, so when the transvalue datatype is @@ -40,7 +50,7 @@ * memory context, and free the prior value to avoid memory leakage. * We store transvalues in the memory context aggstate->aggcontext, * which is also used for the hashtable structures in AGG_HASHED mode. - * The node's regular econtext (aggstate->csstate.cstate.cs_ExprContext) + * The node's regular econtext (aggstate->ss.ps.ps_ExprContext) * is used to run finalize functions and compute the output tuple; * this context can be reset once per output tuple. * @@ -66,6 +76,13 @@ * AggState is available as context in earlier releases (back to 8.1), * but direct examination of the node is needed to use it before 9.0. * + * As of 9.4, aggregate transition functions can also use AggGetAggref() + * to get hold of the Aggref expression node for their aggregate call. + * This is mainly intended for ordered-set aggregates, which are not + * supported as window functions. (A regular aggregate function would + * need some fallback logic to use this, since there's no Aggref node + * for a window function.) + * * * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -82,7 +99,6 @@ #include "catalog/objectaccess.h" #include "catalog/pg_aggregate.h" #include "catalog/pg_proc.h" -#include "catalog/pg_type.h" #include "executor/executor.h" #include "executor/nodeAgg.h" #include "miscadmin.h" @@ -114,12 +130,27 @@ typedef struct AggStatePerAggData AggrefExprState *aggrefstate; Aggref *aggref; - /* number of input arguments for aggregate function proper */ + /* + * Nominal number of arguments for aggregate function. For plain aggs, + * this excludes any ORDER BY expressions. For ordered-set aggs, this + * counts both the direct and aggregated (ORDER BY) arguments. 
+ */ int numArguments; - /* number of inputs including ORDER BY expressions */ + /* + * Number of aggregated input columns. This includes ORDER BY expressions + * in both the plain-agg and ordered-set cases. Ordered-set direct args + * are not counted, though. + */ int numInputs; + /* + * Number of aggregated input columns to pass to the transfn. This + * includes the ORDER BY columns for ordered-set aggs, but not for plain + * aggs. (This doesn't count the transition state value!) + */ + int numTransInputs; + /* Oids of transfer functions */ Oid transfn_oid; Oid finalfn_oid; /* may be InvalidOid */ @@ -379,7 +410,7 @@ advance_transition_function(AggState *aggstate, AggStatePerGroup pergroupstate, FunctionCallInfoData *fcinfo) { - int numArguments = peraggstate->numArguments; + int numTransInputs = peraggstate->numTransInputs; MemoryContext oldContext; Datum newVal; int i; @@ -390,7 +421,7 @@ advance_transition_function(AggState *aggstate, * For a strict transfn, nothing happens when there's a NULL input; we * just keep the prior transValue. */ - for (i = 1; i <= numArguments; i++) + for (i = 1; i <= numTransInputs; i++) { if (fcinfo->argnull[i]) return; @@ -430,11 +461,14 @@ advance_transition_function(AggState *aggstate, /* We run the transition functions in per-input-tuple memory context */ oldContext = MemoryContextSwitchTo(aggstate->tmpcontext->ecxt_per_tuple_memory); + /* set up aggstate->curperagg for AggGetAggref() */ + aggstate->curperagg = peraggstate; + /* * OK to call the transition function */ InitFunctionCallInfoData(*fcinfo, &(peraggstate->transfn), - numArguments + 1, + numTransInputs + 1, peraggstate->aggCollation, (void *) aggstate, NULL); fcinfo->arg[0] = pergroupstate->transValue; @@ -442,6 +476,8 @@ advance_transition_function(AggState *aggstate, newVal = FunctionCallInvoke(fcinfo); + aggstate->curperagg = NULL; + /* * If pass-by-ref datatype, must copy the new value into aggcontext and * pfree the prior transValue. 
But if transfn returned a pointer to its @@ -485,15 +521,15 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) AggStatePerAgg peraggstate = &aggstate->peragg[aggno]; AggStatePerGroup pergroupstate = &pergroup[aggno]; ExprState *filter = peraggstate->aggrefstate->aggfilter; - int nargs = peraggstate->numArguments; + int numTransInputs = peraggstate->numTransInputs; int i; TupleTableSlot *slot; /* Skip anything FILTERed out */ if (filter) { - bool isnull; Datum res; + bool isnull; res = ExecEvalExprSwitchContext(filter, aggstate->tmpcontext, &isnull, NULL); @@ -512,18 +548,18 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) /* * If the transfn is strict, we want to check for nullity before * storing the row in the sorter, to save space if there are a lot - * of nulls. Note that we must only check numArguments columns, + * of nulls. Note that we must only check numTransInputs columns, * not numInputs, since nullity in columns used only for sorting * is not relevant here. 
*/ if (peraggstate->transfn.fn_strict) { - for (i = 0; i < nargs; i++) + for (i = 0; i < numTransInputs; i++) { if (slot->tts_isnull[i]) break; } - if (i < nargs) + if (i < numTransInputs) continue; } @@ -542,8 +578,8 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup) /* Load values into fcinfo */ /* Start from 1, since the 0th arg will be the transition value */ - Assert(slot->tts_nvalid >= nargs); - for (i = 0; i < nargs; i++) + Assert(slot->tts_nvalid >= numTransInputs); + for (i = 0; i < numTransInputs; i++) { fcinfo.arg[i + 1] = slot->tts_values[i]; fcinfo.argnull[i + 1] = slot->tts_isnull[i]; @@ -671,7 +707,7 @@ process_ordered_aggregate_multi(AggState *aggstate, FunctionCallInfoData fcinfo; TupleTableSlot *slot1 = peraggstate->evalslot; TupleTableSlot *slot2 = peraggstate->uniqslot; - int numArguments = peraggstate->numArguments; + int numTransInputs = peraggstate->numTransInputs; int numDistinctCols = peraggstate->numDistinctCols; bool haveOldValue = false; int i; @@ -685,10 +721,11 @@ process_ordered_aggregate_multi(AggState *aggstate, while (tuplesort_gettupleslot(peraggstate->sortstate, true, slot1)) { /* - * Extract the first numArguments as datums to pass to the transfn. - * (This will help execTuplesMatch too, so do it immediately.) + * Extract the first numTransInputs columns as datums to pass to the + * transfn. (This will help execTuplesMatch too, so we do it + * immediately.) 
*/ - slot_getsomeattrs(slot1, numArguments); + slot_getsomeattrs(slot1, numTransInputs); if (numDistinctCols == 0 || !haveOldValue || @@ -700,7 +737,7 @@ process_ordered_aggregate_multi(AggState *aggstate, { /* Load values into fcinfo */ /* Start from 1, since the 0th arg will be the transition value */ - for (i = 0; i < numArguments; i++) + for (i = 0; i < numTransInputs; i++) { fcinfo.arg[i + 1] = slot1->tts_values[i]; fcinfo.argnull[i + 1] = slot1->tts_isnull[i]; @@ -746,23 +783,73 @@ finalize_aggregate(AggState *aggstate, AggStatePerGroup pergroupstate, Datum *resultVal, bool *resultIsNull) { + FunctionCallInfoData fcinfo; + bool anynull = false; MemoryContext oldContext; + int i; + ListCell *lc; oldContext = MemoryContextSwitchTo(aggstate->ss.ps.ps_ExprContext->ecxt_per_tuple_memory); /* + * Evaluate any direct arguments. We do this even if there's no finalfn + * (which is unlikely anyway), so that side-effects happen as expected. + */ + i = 1; + foreach(lc, peraggstate->aggrefstate->aggdirectargs) + { + ExprState *expr = (ExprState *) lfirst(lc); + + fcinfo.arg[i] = ExecEvalExpr(expr, + aggstate->ss.ps.ps_ExprContext, + &fcinfo.argnull[i], + NULL); + anynull |= fcinfo.argnull[i]; + i++; + } + + /* * Apply the agg's finalfn if one is provided, else return transValue. */ if (OidIsValid(peraggstate->finalfn_oid)) { - FunctionCallInfoData fcinfo; + int numFinalArgs; - InitFunctionCallInfoData(fcinfo, &(peraggstate->finalfn), 1, + /* + * Identify number of arguments being passed to the finalfn. For a + * plain agg it's just one (the transition state value). For + * ordered-set aggs we also pass the direct argument(s), plus nulls + * corresponding to the aggregate-input columns. 
+ */ + if (AGGKIND_IS_ORDERED_SET(peraggstate->aggref->aggkind)) + numFinalArgs = peraggstate->numArguments + 1; + else + numFinalArgs = 1; + Assert(i <= numFinalArgs); + + /* set up aggstate->curperagg for AggGetAggref() */ + aggstate->curperagg = peraggstate; + + InitFunctionCallInfoData(fcinfo, &(peraggstate->finalfn), + numFinalArgs, peraggstate->aggCollation, (void *) aggstate, NULL); + + /* Fill in the transition state value */ fcinfo.arg[0] = pergroupstate->transValue; fcinfo.argnull[0] = pergroupstate->transValueIsNull; - if (fcinfo.flinfo->fn_strict && pergroupstate->transValueIsNull) + anynull |= pergroupstate->transValueIsNull; + + /* Fill any remaining argument positions with nulls */ + while (i < numFinalArgs) + { + fcinfo.arg[i] = (Datum) 0; + fcinfo.argnull[i] = true; + anynull = true; + i++; + } + + if (fcinfo.flinfo->fn_strict && anynull) { /* don't call a strict function with NULL inputs */ *resultVal = (Datum) 0; @@ -773,6 +860,7 @@ finalize_aggregate(AggState *aggstate, *resultVal = FunctionCallInvoke(&fcinfo); *resultIsNull = fcinfo.isnull; } + aggstate->curperagg = NULL; } else { @@ -1094,8 +1182,13 @@ agg_retrieve_direct(AggState *aggstate) * aggcontext (which contains any pass-by-ref transvalues of the old * group). We also clear any child contexts of the aggcontext; some * aggregate functions store working state in such contexts. + * + * We use ReScanExprContext not just ResetExprContext because we want + * any registered shutdown callbacks to be called. That allows + * aggregate functions to ensure they've cleaned up any non-memory + * resources. */ - ResetExprContext(econtext); + ReScanExprContext(econtext); MemoryContextResetAndDeleteChildren(aggstate->aggcontext); @@ -1164,6 +1257,16 @@ agg_retrieve_direct(AggState *aggstate) } /* + * Use the representative input tuple for any references to + * non-aggregated input columns in aggregate direct args, the node + * qual, and the tlist. 
(If we are not grouping, and there are no + * input rows at all, we will come here with an empty firstSlot ... + * but if not grouping, there can't be any references to + * non-aggregated input columns, so no problem.) + */ + econtext->ecxt_outertuple = firstSlot; + + /* * Done scanning input tuple group. Finalize each aggregate * calculation, and stash results in the per-output-tuple context. */ @@ -1189,15 +1292,6 @@ agg_retrieve_direct(AggState *aggstate) } /* - * Use the representative input tuple for any references to - * non-aggregated input columns in the qual and tlist. (If we are not - * grouping, and there are no input rows at all, we will come here - * with an empty firstSlot ... but if not grouping, there can't be any - * references to non-aggregated input columns, so no problem.) - */ - econtext->ecxt_outertuple = firstSlot; - - /* * Check the qual (HAVING clause); if the group does not match, ignore * it and loop back to try to process another group. */ @@ -1316,6 +1410,10 @@ agg_retrieve_hash_table(AggState *aggstate) /* * Clear the per-output-tuple context for each group + * + * We intentionally don't use ReScanExprContext here; if any aggs have + * registered shutdown callbacks, they mustn't be called yet, since we + * might not be done with that agg. 
*/ ResetExprContext(econtext); @@ -1412,6 +1510,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->eqfunctions = NULL; aggstate->hashfunctions = NULL; aggstate->peragg = NULL; + aggstate->curperagg = NULL; aggstate->agg_done = false; aggstate->pergroup = NULL; aggstate->grp_firstTuple = NULL; @@ -1565,6 +1664,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) AggStatePerAgg peraggstate; Oid inputTypes[FUNC_MAX_ARGS]; int numArguments; + int numDirectArgs; int numInputs; int numSortCols; int numDistinctCols; @@ -1604,28 +1704,12 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) /* Mark Aggref state node with assigned index in the result array */ aggrefstate->aggno = aggno; - /* Fill in the peraggstate data */ + /* Begin filling in the peraggstate data */ peraggstate->aggrefstate = aggrefstate; peraggstate->aggref = aggref; - numInputs = list_length(aggref->args); - peraggstate->numInputs = numInputs; peraggstate->sortstate = NULL; - /* - * Get actual datatypes of the inputs. These could be different from - * the agg's declared input types, when the agg accepts ANY or a - * polymorphic type. - */ - numArguments = 0; - foreach(lc, aggref->args) - { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - - if (!tle->resjunk) - inputTypes[numArguments++] = exprType((Node *) tle->expr); - } - peraggstate->numArguments = numArguments; - + /* Fetch the pg_aggregate row */ aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggref->aggfnoid)); if (!HeapTupleIsValid(aggTuple)) @@ -1674,28 +1758,38 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) } } + /* + * Get actual datatypes of the (nominal) aggregate inputs. These + * could be different from the agg's declared input types, when the + * agg accepts ANY or a polymorphic type. 
+ */ + numArguments = get_aggregate_argtypes(aggref, inputTypes); + peraggstate->numArguments = numArguments; + + /* Count the "direct" arguments, if any */ + numDirectArgs = list_length(aggref->aggdirectargs); + + /* Count the number of aggregated input columns */ + numInputs = list_length(aggref->args); + peraggstate->numInputs = numInputs; + + /* Detect how many columns to pass to the transfn */ + if (AGGKIND_IS_ORDERED_SET(aggref->aggkind)) + peraggstate->numTransInputs = numInputs; + else + peraggstate->numTransInputs = numArguments; + /* resolve actual type of transition state, if polymorphic */ - aggtranstype = aggform->aggtranstype; - if (IsPolymorphicType(aggtranstype)) - { - /* have to fetch the agg's declared input types... */ - Oid *declaredArgTypes; - int agg_nargs; - - (void) get_func_signature(aggref->aggfnoid, - &declaredArgTypes, &agg_nargs); - Assert(agg_nargs == numArguments); - aggtranstype = enforce_generic_type_consistency(inputTypes, - declaredArgTypes, - agg_nargs, - aggtranstype, - false); - pfree(declaredArgTypes); - } + aggtranstype = resolve_aggregate_transtype(aggref->aggfnoid, + aggform->aggtranstype, + inputTypes, + numArguments); /* build expression trees using actual argument & result types */ build_aggregate_fnexprs(inputTypes, numArguments, + numDirectArgs, + AGGKIND_IS_ORDERED_SET(aggref->aggkind), aggref->aggvariadic, aggtranstype, aggref->aggtype, @@ -1740,14 +1834,14 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) /* * If the transfn is strict and the initval is NULL, make sure input * type and transtype are the same (or at least binary-compatible), so - * that it's OK to use the first input value as the initial + * that it's OK to use the first aggregated input value as the initial * transValue. This should have been checked at agg definition time, * but just in case... 
*/ if (peraggstate->transfn.fn_strict && peraggstate->initValueIsNull) { - if (numArguments < 1 || - !IsBinaryCoercible(inputTypes[0], aggtranstype)) + if (numArguments <= numDirectArgs || + !IsBinaryCoercible(inputTypes[numDirectArgs], aggtranstype)) ereport(ERROR, (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION), errmsg("aggregate %u needs to have compatible input type and transition type", @@ -1755,8 +1849,8 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) } /* - * Get a tupledesc corresponding to the inputs (including sort - * expressions) of the agg. + * Get a tupledesc corresponding to the aggregated inputs (including + * sort expressions) of the agg. */ peraggstate->evaldesc = ExecTypeFromTL(aggref->args, false); @@ -1771,14 +1865,20 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) NULL); /* - * If we're doing either DISTINCT or ORDER BY, then we have a list of - * SortGroupClause nodes; fish out the data in them and stick them - * into arrays. + * If we're doing either DISTINCT or ORDER BY for a plain agg, then we + * have a list of SortGroupClause nodes; fish out the data in them and + * stick them into arrays. We ignore ORDER BY for an ordered-set agg, + * however; the agg's transfn and finalfn are responsible for that. * * Note that by construction, if there is a DISTINCT clause then the * ORDER BY clause is a prefix of it (see transformDistinctClause). */ - if (aggref->aggdistinct) + if (AGGKIND_IS_ORDERED_SET(aggref->aggkind)) + { + sortlist = NIL; + numSortCols = numDistinctCols = 0; + } + else if (aggref->aggdistinct) { sortlist = aggref->aggdistinct; numSortCols = numDistinctCols = list_length(sortlist); @@ -1805,7 +1905,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) /* If we have only one input, we need its len/byval info. 
*/ if (numInputs == 1) { - get_typlenbyval(inputTypes[0], + get_typlenbyval(inputTypes[numDirectArgs], &peraggstate->inputtypeLen, &peraggstate->inputtypeByVal); } @@ -1908,6 +2008,9 @@ ExecEndAgg(AggState *node) tuplesort_end(peraggstate->sortstate); } + /* And ensure any agg shutdown callbacks have been called */ + ReScanExprContext(node->ss.ps.ps_ExprContext); + /* * Free both the expr contexts. */ @@ -1967,6 +2070,8 @@ ExecReScanAgg(AggState *node) peraggstate->sortstate = NULL; } + /* We don't need to ReScanExprContext here; ExecReScan already did it */ + /* Release first tuple of group, if we have made a copy */ if (node->grp_firstTuple != NULL) { @@ -2047,6 +2152,71 @@ AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext) } /* + * AggGetAggref - allow an aggregate support function to get its Aggref + * + * If the function is being called as an aggregate support function, + * return the Aggref node for the aggregate call. Otherwise, return NULL. + * + * Note that if an aggregate is being used as a window function, this will + * return NULL. We could provide a similar function to return the relevant + * WindowFunc node in such cases, but it's not needed yet. + */ +Aggref * +AggGetAggref(FunctionCallInfo fcinfo) +{ + if (fcinfo->context && IsA(fcinfo->context, AggState)) + { + AggStatePerAgg curperagg = ((AggState *) fcinfo->context)->curperagg; + + if (curperagg) + return curperagg->aggref; + } + return NULL; +} + +/* + * AggGetPerTupleEContext - fetch per-input-tuple ExprContext + * + * This is useful in agg final functions; the econtext returned is the + * same per-tuple context that the transfn was called in (which can + * safely get reset during the final function). + * + * As above, this is currently not useful for aggs called as window functions. 
+ */ +ExprContext * +AggGetPerTupleEContext(FunctionCallInfo fcinfo) +{ + if (fcinfo->context && IsA(fcinfo->context, AggState)) + { + AggState *aggstate = (AggState *) fcinfo->context; + + return aggstate->tmpcontext; + } + return NULL; +} + +/* + * AggGetPerAggEContext - fetch per-output-tuple ExprContext + * + * This is useful for aggs to register shutdown callbacks, which will ensure + * that non-memory resources are freed. + * + * As above, this is currently not useful for aggs called as window functions. + */ +ExprContext * +AggGetPerAggEContext(FunctionCallInfo fcinfo) +{ + if (fcinfo->context && IsA(fcinfo->context, AggState)) + { + AggState *aggstate = (AggState *) fcinfo->context; + + return aggstate->ss.ps.ps_ExprContext; + } + return NULL; +} + + +/* * aggregate_dummy - dummy execution routine for aggregate functions * * This function is listed as the implementation (prosrc field) of pg_proc diff --git a/src/backend/executor/nodeWindowAgg.c b/src/backend/executor/nodeWindowAgg.c index 544ba989de1..81ad9337dbd 100644 --- a/src/backend/executor/nodeWindowAgg.c +++ b/src/backend/executor/nodeWindowAgg.c @@ -37,7 +37,6 @@ #include "catalog/objectaccess.h" #include "catalog/pg_aggregate.h" #include "catalog/pg_proc.h" -#include "catalog/pg_type.h" #include "executor/executor.h" #include "executor/nodeWindowAgg.h" #include "miscadmin.h" @@ -1796,27 +1795,16 @@ initialize_peragg(WindowAggState *winstate, WindowFunc *wfunc, } /* resolve actual type of transition state, if polymorphic */ - aggtranstype = aggform->aggtranstype; - if (IsPolymorphicType(aggtranstype)) - { - /* have to fetch the agg's declared input types... 
*/ - Oid *declaredArgTypes; - int agg_nargs; - - get_func_signature(wfunc->winfnoid, - &declaredArgTypes, &agg_nargs); - Assert(agg_nargs == numArguments); - aggtranstype = enforce_generic_type_consistency(inputTypes, - declaredArgTypes, - agg_nargs, - aggtranstype, - false); - pfree(declaredArgTypes); - } + aggtranstype = resolve_aggregate_transtype(wfunc->winfnoid, + aggform->aggtranstype, + inputTypes, + numArguments); /* build expression trees using actual argument & result types */ build_aggregate_fnexprs(inputTypes, numArguments, + 0, /* no ordered-set window functions yet */ + false, false, /* no variadic window functions yet */ aggtranstype, wfunc->wintype, diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index 3e102310c59..e4184c584cb 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -1132,12 +1132,14 @@ _copyAggref(const Aggref *from) COPY_SCALAR_FIELD(aggtype); COPY_SCALAR_FIELD(aggcollid); COPY_SCALAR_FIELD(inputcollid); + COPY_NODE_FIELD(aggdirectargs); COPY_NODE_FIELD(args); COPY_NODE_FIELD(aggorder); COPY_NODE_FIELD(aggdistinct); COPY_NODE_FIELD(aggfilter); COPY_SCALAR_FIELD(aggstar); COPY_SCALAR_FIELD(aggvariadic); + COPY_SCALAR_FIELD(aggkind); COPY_SCALAR_FIELD(agglevelsup); COPY_LOCATION_FIELD(location); @@ -2180,6 +2182,7 @@ _copyFuncCall(const FuncCall *from) COPY_NODE_FIELD(args); COPY_NODE_FIELD(agg_order); COPY_NODE_FIELD(agg_filter); + COPY_SCALAR_FIELD(agg_within_group); COPY_SCALAR_FIELD(agg_star); COPY_SCALAR_FIELD(agg_distinct); COPY_SCALAR_FIELD(func_variadic); diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c index 329755c703f..0cdb947c17b 100644 --- a/src/backend/nodes/equalfuncs.c +++ b/src/backend/nodes/equalfuncs.c @@ -193,12 +193,14 @@ _equalAggref(const Aggref *a, const Aggref *b) COMPARE_SCALAR_FIELD(aggtype); COMPARE_SCALAR_FIELD(aggcollid); COMPARE_SCALAR_FIELD(inputcollid); + COMPARE_NODE_FIELD(aggdirectargs); COMPARE_NODE_FIELD(args); 
COMPARE_NODE_FIELD(aggorder); COMPARE_NODE_FIELD(aggdistinct); COMPARE_NODE_FIELD(aggfilter); COMPARE_SCALAR_FIELD(aggstar); COMPARE_SCALAR_FIELD(aggvariadic); + COMPARE_SCALAR_FIELD(aggkind); COMPARE_SCALAR_FIELD(agglevelsup); COMPARE_LOCATION_FIELD(location); @@ -2021,6 +2023,7 @@ _equalFuncCall(const FuncCall *a, const FuncCall *b) COMPARE_NODE_FIELD(args); COMPARE_NODE_FIELD(agg_order); COMPARE_NODE_FIELD(agg_filter); + COMPARE_SCALAR_FIELD(agg_within_group); COMPARE_SCALAR_FIELD(agg_star); COMPARE_SCALAR_FIELD(agg_distinct); COMPARE_SCALAR_FIELD(func_variadic); diff --git a/src/backend/nodes/makefuncs.c b/src/backend/nodes/makefuncs.c index d3ed4fe98b9..83d9b8deba4 100644 --- a/src/backend/nodes/makefuncs.c +++ b/src/backend/nodes/makefuncs.c @@ -546,9 +546,10 @@ makeFuncCall(List *name, List *args, int location) n->args = args; n->agg_order = NIL; n->agg_filter = NULL; - n->agg_star = FALSE; - n->agg_distinct = FALSE; - n->func_variadic = FALSE; + n->agg_within_group = false; + n->agg_star = false; + n->agg_distinct = false; + n->func_variadic = false; n->over = NULL; n->location = location; return n; diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index d7db67dc9ea..17626f9bdce 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -1453,6 +1453,10 @@ exprLocation(const Node *expr) case T_Constraint: loc = ((const Constraint *) expr)->location; break; + case T_FunctionParameter: + /* just use typename's location */ + loc = exprLocation((Node *) ((const FunctionParameter *) expr)->argType); + break; case T_XmlSerialize: /* XMLSERIALIZE keyword should always be the first thing */ loc = ((const XmlSerialize *) expr)->location; @@ -1625,6 +1629,9 @@ expression_tree_walker(Node *node, Aggref *expr = (Aggref *) node; /* recurse directly on List */ + if (expression_tree_walker((Node *) expr->aggdirectargs, + walker, context)) + return true; if (expression_tree_walker((Node *) expr->args, walker, context)) return 
true; @@ -2157,6 +2164,7 @@ expression_tree_mutator(Node *node, Aggref *newnode; FLATCOPY(newnode, aggref, Aggref); + MUTATE(newnode->aggdirectargs, aggref->aggdirectargs, List *); MUTATE(newnode->args, aggref->args, List *); MUTATE(newnode->aggorder, aggref->aggorder, List *); MUTATE(newnode->aggdistinct, aggref->aggdistinct, List *); diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 22c7d40156b..4f63906d8e0 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -953,12 +953,14 @@ _outAggref(StringInfo str, const Aggref *node) WRITE_OID_FIELD(aggtype); WRITE_OID_FIELD(aggcollid); WRITE_OID_FIELD(inputcollid); + WRITE_NODE_FIELD(aggdirectargs); WRITE_NODE_FIELD(args); WRITE_NODE_FIELD(aggorder); WRITE_NODE_FIELD(aggdistinct); WRITE_NODE_FIELD(aggfilter); WRITE_BOOL_FIELD(aggstar); WRITE_BOOL_FIELD(aggvariadic); + WRITE_CHAR_FIELD(aggkind); WRITE_UINT_FIELD(agglevelsup); WRITE_LOCATION_FIELD(location); } @@ -2084,6 +2086,7 @@ _outFuncCall(StringInfo str, const FuncCall *node) WRITE_NODE_FIELD(args); WRITE_NODE_FIELD(agg_order); WRITE_NODE_FIELD(agg_filter); + WRITE_BOOL_FIELD(agg_within_group); WRITE_BOOL_FIELD(agg_star); WRITE_BOOL_FIELD(agg_distinct); WRITE_BOOL_FIELD(func_variadic); diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 2e2cfa7af6a..aba6d4ec3c8 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -492,12 +492,14 @@ _readAggref(void) READ_OID_FIELD(aggtype); READ_OID_FIELD(aggcollid); READ_OID_FIELD(inputcollid); + READ_NODE_FIELD(aggdirectargs); READ_NODE_FIELD(args); READ_NODE_FIELD(aggorder); READ_NODE_FIELD(aggdistinct); READ_NODE_FIELD(aggfilter); READ_BOOL_FIELD(aggstar); READ_BOOL_FIELD(aggvariadic); + READ_CHAR_FIELD(aggkind); READ_UINT_FIELD(agglevelsup); READ_LOCATION_FIELD(location); diff --git a/src/backend/optimizer/plan/planagg.c b/src/backend/optimizer/plan/planagg.c index b85f48d6ffb..53fc2381ca0 100644 --- 
a/src/backend/optimizer/plan/planagg.c +++ b/src/backend/optimizer/plan/planagg.c @@ -328,17 +328,20 @@ find_minmax_aggs_walker(Node *node, List **context) * that differs for each of those equal values of the argument * expression makes the result predictable once again. This is a * niche requirement, and we do not implement it with subquery paths. + * In any case, this test lets us reject ordered-set aggregates + * quickly. */ if (aggref->aggorder != NIL) return true; + /* note: we do not care if DISTINCT is mentioned ... */ /* * We might implement the optimization when a FILTER clause is present - * by adding the filter to the quals of the generated subquery. + * by adding the filter to the quals of the generated subquery. For + * now, just punt. */ if (aggref->aggfilter != NULL) return true; - /* note: we do not care if DISTINCT is mentioned ... */ aggsortop = fetch_agg_sort_op(aggref->aggfnoid); if (!OidIsValid(aggsortop)) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 2eb862e2080..1da4b2f7c9a 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -2659,7 +2659,9 @@ choose_hashed_grouping(PlannerInfo *root, * Executor doesn't support hashed aggregation with DISTINCT or ORDER BY * aggregates. (Doing so would imply storing *all* the input values in * the hash table, and/or running many sorts in parallel, either of which - * seems like a certain loser.) + * seems like a certain loser.) We similarly don't support ordered-set + * aggregates in hashed aggregation, but that case is included in the + * numOrderedAggs count. 
*/ can_hash = (agg_costs->numOrderedAggs == 0 && grouping_is_hashable(parse->groupClause)); diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index a7fdd52c294..eaf85dc9bd4 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -37,6 +37,7 @@ #include "optimizer/prep.h" #include "optimizer/var.h" #include "parser/analyze.h" +#include "parser/parse_agg.h" #include "parser/parse_coerce.h" #include "parser/parse_func.h" #include "rewrite/rewriteManip.h" @@ -463,9 +464,8 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context) Oid aggtranstype; int32 aggtransspace; QualCost argcosts; - Oid *inputTypes; + Oid inputTypes[FUNC_MAX_ARGS]; int numArguments; - ListCell *l; Assert(aggref->agglevelsup == 0); @@ -482,7 +482,7 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context) aggtransspace = aggform->aggtransspace; ReleaseSysCache(aggTuple); - /* count it */ + /* count it; note ordered-set aggs always have nonempty aggorder */ costs->numAggs++; if (aggref->aggorder != NIL || aggref->aggdistinct != NIL) costs->numOrderedAggs++; @@ -498,43 +498,40 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context) costs->transCost.per_tuple += argcosts.per_tuple; /* - * Add the filter's cost to per-input-row costs. XXX We should reduce - * input expression costs according to filter selectivity. + * Add any filter's cost to per-input-row costs. + * + * XXX Ideally we should reduce input expression costs according to + * filter selectivity, but it's not clear it's worth the trouble. 
*/ - cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter, - context->root); - costs->transCost.startup += argcosts.startup; - costs->transCost.per_tuple += argcosts.per_tuple; - - /* extract argument types (ignoring any ORDER BY expressions) */ - inputTypes = (Oid *) palloc(sizeof(Oid) * list_length(aggref->args)); - numArguments = 0; - foreach(l, aggref->args) + if (aggref->aggfilter) { - TargetEntry *tle = (TargetEntry *) lfirst(l); - - if (!tle->resjunk) - inputTypes[numArguments++] = exprType((Node *) tle->expr); + cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter, + context->root); + costs->transCost.startup += argcosts.startup; + costs->transCost.per_tuple += argcosts.per_tuple; } - /* resolve actual type of transition state, if polymorphic */ - if (IsPolymorphicType(aggtranstype)) + /* + * If there are direct arguments, treat their evaluation cost like the + * cost of the finalfn. + */ + if (aggref->aggdirectargs) { - /* have to fetch the agg's declared input types... */ - Oid *declaredArgTypes; - int agg_nargs; - - (void) get_func_signature(aggref->aggfnoid, - &declaredArgTypes, &agg_nargs); - Assert(agg_nargs == numArguments); - aggtranstype = enforce_generic_type_consistency(inputTypes, - declaredArgTypes, - agg_nargs, - aggtranstype, - false); - pfree(declaredArgTypes); + cost_qual_eval_node(&argcosts, (Node *) aggref->aggdirectargs, + context->root); + costs->transCost.startup += argcosts.startup; + costs->finalCost += argcosts.per_tuple; } + /* extract argument types (ignoring any ORDER BY expressions) */ + numArguments = get_aggregate_argtypes(aggref, inputTypes); + + /* resolve actual type of transition state, if polymorphic */ + aggtranstype = resolve_aggregate_transtype(aggref->aggfnoid, + aggtranstype, + inputTypes, + numArguments); + /* * If the transition type is pass-by-value then it doesn't add * anything to the required size of the hashtable. 
If it is @@ -551,14 +548,16 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context) else { /* - * If transition state is of same type as first input, assume - * it's the same typmod (same width) as well. This works for - * cases like MAX/MIN and is probably somewhat reasonable - * otherwise. + * If transition state is of same type as first aggregated + * input, assume it's the same typmod (same width) as well. + * This works for cases like MAX/MIN and is probably somewhat + * reasonable otherwise. */ + int numdirectargs = list_length(aggref->aggdirectargs); int32 aggtranstypmod; - if (numArguments > 0 && aggtranstype == inputTypes[0]) + if (numArguments > numdirectargs && + aggtranstype == inputTypes[numdirectargs]) aggtranstypmod = exprTypmod((Node *) linitial(aggref->args)); else aggtranstypmod = -1; @@ -587,17 +586,11 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context) } /* - * Complain if the aggregate's arguments contain any aggregates; - * nested agg functions are semantically nonsensical. Aggregates in - * the FILTER clause are detected in transformAggregateCall(). - */ - if (contain_agg_clause((Node *) aggref->args)) - ereport(ERROR, - (errcode(ERRCODE_GROUPING_ERROR), - errmsg("aggregate function calls cannot be nested"))); - - /* - * Having checked that, we need not recurse into the argument. + * We assume that the parser checked that there are no aggregates (of + * this level anyway) in the aggregated arguments, direct arguments, + * or filter clause. Hence, we need not recurse into any of them. (If + * either the parser or the planner screws up on this point, the + * executor will still catch it; see ExecInitExpr.) */ return false; } @@ -662,17 +655,10 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists) lists->numWindowFuncs++; /* - * Complain if the window function's arguments contain window - * functions. Window functions in the FILTER clause are detected in - * transformAggregateCall(). 
- */ - if (contain_window_function((Node *) wfunc->args)) - ereport(ERROR, - (errcode(ERRCODE_WINDOWING_ERROR), - errmsg("window function calls cannot be nested"))); - - /* - * Having checked that, we need not recurse into the argument. + * We assume that the parser checked that there are no window + * functions in the arguments or filter clause. Hence, we need not + * recurse into them. (If either the parser or the planner screws up + * on this point, the executor will still catch it; see ExecInitExpr.) */ return false; } diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index b4e5552636e..0249f5cdf35 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -142,6 +142,9 @@ static void check_qualified_name(List *names, core_yyscan_t yyscanner); static List *check_func_name(List *names, core_yyscan_t yyscanner); static List *check_indirection(List *indirection, core_yyscan_t yyscanner); static List *extractArgTypes(List *parameters); +static List *extractAggrArgTypes(List *aggrargs); +static List *makeOrderedSetArgs(List *directargs, List *orderedargs, + core_yyscan_t yyscanner); static void insertSelectOptions(SelectStmt *stmt, List *sortClause, List *lockingClause, Node *limitOffset, Node *limitCount, @@ -491,12 +494,13 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <with> with_clause opt_with_clause %type <list> cte_list +%type <list> within_group_clause +%type <node> filter_clause %type <list> window_clause window_definition_list opt_partition_clause %type <windef> window_definition over_clause window_specification opt_frame_clause frame_extent frame_bound %type <str> opt_existing_window_name %type <boolean> opt_if_not_exists -%type <node> filter_clause /* * Non-keyword token types. These are hard-wired into the "flex" lexer. 
@@ -599,7 +603,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); VACUUM VALID VALIDATE VALIDATOR VALUE_P VALUES VARCHAR VARIADIC VARYING VERBOSE VERSION_P VIEW VOLATILE - WHEN WHERE WHITESPACE_P WINDOW WITH WITHOUT WORK WRAPPER WRITE + WHEN WHERE WHITESPACE_P WINDOW WITH WITHIN WITHOUT WORK WRAPPER WRITE XML_P XMLATTRIBUTES XMLCONCAT XMLELEMENT XMLEXISTS XMLFOREST XMLPARSE XMLPI XMLROOT XMLSERIALIZE @@ -3715,7 +3719,7 @@ AlterExtensionContentsStmt: n->action = $4; n->objtype = OBJECT_AGGREGATE; n->objname = $6; - n->objargs = extractArgTypes($7); + n->objargs = extractAggrArgTypes($7); $$ = (Node *)n; } | ALTER EXTENSION name add_drop CAST '(' Typename AS Typename ')' @@ -5294,7 +5298,7 @@ CommentStmt: CommentStmt *n = makeNode(CommentStmt); n->objtype = OBJECT_AGGREGATE; n->objname = $4; - n->objargs = extractArgTypes($5); + n->objargs = extractAggrArgTypes($5); n->comment = $7; $$ = (Node *) n; } @@ -5460,7 +5464,7 @@ SecLabelStmt: n->provider = $3; n->objtype = OBJECT_AGGREGATE; n->objname = $6; - n->objargs = extractArgTypes($7); + n->objargs = extractAggrArgTypes($7); n->label = $9; $$ = (Node *) n; } @@ -6460,9 +6464,52 @@ aggr_arg: func_arg } ; -/* Zero-argument aggregates are named with * for consistency with COUNT(*) */ -aggr_args: '(' aggr_args_list ')' { $$ = $2; } - | '(' '*' ')' { $$ = NIL; } +/* + * The SQL standard offers no guidance on how to declare aggregate argument + * lists, since it doesn't have CREATE AGGREGATE etc. We accept these cases: + * + * (*) - normal agg with no args + * (aggr_arg,...) - normal agg with args + * (ORDER BY aggr_arg,...) - ordered-set agg with no direct args + * (aggr_arg,... ORDER BY aggr_arg,...) - ordered-set agg with direct args + * + * The zero-argument case is spelled with '*' for consistency with COUNT(*). 
+ * + * An additional restriction is that if the direct-args list ends in a + * VARIADIC item, the ordered-args list must contain exactly one item that + * is also VARIADIC with the same type. This allows us to collapse the two + * VARIADIC items into one, which is necessary to represent the aggregate in + * pg_proc. We check this at the grammar stage so that we can return a list + * in which the second VARIADIC item is already discarded, avoiding extra work + * in cases such as DROP AGGREGATE. + * + * The return value of this production is a two-element list, in which the + * first item is a sublist of FunctionParameter nodes (with any duplicate + * VARIADIC item already dropped, as per above) and the second is an integer + * Value node, containing -1 if there was no ORDER BY and otherwise the number + * of argument declarations before the ORDER BY. (If this number is equal + * to the first sublist's length, then we dropped a duplicate VARIADIC item.) + * This representation is passed as-is to CREATE AGGREGATE; for operations + * on existing aggregates, we can just apply extractArgTypes to the first + * sublist. 
+ */ +aggr_args: '(' '*' ')' + { + $$ = list_make2(NIL, makeInteger(-1)); + } + | '(' aggr_args_list ')' + { + $$ = list_make2($2, makeInteger(-1)); + } + | '(' ORDER BY aggr_args_list ')' + { + $$ = list_make2($4, makeInteger(0)); + } + | '(' aggr_args_list ORDER BY aggr_args_list ')' + { + /* this is the only case requiring consistency checking */ + $$ = makeOrderedSetArgs($2, $5, yyscanner); + } ; aggr_args_list: @@ -6668,7 +6715,7 @@ RemoveAggrStmt: DropStmt *n = makeNode(DropStmt); n->removeType = OBJECT_AGGREGATE; n->objects = list_make1($3); - n->arguments = list_make1(extractArgTypes($4)); + n->arguments = list_make1(extractAggrArgTypes($4)); n->behavior = $5; n->missing_ok = false; n->concurrent = false; @@ -6679,7 +6726,7 @@ RemoveAggrStmt: DropStmt *n = makeNode(DropStmt); n->removeType = OBJECT_AGGREGATE; n->objects = list_make1($5); - n->arguments = list_make1(extractArgTypes($6)); + n->arguments = list_make1(extractAggrArgTypes($6)); n->behavior = $7; n->missing_ok = true; n->concurrent = false; @@ -6895,7 +6942,7 @@ RenameStmt: ALTER AGGREGATE func_name aggr_args RENAME TO name RenameStmt *n = makeNode(RenameStmt); n->renameType = OBJECT_AGGREGATE; n->object = $3; - n->objarg = extractArgTypes($4); + n->objarg = extractAggrArgTypes($4); n->newname = $7; n->missing_ok = false; $$ = (Node *)n; @@ -7369,7 +7416,7 @@ AlterObjectSchemaStmt: AlterObjectSchemaStmt *n = makeNode(AlterObjectSchemaStmt); n->objectType = OBJECT_AGGREGATE; n->object = $3; - n->objarg = extractArgTypes($4); + n->objarg = extractAggrArgTypes($4); n->newschema = $7; n->missing_ok = false; $$ = (Node *)n; @@ -7598,7 +7645,7 @@ AlterOwnerStmt: ALTER AGGREGATE func_name aggr_args OWNER TO RoleId AlterOwnerStmt *n = makeNode(AlterOwnerStmt); n->objectType = OBJECT_AGGREGATE; n->object = $3; - n->objarg = extractArgTypes($4); + n->objarg = extractAggrArgTypes($4); n->newowner = $7; $$ = (Node *)n; } @@ -11165,26 +11212,24 @@ func_application: func_name '(' ')' { $$ = (Node *) 
makeFuncCall($1, NIL, @1); } - | func_name '(' func_arg_list ')' + | func_name '(' func_arg_list opt_sort_clause ')' { - $$ = (Node *) makeFuncCall($1, $3, @1); + FuncCall *n = makeFuncCall($1, $3, @1); + n->agg_order = $4; + $$ = (Node *)n; } - | func_name '(' VARIADIC func_arg_expr ')' + | func_name '(' VARIADIC func_arg_expr opt_sort_clause ')' { FuncCall *n = makeFuncCall($1, list_make1($4), @1); n->func_variadic = TRUE; + n->agg_order = $5; $$ = (Node *)n; } - | func_name '(' func_arg_list ',' VARIADIC func_arg_expr ')' + | func_name '(' func_arg_list ',' VARIADIC func_arg_expr opt_sort_clause ')' { FuncCall *n = makeFuncCall($1, lappend($3, $6), @1); n->func_variadic = TRUE; - $$ = (Node *)n; - } - | func_name '(' func_arg_list sort_clause ')' - { - FuncCall *n = makeFuncCall($1, $3, @1); - n->agg_order = $4; + n->agg_order = $7; $$ = (Node *)n; } | func_name '(' ALL func_arg_list opt_sort_clause ')' @@ -11232,12 +11277,40 @@ func_application: func_name '(' ')' * (Note that many of the special SQL functions wouldn't actually make any * sense as functional index entries, but we ignore that consideration here.) */ -func_expr: func_application filter_clause over_clause +func_expr: func_application within_group_clause filter_clause over_clause { - FuncCall *n = (FuncCall*)$1; - n->agg_filter = $2; - n->over = $3; - $$ = (Node*)n; + FuncCall *n = (FuncCall *) $1; + /* + * The order clause for WITHIN GROUP and the one for + * plain-aggregate ORDER BY share a field, so we have to + * check here that at most one is present. We also check + * for DISTINCT and VARIADIC here to give a better error + * location. Other consistency checks are deferred to + * parse analysis. 
+ */ + if ($2 != NIL) + { + if (n->agg_order != NIL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use multiple ORDER BY clauses with WITHIN GROUP"), + parser_errposition(@2))); + if (n->agg_distinct) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use DISTINCT with WITHIN GROUP"), + parser_errposition(@2))); + if (n->func_variadic) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use VARIADIC with WITHIN GROUP"), + parser_errposition(@2))); + n->agg_order = $2; + n->agg_within_group = TRUE; + } + n->agg_filter = $3; + n->over = $4; + $$ = (Node *) n; } | func_expr_common_subexpr { $$ = $1; } @@ -11625,6 +11698,20 @@ xmlexists_argument: /* + * Aggregate decoration clauses + */ +within_group_clause: + WITHIN GROUP_P '(' sort_clause ')' { $$ = $4; } + | /*EMPTY*/ { $$ = NIL; } + ; + +filter_clause: + FILTER '(' WHERE a_expr ')' { $$ = $4; } + | /*EMPTY*/ { $$ = NULL; } + ; + + +/* * Window Definitions */ window_clause: @@ -11647,11 +11734,6 @@ window_definition: } ; -filter_clause: - FILTER '(' WHERE a_expr ')' { $$ = $4; } - | /*EMPTY*/ { $$ = NULL; } - ; - over_clause: OVER window_specification { $$ = $2; } | OVER ColId @@ -12416,16 +12498,17 @@ AexprConst: Iconst t->location = @1; $$ = makeStringConstCast($2, @2, t); } - | func_name '(' func_arg_list ')' Sconst + | func_name '(' func_arg_list opt_sort_clause ')' Sconst { /* generic syntax with a type modifier */ TypeName *t = makeTypeNameFromNameList($1); ListCell *lc; /* - * We must use func_arg_list in the production to avoid - * reduce/reduce conflicts, but we don't actually wish - * to allow NamedArgExpr in this context. + * We must use func_arg_list and opt_sort_clause in the + * production to avoid reduce/reduce conflicts, but we + * don't actually wish to allow NamedArgExpr in this + * context, nor ORDER BY. 
*/ foreach(lc, $3) { @@ -12437,9 +12520,15 @@ AexprConst: Iconst errmsg("type modifier cannot have parameter name"), parser_errposition(arg->location))); } + if ($4 != NIL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("type modifier cannot have ORDER BY"), + parser_errposition(@4))); + t->typmods = $3; t->location = @1; - $$ = makeStringConstCast($5, @5, t); + $$ = makeStringConstCast($6, @6, t); } | ConstTypename Sconst { @@ -12800,6 +12889,7 @@ unreserved_keyword: | VIEW | VOLATILE | WHITESPACE_P + | WITHIN | WITHOUT | WORK | WRAPPER @@ -13275,6 +13365,52 @@ extractArgTypes(List *parameters) return result; } +/* extractAggrArgTypes() + * As above, but work from the output of the aggr_args production. + */ +static List * +extractAggrArgTypes(List *aggrargs) +{ + Assert(list_length(aggrargs) == 2); + return extractArgTypes((List *) linitial(aggrargs)); +} + +/* makeOrderedSetArgs() + * Build the result of the aggr_args production (which see the comments for). + * This handles only the case where both given lists are nonempty, so that + * we have to deal with multiple VARIADIC arguments. + */ +static List * +makeOrderedSetArgs(List *directargs, List *orderedargs, + core_yyscan_t yyscanner) +{ + FunctionParameter *lastd = (FunctionParameter *) llast(directargs); + + /* No restriction unless last direct arg is VARIADIC */ + if (lastd->mode == FUNC_PARAM_VARIADIC) + { + FunctionParameter *firsto = (FunctionParameter *) linitial(orderedargs); + + /* + * We ignore the names, though the aggr_arg production allows them; + * it doesn't allow default values, so those need not be checked. 
+ */ + if (list_length(orderedargs) != 1 || + firsto->mode != FUNC_PARAM_VARIADIC || + !equal(lastd->argType, firsto->argType)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("an ordered-set aggregate with a VARIADIC direct argument must have one VARIADIC aggregated argument of the same data type"), + parser_errposition(exprLocation((Node *) firsto)))); + + /* OK, drop the duplicate VARIADIC argument from the internal form */ + orderedargs = NIL; + } + + return list_make2(list_concat(directargs, orderedargs), + makeInteger(list_length(directargs))); +} + /* insertSelectOptions() * Insert ORDER BY, etc into an already-constructed SelectStmt. * diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 98cb58a7cc0..04a20eefcd4 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -14,16 +14,20 @@ */ #include "postgres.h" +#include "catalog/pg_aggregate.h" #include "catalog/pg_constraint.h" +#include "catalog/pg_type.h" #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "optimizer/tlist.h" #include "parser/parse_agg.h" #include "parser/parse_clause.h" +#include "parser/parse_coerce.h" #include "parser/parse_expr.h" #include "parser/parsetree.h" #include "rewrite/rewriteManip.h" #include "utils/builtins.h" +#include "utils/lsyscache.h" typedef struct @@ -42,9 +46,13 @@ typedef struct bool have_non_var_grouping; List **func_grouped_rels; int sublevels_up; + bool in_agg_direct_args; } check_ungrouped_columns_context; -static int check_agg_arguments(ParseState *pstate, List *args, Expr *filter); +static int check_agg_arguments(ParseState *pstate, + List *directargs, + List *args, + Expr *filter); static bool check_agg_arguments_walker(Node *node, check_agg_arguments_context *context); static void check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry, @@ -59,15 +67,21 @@ static bool check_ungrouped_columns_walker(Node *node, * Finish initial transformation of an 
aggregate call * * parse_func.c has recognized the function as an aggregate, and has set up - * all the fields of the Aggref except args, aggorder, aggdistinct and - * agglevelsup. The passed-in args list has been through standard expression - * transformation, while the passed-in aggorder list hasn't been transformed - * at all. + * all the fields of the Aggref except aggdirectargs, args, aggorder, + * aggdistinct and agglevelsup. The passed-in args list has been through + * standard expression transformation and type coercion to match the agg's + * declared arg types, while the passed-in aggorder list hasn't been + * transformed at all. * - * Here we convert the args list into a targetlist by inserting TargetEntry - * nodes, and then transform the aggorder and agg_distinct specifications to - * produce lists of SortGroupClause nodes. (That might also result in adding - * resjunk expressions to the targetlist.) + * Here we separate the args list into direct and aggregated args, storing the + * former in agg->aggdirectargs and the latter in agg->args. The regular + * args, but not the direct args, are converted into a targetlist by inserting + * TargetEntry nodes. We then transform the aggorder and agg_distinct + * specifications to produce lists of SortGroupClause nodes for agg->aggorder + * and agg->aggdistinct. (For a regular aggregate, this might result in + * adding resjunk expressions to the targetlist; but for ordered-set + * aggregates the aggorder list will always be one-to-one with the aggregated + * args.) 
* * We must also determine which query level the aggregate actually belongs to, * set agglevelsup accordingly, and mark p_hasAggs true in the corresponding @@ -77,76 +91,122 @@ void transformAggregateCall(ParseState *pstate, Aggref *agg, List *args, List *aggorder, bool agg_distinct) { - List *tlist; - List *torder; + List *tlist = NIL; + List *torder = NIL; List *tdistinct = NIL; - AttrNumber attno; + AttrNumber attno = 1; int save_next_resno; int min_varlevel; ListCell *lc; const char *err; bool errkind; - /* - * Transform the plain list of Exprs into a targetlist. We don't bother - * to assign column names to the entries. - */ - tlist = NIL; - attno = 1; - foreach(lc, args) + if (AGGKIND_IS_ORDERED_SET(agg->aggkind)) { - Expr *arg = (Expr *) lfirst(lc); - TargetEntry *tle = makeTargetEntry(arg, attno++, NULL, false); + /* + * For an ordered-set agg, the args list includes direct args and + * aggregated args; we must split them apart. + */ + int numDirectArgs = list_length(args) - list_length(aggorder); + List *aargs; + ListCell *lc2; - tlist = lappend(tlist, tle); - } + Assert(numDirectArgs >= 0); - /* - * If we have an ORDER BY, transform it. This will add columns to the - * tlist if they appear in ORDER BY but weren't already in the arg list. - * They will be marked resjunk = true so we can tell them apart from - * regular aggregate arguments later. - * - * We need to mess with p_next_resno since it will be used to number any - * new targetlist entries. - */ - save_next_resno = pstate->p_next_resno; - pstate->p_next_resno = attno; + aargs = list_copy_tail(args, numDirectArgs); + agg->aggdirectargs = list_truncate(args, numDirectArgs); - torder = transformSortClause(pstate, - aggorder, - &tlist, - EXPR_KIND_ORDER_BY, - true /* fix unknowns */ , - true /* force SQL99 rules */ ); + /* + * Build a tlist from the aggregated args, and make a sortlist entry + * for each one. 
Note that the expressions in the SortBy nodes are + * ignored (they are the raw versions of the transformed args); we are + * just looking at the sort information in the SortBy nodes. + */ + forboth(lc, aargs, lc2, aggorder) + { + Expr *arg = (Expr *) lfirst(lc); + SortBy *sortby = (SortBy *) lfirst(lc2); + TargetEntry *tle; - /* - * If we have DISTINCT, transform that to produce a distinctList. - */ - if (agg_distinct) + /* We don't bother to assign column names to the entries */ + tle = makeTargetEntry(arg, attno++, NULL, false); + tlist = lappend(tlist, tle); + + torder = addTargetToSortList(pstate, tle, + torder, tlist, sortby, + true /* fix unknowns */ ); + } + + /* Never any DISTINCT in an ordered-set agg */ + Assert(!agg_distinct); + } + else { - tdistinct = transformDistinctClause(pstate, &tlist, torder, true); + /* Regular aggregate, so it has no direct args */ + agg->aggdirectargs = NIL; + + /* + * Transform the plain list of Exprs into a targetlist. + */ + foreach(lc, args) + { + Expr *arg = (Expr *) lfirst(lc); + TargetEntry *tle; + + /* We don't bother to assign column names to the entries */ + tle = makeTargetEntry(arg, attno++, NULL, false); + tlist = lappend(tlist, tle); + } + + /* + * If we have an ORDER BY, transform it. This will add columns to the + * tlist if they appear in ORDER BY but weren't already in the arg + * list. They will be marked resjunk = true so we can tell them apart + * from regular aggregate arguments later. + * + * We need to mess with p_next_resno since it will be used to number + * any new targetlist entries. + */ + save_next_resno = pstate->p_next_resno; + pstate->p_next_resno = attno; + + torder = transformSortClause(pstate, + aggorder, + &tlist, + EXPR_KIND_ORDER_BY, + true /* fix unknowns */ , + true /* force SQL99 rules */ ); /* - * Remove this check if executor support for hashed distinct for - * aggregates is ever added. + * If we have DISTINCT, transform that to produce a distinctList. 
*/ - foreach(lc, tdistinct) + if (agg_distinct) { - SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc); + tdistinct = transformDistinctClause(pstate, &tlist, torder, true); - if (!OidIsValid(sortcl->sortop)) + /* + * Remove this check if executor support for hashed distinct for + * aggregates is ever added. + */ + foreach(lc, tdistinct) { - Node *expr = get_sortgroupclause_expr(sortcl, tlist); - - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_FUNCTION), - errmsg("could not identify an ordering operator for type %s", - format_type_be(exprType(expr))), - errdetail("Aggregates with DISTINCT must be able to sort their inputs."), - parser_errposition(pstate, exprLocation(expr)))); + SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc); + + if (!OidIsValid(sortcl->sortop)) + { + Node *expr = get_sortgroupclause_expr(sortcl, tlist); + + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an ordering operator for type %s", + format_type_be(exprType(expr))), + errdetail("Aggregates with DISTINCT must be able to sort their inputs."), + parser_errposition(pstate, exprLocation(expr)))); + } } } + + pstate->p_next_resno = save_next_resno; } /* Update the Aggref with the transformation results */ @@ -154,13 +214,14 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, agg->aggorder = torder; agg->aggdistinct = tdistinct; - pstate->p_next_resno = save_next_resno; - /* * Check the arguments to compute the aggregate's level and detect * improper nesting. */ - min_varlevel = check_agg_arguments(pstate, agg->args, agg->aggfilter); + min_varlevel = check_agg_arguments(pstate, + agg->aggdirectargs, + agg->args, + agg->aggfilter); agg->agglevelsup = min_varlevel; /* Mark the correct pstate level as having aggregates */ @@ -302,8 +363,17 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, * one is its parent, etc). 
* * The aggregate's level is the same as the level of the lowest-level variable - * or aggregate in its arguments or filter expression; or if it contains no - * variables at all, we presume it to be local. + * or aggregate in its aggregated arguments (including any ORDER BY columns) + * or filter expression; or if it contains no variables at all, we presume it + * to be local. + * + * Vars/Aggs in direct arguments are *not* counted towards determining the + * agg's level, as those arguments aren't evaluated per-row but only + * per-group, and so in some sense aren't really agg arguments. However, + * this can mean that we decide an agg is upper-level even when its direct + * args contain lower-level Vars/Aggs, and that case has to be disallowed. + * (This is a little strange, but the SQL standard seems pretty definite that + * direct args are not to be considered when setting the agg's level.) * * We also take this opportunity to detect any aggregates or window functions * nested within the arguments. We can throw error immediately if we find @@ -312,7 +382,10 @@ transformAggregateCall(ParseState *pstate, Aggref *agg, * which we can't know until we finish scanning the arguments. */ static int -check_agg_arguments(ParseState *pstate, List *args, Expr *filter) +check_agg_arguments(ParseState *pstate, + List *directargs, + List *args, + Expr *filter) { int agglevel; check_agg_arguments_context context; @@ -337,8 +410,9 @@ check_agg_arguments(ParseState *pstate, List *args, Expr *filter) if (context.min_varlevel < 0) { if (context.min_agglevel < 0) - return 0; - agglevel = context.min_agglevel; + agglevel = 0; + else + agglevel = context.min_agglevel; } else if (context.min_agglevel < 0) agglevel = context.min_varlevel; @@ -349,12 +423,49 @@ check_agg_arguments(ParseState *pstate, List *args, Expr *filter) * If there's a nested aggregate of the same semantic level, complain. 
*/ if (agglevel == context.min_agglevel) + { + int aggloc; + + aggloc = locate_agg_of_level((Node *) args, agglevel); + if (aggloc < 0) + aggloc = locate_agg_of_level((Node *) filter, agglevel); ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR), errmsg("aggregate function calls cannot be nested"), - parser_errposition(pstate, - locate_agg_of_level((Node *) args, - agglevel)))); + parser_errposition(pstate, aggloc))); + } + + /* + * Now check for vars/aggs in the direct arguments, and throw error if + * needed. Note that we allow a Var of the agg's semantic level, but not + * an Agg of that level. In principle such Aggs could probably be + * supported, but it would create an ordering dependency among the + * aggregates at execution time. Since the case appears neither to be + * required by spec nor particularly useful, we just treat it as a + * nested-aggregate situation. + */ + if (directargs) + { + context.min_varlevel = -1; + context.min_agglevel = -1; + (void) expression_tree_walker((Node *) directargs, + check_agg_arguments_walker, + (void *) &context); + if (context.min_varlevel >= 0 && context.min_varlevel < agglevel) + ereport(ERROR, + (errcode(ERRCODE_GROUPING_ERROR), + errmsg("outer-level aggregate cannot contain a lower-level variable in its direct arguments"), + parser_errposition(pstate, + locate_var_of_level((Node *) directargs, + context.min_varlevel)))); + if (context.min_agglevel >= 0 && context.min_agglevel <= agglevel) + ereport(ERROR, + (errcode(ERRCODE_GROUPING_ERROR), + errmsg("aggregate function calls cannot be nested"), + parser_errposition(pstate, + locate_agg_of_level((Node *) directargs, + context.min_agglevel)))); + } return agglevel; } @@ -442,6 +553,10 @@ transformWindowFuncCall(ParseState *pstate, WindowFunc *wfunc, /* * A window function call can't contain another one (but aggs are OK). XXX * is this required by spec, or just an unimplemented feature? 
+ * + * Note: we don't need to check the filter expression here, because the + * context checks done below and in transformAggregateCall would have + * already rejected any window funcs or aggs within the filter. */ if (pstate->p_hasWindowFuncs && contain_windowfuncs((Node *) wfunc->args)) @@ -800,6 +915,7 @@ check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry, context.have_non_var_grouping = have_non_var_grouping; context.func_grouped_rels = func_grouped_rels; context.sublevels_up = 0; + context.in_agg_direct_args = false; check_ungrouped_columns_walker(node, &context); } @@ -815,16 +931,39 @@ check_ungrouped_columns_walker(Node *node, IsA(node, Param)) return false; /* constants are always acceptable */ - /* - * If we find an aggregate call of the original level, do not recurse into - * its arguments or filter; ungrouped vars there are not an error. We can - * also skip looking at aggregates of higher levels, since they could not - * possibly contain Vars of concern to us (see transformAggregateCall). - * We do need to look at aggregates of lower levels, however. - */ - if (IsA(node, Aggref) && - (int) ((Aggref *) node)->agglevelsup >= context->sublevels_up) - return false; + if (IsA(node, Aggref)) + { + Aggref *agg = (Aggref *) node; + + if ((int) agg->agglevelsup == context->sublevels_up) + { + /* + * If we find an aggregate call of the original level, do not + * recurse into its normal arguments, ORDER BY arguments, or + * filter; ungrouped vars there are not an error. But we should + * check direct arguments as though they weren't in an aggregate. + * We set a special flag in the context to help produce a useful + * error message for ungrouped vars in direct arguments. 
+ */ + bool result; + + Assert(!context->in_agg_direct_args); + context->in_agg_direct_args = true; + result = check_ungrouped_columns_walker((Node *) agg->aggdirectargs, + context); + context->in_agg_direct_args = false; + return result; + } + + /* + * We can skip recursing into aggregates of higher levels altogether, + * since they could not possibly contain Vars of concern to us (see + * transformAggregateCall). We do need to look at aggregates of lower + * levels, however. + */ + if ((int) agg->agglevelsup > context->sublevels_up) + return false; + } /* * If we have any GROUP BY items that are not simple Vars, check to see if @@ -917,6 +1056,8 @@ check_ungrouped_columns_walker(Node *node, (errcode(ERRCODE_GROUPING_ERROR), errmsg("column \"%s.%s\" must appear in the GROUP BY clause or be used in an aggregate function", rte->eref->aliasname, attname), + context->in_agg_direct_args ? + errdetail("Direct arguments of an ordered-set aggregate must use only grouped columns.") : 0, parser_errposition(context->pstate, var->location))); else ereport(ERROR, @@ -944,6 +1085,93 @@ check_ungrouped_columns_walker(Node *node, } /* + * get_aggregate_argtypes + * Identify the specific datatypes passed to an aggregate call. + * + * Given an Aggref, extract the actual datatypes of the input arguments. + * The input datatypes are reported in a way that matches up with the + * aggregate's declaration, ie, any ORDER BY columns attached to a plain + * aggregate are ignored, but we report both direct and aggregated args of + * an ordered-set aggregate. + * + * Datatypes are returned into inputTypes[], which must reference an array + * of length FUNC_MAX_ARGS. + * + * The function result is the number of actual arguments. 
+ */ +int +get_aggregate_argtypes(Aggref *aggref, Oid *inputTypes) +{ + int numArguments = 0; + ListCell *lc; + + /* Any direct arguments of an ordered-set aggregate come first */ + foreach(lc, aggref->aggdirectargs) + { + Node *expr = (Node *) lfirst(lc); + + inputTypes[numArguments] = exprType(expr); + numArguments++; + } + + /* Now get the regular (aggregated) arguments */ + foreach(lc, aggref->args) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + /* Ignore ordering columns of a plain aggregate */ + if (tle->resjunk) + continue; + + inputTypes[numArguments] = exprType((Node *) tle->expr); + numArguments++; + } + + return numArguments; +} + +/* + * resolve_aggregate_transtype + * Identify the transition state value's datatype for an aggregate call. + * + * This function resolves a polymorphic aggregate's state datatype. + * It must be passed the aggtranstype from the aggregate's catalog entry, + * as well as the actual argument types extracted by get_aggregate_argtypes. + * (We could fetch these values internally, but for all existing callers that + * would just duplicate work the caller has to do too, so we pass them in.) + */ +Oid +resolve_aggregate_transtype(Oid aggfuncid, + Oid aggtranstype, + Oid *inputTypes, + int numArguments) +{ + /* resolve actual type of transition state, if polymorphic */ + if (IsPolymorphicType(aggtranstype)) + { + /* have to fetch the agg's declared input types... */ + Oid *declaredArgTypes; + int agg_nargs; + + (void) get_func_signature(aggfuncid, &declaredArgTypes, &agg_nargs); + + /* + * VARIADIC ANY aggs could have more actual than declared args, but + * such extra args can't affect polymorphic type resolution. + */ + Assert(agg_nargs <= numArguments); + + aggtranstype = enforce_generic_type_consistency(inputTypes, + declaredArgTypes, + agg_nargs, + aggtranstype, + false); + pfree(declaredArgTypes); + } + return aggtranstype; +} + +/* * Create expression trees for the transition and final functions * of an aggregate. 
These are needed so that polymorphic functions * can be used within an aggregate --- without the expression trees, @@ -956,6 +1184,9 @@ check_ungrouped_columns_walker(Node *node, * resolved to actual types (ie, none should ever be ANYELEMENT etc). * agg_input_collation is the aggregate function's input collation. * + * For an ordered-set aggregate, remember that agg_input_types describes + * the direct arguments followed by the aggregated arguments. + * * transfn_oid and finalfn_oid identify the funcs to be called; the latter * may be InvalidOid. * @@ -965,6 +1196,8 @@ check_ungrouped_columns_walker(Node *node, void build_aggregate_fnexprs(Oid *agg_input_types, int agg_num_inputs, + int agg_num_direct_inputs, + bool agg_ordered_set, bool agg_variadic, Oid agg_state_type, Oid agg_result_type, @@ -995,7 +1228,7 @@ build_aggregate_fnexprs(Oid *agg_input_types, args = list_make1(argp); - for (i = 0; i < agg_num_inputs; i++) + for (i = agg_num_direct_inputs; i < agg_num_inputs; i++) { argp = makeNode(Param); argp->paramkind = PARAM_EXEC; @@ -1035,10 +1268,26 @@ build_aggregate_fnexprs(Oid *agg_input_types, argp->location = -1; args = list_make1(argp); + if (agg_ordered_set) + { + for (i = 0; i < agg_num_inputs; i++) + { + argp = makeNode(Param); + argp->paramkind = PARAM_EXEC; + argp->paramid = -1; + argp->paramtype = agg_input_types[i]; + argp->paramtypmod = -1; + argp->paramcollid = agg_input_collation; + argp->location = -1; + args = lappend(args, argp); + } + } + *finalfnexpr = (Expr *) makeFuncExpr(finalfn_oid, agg_result_type, args, InvalidOid, agg_input_collation, COERCE_EXPLICIT_CALL); + /* finalfn is currently never treated as variadic */ } diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 87b0c8fd418..05ddb8c3e74 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -75,9 +75,6 @@ static TargetEntry *findTargetlistEntrySQL99(ParseState *pstate, Node *node, List **tlist, ParseExprKind 
exprKind); static int get_matching_location(int sortgroupref, List *sortgrouprefs, List *exprs); -static List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, - List *sortlist, List *targetlist, SortBy *sortby, - bool resolveUnknown); static List *addTargetToGroupList(ParseState *pstate, TargetEntry *tle, List *grouplist, List *targetlist, int location, bool resolveUnknown); @@ -2177,7 +2174,7 @@ get_matching_location(int sortgroupref, List *sortgrouprefs, List *exprs) * * Returns the updated SortGroupClause list. */ -static List * +List * addTargetToSortList(ParseState *pstate, TargetEntry *tle, List *sortlist, List *targetlist, SortBy *sortby, bool resolveUnknown) diff --git a/src/backend/parser/parse_coerce.c b/src/backend/parser/parse_coerce.c index b6df2c60b46..efd483d8139 100644 --- a/src/backend/parser/parse_coerce.c +++ b/src/backend/parser/parse_coerce.c @@ -2009,6 +2009,10 @@ IsBinaryCoercible(Oid srctype, Oid targettype) if (srctype == targettype) return true; + /* Anything is coercible to ANY or ANYELEMENT */ + if (targettype == ANYOID || targettype == ANYELEMENTOID) + return true; + /* If srctype is a domain, reduce to its base type */ if (OidIsValid(srctype)) srctype = getBaseType(srctype); diff --git a/src/backend/parser/parse_collate.c b/src/backend/parser/parse_collate.c index c02f98acc71..f33fe3e305a 100644 --- a/src/backend/parser/parse_collate.c +++ b/src/backend/parser/parse_collate.c @@ -40,7 +40,9 @@ */ #include "postgres.h" +#include "catalog/pg_aggregate.h" #include "catalog/pg_collation.h" +#include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "parser/parse_collate.h" #include "utils/lsyscache.h" @@ -73,6 +75,18 @@ typedef struct static bool assign_query_collations_walker(Node *node, ParseState *pstate); static bool assign_collations_walker(Node *node, assign_collations_context *context); +static void merge_collation_state(Oid collation, + CollateStrength strength, + int location, + Oid collation2, + int location2, + 
assign_collations_context *context); +static void assign_aggregate_collations(Aggref *aggref, + assign_collations_context *loccontext); +static void assign_ordered_set_collations(Aggref *aggref, + assign_collations_context *loccontext); +static void assign_hypothetical_collations(Aggref *aggref, + assign_collations_context *loccontext); /* @@ -258,6 +272,9 @@ assign_collations_walker(Node *node, assign_collations_context *context) loccontext.collation = InvalidOid; loccontext.strength = COLLATE_NONE; loccontext.location = -1; + /* Set these fields just to suppress uninitialized-value warnings: */ + loccontext.collation2 = InvalidOid; + loccontext.location2 = -1; /* * Recurse if appropriate, then determine the collation for this node. @@ -570,40 +587,31 @@ assign_collations_walker(Node *node, assign_collations_context *context) case T_Aggref: { /* - * Aggref is a special case because expressions - * used only for ordering shouldn't be taken to - * conflict with each other or with regular args. - * So we apply assign_expr_collations() to them - * rather than passing down our loccontext. - * - * Note that we recurse to each TargetEntry, not - * directly to its contained expression, so that - * the case above for T_TargetEntry will apply - * appropriate checks to agg ORDER BY items. - * - * Likewise, we assign collations for the (bool) - * expression in aggfilter, independently of any - * other args. - * - * We need not recurse into the aggorder or - * aggdistinct lists, because those contain only - * SortGroupClause nodes which we need not - * process. + * Aggref is messy enough that we give it its own + * function, in fact three of them. The FILTER + * clause is independent of the rest of the + * aggregate, however, so it can be processed + * separately. 
*/ Aggref *aggref = (Aggref *) node; - ListCell *lc; - foreach(lc, aggref->args) + switch (aggref->aggkind) { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - - Assert(IsA(tle, TargetEntry)); - if (tle->resjunk) - assign_expr_collations(context->pstate, - (Node *) tle); - else - (void) assign_collations_walker((Node *) tle, + case AGGKIND_NORMAL: + assign_aggregate_collations(aggref, + &loccontext); + break; + case AGGKIND_ORDERED_SET: + assign_ordered_set_collations(aggref, &loccontext); + break; + case AGGKIND_HYPOTHETICAL: + assign_hypothetical_collations(aggref, + &loccontext); + break; + default: + elog(ERROR, "unrecognized aggkind: %d", + (int) aggref->aggkind); } assign_expr_collations(context->pstate, @@ -730,9 +738,33 @@ assign_collations_walker(Node *node, assign_collations_context *context) } /* - * Now, merge my information into my parent's state. If the collation - * strength for this node is different from what's already in *context, - * then this node either dominates or is dominated by earlier siblings. + * Now, merge my information into my parent's state. + */ + merge_collation_state(collation, + strength, + location, + loccontext.collation2, + loccontext.location2, + context); + + return false; +} + +/* + * Merge collation state of a subexpression into the context for its parent. + */ +static void +merge_collation_state(Oid collation, + CollateStrength strength, + int location, + Oid collation2, + int location2, + assign_collations_context *context) +{ + /* + * If the collation strength for this node is different from what's + * already in *context, then this node either dominates or is dominated by + * earlier siblings. 
*/ if (strength > context->strength) { @@ -743,8 +775,8 @@ assign_collations_walker(Node *node, assign_collations_context *context) /* Bubble up error info if applicable */ if (strength == COLLATE_CONFLICT) { - context->collation2 = loccontext.collation2; - context->location2 = loccontext.location2; + context->collation2 = collation2; + context->location2 = location2; } } else if (strength == context->strength) @@ -805,6 +837,201 @@ assign_collations_walker(Node *node, assign_collations_context *context) break; } } +} - return false; +/* + * Aggref is a special case because expressions used only for ordering + * shouldn't be taken to conflict with each other or with regular args, + * indeed shouldn't affect the aggregate's result collation at all. + * We handle this by applying assign_expr_collations() to them rather than + * passing down our loccontext. + * + * Note that we recurse to each TargetEntry, not directly to its contained + * expression, so that the case above for T_TargetEntry will complain if we + * can't resolve a collation for an ORDER BY item (whether or not it is also + * a normal aggregate arg). + * + * We need not recurse into the aggorder or aggdistinct lists, because those + * contain only SortGroupClause nodes which we need not process. + */ +static void +assign_aggregate_collations(Aggref *aggref, + assign_collations_context *loccontext) +{ + ListCell *lc; + + /* Plain aggregates have no direct args */ + Assert(aggref->aggdirectargs == NIL); + + /* Process aggregated args, holding resjunk ones at arm's length */ + foreach(lc, aggref->args) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + Assert(IsA(tle, TargetEntry)); + if (tle->resjunk) + assign_expr_collations(loccontext->pstate, (Node *) tle); + else + (void) assign_collations_walker((Node *) tle, loccontext); + } +} + +/* + * For ordered-set aggregates, it's somewhat unclear how best to proceed. 
+ * The spec-defined inverse distribution functions have only one sort column + * and don't return collatable types, but this is clearly too restrictive in + * the general case. Our solution is to consider that the aggregate's direct + * arguments contribute normally to determination of the aggregate's own + * collation, while aggregated arguments contribute only when the aggregate + * is designed to have exactly one aggregated argument (i.e., it has a single + * aggregated argument and is non-variadic). If it can have more than one + * aggregated argument, we process the aggregated arguments as independent + * sort columns. This avoids throwing error for something like + * agg(...) within group (order by x collate "foo", y collate "bar") + * while also guaranteeing that variadic aggregates don't change in behavior + * depending on how many sort columns a particular call happens to have. + * + * Otherwise this is much like the plain-aggregate case. + */ +static void +assign_ordered_set_collations(Aggref *aggref, + assign_collations_context *loccontext) +{ + bool merge_sort_collations; + ListCell *lc; + + /* Merge sort collations to parent only if there can be only one */ + merge_sort_collations = (list_length(aggref->args) == 1 && + get_func_variadictype(aggref->aggfnoid) == InvalidOid); + + /* Direct args, if any, are normal children of the Aggref node */ + (void) assign_collations_walker((Node *) aggref->aggdirectargs, + loccontext); + + /* Process aggregated args appropriately */ + foreach(lc, aggref->args) + { + TargetEntry *tle = (TargetEntry *) lfirst(lc); + + Assert(IsA(tle, TargetEntry)); + if (merge_sort_collations) + (void) assign_collations_walker((Node *) tle, loccontext); + else + assign_expr_collations(loccontext->pstate, (Node *) tle); + } +} + +/* + * Hypothetical-set aggregates are even more special: per spec, we need to + * unify the collations of each pair of hypothetical and aggregated args. 
+ * And we need to force the choice of collation down into the sort column + * to ensure that the sort happens with the chosen collation. Other than + * that, the behavior is like regular ordered-set aggregates. Note that + * hypothetical direct arguments contribute to the aggregate collation + * only when their partner aggregated arguments do. + */ +static void +assign_hypothetical_collations(Aggref *aggref, + assign_collations_context *loccontext) +{ + ListCell *h_cell = list_head(aggref->aggdirectargs); + ListCell *s_cell = list_head(aggref->args); + bool merge_sort_collations; + int extra_args; + + /* Merge sort collations to parent only if there can be only one */ + merge_sort_collations = (list_length(aggref->args) == 1 && + get_func_variadictype(aggref->aggfnoid) == InvalidOid); + + /* Process any non-hypothetical direct args */ + extra_args = list_length(aggref->aggdirectargs) - list_length(aggref->args); + Assert(extra_args >= 0); + while (extra_args-- > 0) + { + (void) assign_collations_walker((Node *) lfirst(h_cell), loccontext); + h_cell = lnext(h_cell); + } + + /* Scan hypothetical args and aggregated args in parallel */ + while (h_cell && s_cell) + { + Node *h_arg = (Node *) lfirst(h_cell); + TargetEntry *s_tle = (TargetEntry *) lfirst(s_cell); + assign_collations_context paircontext; + + /* + * Assign collations internally in this pair of expressions, then + * choose a common collation for them. This should match + * select_common_collation(), but we can't use that function as-is + * because we need access to the whole collation state so we can + * bubble it up to the aggregate function's level. 
+ */ + paircontext.pstate = loccontext->pstate; + paircontext.collation = InvalidOid; + paircontext.strength = COLLATE_NONE; + paircontext.location = -1; + /* Set these fields just to suppress uninitialized-value warnings: */ + paircontext.collation2 = InvalidOid; + paircontext.location2 = -1; + + (void) assign_collations_walker(h_arg, &paircontext); + (void) assign_collations_walker((Node *) s_tle->expr, &paircontext); + + /* deal with collation conflict */ + if (paircontext.strength == COLLATE_CONFLICT) + ereport(ERROR, + (errcode(ERRCODE_COLLATION_MISMATCH), + errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"", + get_collation_name(paircontext.collation), + get_collation_name(paircontext.collation2)), + errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."), + parser_errposition(paircontext.pstate, + paircontext.location2))); + + /* + * At this point paircontext.collation can be InvalidOid only if the + * type is not collatable; no need to do anything in that case. If we + * do have to change the sort column's collation, do it by inserting a + * RelabelType node into the sort column TLE. + * + * XXX This is pretty grotty for a couple of reasons: + * assign_collations_walker isn't supposed to be changing the + * expression structure like this, and a parse-time change of + * collation ought to be signaled by a CollateExpr not a RelabelType + * (the use of RelabelType for collation marking is supposed to be a + * planner/executor thing only). But we have no better alternative. + * In particular, injecting a CollateExpr could result in the + * expression being interpreted differently after dump/reload, since + * we might be effectively promoting an implicit collation to + * explicit. This kluge is relying on ruleutils.c not printing a + * COLLATE clause for a RelabelType, and probably on some other + * fragile behaviors. 
+ */ + if (OidIsValid(paircontext.collation) && + paircontext.collation != exprCollation((Node *) s_tle->expr)) + { + s_tle->expr = (Expr *) + makeRelabelType(s_tle->expr, + exprType((Node *) s_tle->expr), + exprTypmod((Node *) s_tle->expr), + paircontext.collation, + COERCE_IMPLICIT_CAST); + } + + /* + * If appropriate, merge this column's collation state up to the + * aggregate function. + */ + if (merge_sort_collations) + merge_collation_state(paircontext.collation, + paircontext.strength, + paircontext.location, + paircontext.collation2, + paircontext.location2, + loccontext); + + h_cell = lnext(h_cell); + s_cell = lnext(s_cell); + } + Assert(h_cell == NULL && s_cell == NULL); } diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 68b711dfd9a..3a3489fcb36 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -463,8 +463,8 @@ transformIndirection(ParseState *pstate, Node *basenode, List *indirection) newresult = ParseFuncOrColumn(pstate, list_make1(n), list_make1(result), - NIL, NULL, false, false, false, - NULL, true, location); + NULL, + location); if (newresult == NULL) unknown_attribute(pstate, result, strVal(n), location); result = newresult; @@ -631,8 +631,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref) node = ParseFuncOrColumn(pstate, list_make1(makeString(colname)), list_make1(node), - NIL, NULL, false, false, false, - NULL, true, cref->location); + NULL, + cref->location); } break; } @@ -676,8 +676,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref) node = ParseFuncOrColumn(pstate, list_make1(makeString(colname)), list_make1(node), - NIL, NULL, false, false, false, - NULL, true, cref->location); + NULL, + cref->location); } break; } @@ -734,8 +734,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref) node = ParseFuncOrColumn(pstate, list_make1(makeString(colname)), list_make1(node), - NIL, NULL, false, false, false, - NULL, true, cref->location); + NULL, + 
cref->location); } break; } @@ -1242,7 +1242,6 @@ transformFuncCall(ParseState *pstate, FuncCall *fn) { List *targs; ListCell *args; - Expr *tagg_filter; /* Transform the list of arguments ... */ targs = NIL; @@ -1253,26 +1252,30 @@ transformFuncCall(ParseState *pstate, FuncCall *fn) } /* - * Transform the aggregate filter using transformWhereClause(), to which - * FILTER is virtually identical... + * When WITHIN GROUP is used, we treat its ORDER BY expressions as + * additional arguments to the function, for purposes of function lookup + * and argument type coercion. So, transform each such expression and add + * them to the targs list. We don't explicitly mark where each argument + * came from, but ParseFuncOrColumn can tell what's what by reference to + * list_length(fn->agg_order). */ - tagg_filter = NULL; - if (fn->agg_filter != NULL) - tagg_filter = (Expr *) - transformWhereClause(pstate, (Node *) fn->agg_filter, - EXPR_KIND_FILTER, "FILTER"); + if (fn->agg_within_group) + { + Assert(fn->agg_order != NIL); + foreach(args, fn->agg_order) + { + SortBy *arg = (SortBy *) lfirst(args); + + targs = lappend(targs, transformExpr(pstate, arg->node, + EXPR_KIND_ORDER_BY)); + } + } /* ... 
and hand off to ParseFuncOrColumn */ return ParseFuncOrColumn(pstate, fn->funcname, targs, - fn->agg_order, - tagg_filter, - fn->agg_star, - fn->agg_distinct, - fn->func_variadic, - fn->over, - false, + fn, fn->location); } diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c index ede36d159a3..6aaa73380e5 100644 --- a/src/backend/parser/parse_func.c +++ b/src/backend/parser/parse_func.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "access/htup_details.h" +#include "catalog/pg_aggregate.h" #include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "funcapi.h" @@ -22,6 +23,7 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" #include "parser/parse_agg.h" +#include "parser/parse_clause.h" #include "parser/parse_coerce.h" #include "parser/parse_func.h" #include "parser/parse_relation.h" @@ -32,6 +34,9 @@ #include "utils/syscache.h" +static void unify_hypothetical_args(ParseState *pstate, + List *fargs, int numAggregatedArgs, + Oid *actual_arg_types, Oid *declared_arg_types); static Oid FuncNameAsType(List *funcname); static Node *ParseComplexProjection(ParseState *pstate, char *funcname, Node *first_arg, int location); @@ -47,24 +52,30 @@ static Node *ParseComplexProjection(ParseState *pstate, char *funcname, * a function of a single complex-type argument can be written like a * column reference, allowing functions to act like computed columns. * - * Hence, both cases come through here. The is_column parameter tells us - * which syntactic construct is actually being dealt with, but this is - * intended to be used only to deliver an appropriate error message, - * not to affect the semantics. When is_column is true, we should have - * a single argument (the putative table), unqualified function name - * equal to the column name, and no aggregate or variadic decoration. - * Also, when is_column is true, we return NULL on failure rather than + * Hence, both cases come through here. 
If fn is null, we're dealing with + * column syntax not function syntax, but in principle that should not + * affect the lookup behavior, only which error messages we deliver. + * The FuncCall struct is needed however to carry various decoration that + * applies to aggregate and window functions. + * + * Also, when fn is null, we return NULL on failure rather than * reporting a no-such-function error. * - * The argument expressions (in fargs) and filter must have been transformed - * already. But the agg_order expressions, if any, have not been. + * The argument expressions (in fargs) must have been transformed + * already. However, nothing in *fn has been transformed. */ Node * ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, - List *agg_order, Expr *agg_filter, - bool agg_star, bool agg_distinct, bool func_variadic, - WindowDef *over, bool is_column, int location) + FuncCall *fn, int location) { + bool is_column = (fn == NULL); + List *agg_order = (fn ? fn->agg_order : NIL); + Expr *agg_filter = NULL; + bool agg_within_group = (fn ? fn->agg_within_group : false); + bool agg_star = (fn ? fn->agg_star : false); + bool agg_distinct = (fn ? fn->agg_distinct : false); + bool func_variadic = (fn ? fn->func_variadic : false); + WindowDef *over = (fn ? fn->over : NULL); Oid rettype; Oid funcid; ListCell *l; @@ -81,6 +92,15 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, int nvargs; Oid vatype; FuncDetailCode fdresult; + char aggkind = 0; + + /* + * If there's an aggregate filter, transform it using transformWhereClause + */ + if (fn && fn->agg_filter != NULL) + agg_filter = (Expr *) transformWhereClause(pstate, fn->agg_filter, + EXPR_KIND_FILTER, + "FILTER"); /* * Most of the rest of the parser just assumes that functions do not have @@ -101,10 +121,12 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, * Extract arg type info in preparation for function lookup. 
* * If any arguments are Param markers of type VOID, we discard them from - * the parameter list. This is a hack to allow the JDBC driver to not - * have to distinguish "input" and "output" parameter symbols while - * parsing function-call constructs. We can't use foreach() because we - * may modify the list ... + * the parameter list. This is a hack to allow the JDBC driver to not have + * to distinguish "input" and "output" parameter symbols while parsing + * function-call constructs. Don't do this if dealing with column syntax, + * nor if we had WITHIN GROUP (because in that case it's critical to keep + * the argument count unchanged). We can't use foreach() because we may + * modify the list ... */ nargs = 0; for (l = list_head(fargs); l != NULL; l = nextl) @@ -114,7 +136,8 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, nextl = lnext(l); - if (argtype == VOIDOID && IsA(arg, Param) &&!is_column) + if (argtype == VOIDOID && IsA(arg, Param) && + !is_column && !agg_within_group) { fargs = list_delete_ptr(fargs, arg); continue; @@ -247,6 +270,12 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, errmsg("DISTINCT specified, but %s is not an aggregate function", NameListToString(funcname)), parser_errposition(pstate, location))); + if (agg_within_group) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("WITHIN GROUP specified, but %s is not an aggregate function", + NameListToString(funcname)), + parser_errposition(pstate, location))); if (agg_order != NIL) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -266,8 +295,181 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, NameListToString(funcname)), parser_errposition(pstate, location))); } - else if (!(fdresult == FUNCDETAIL_AGGREGATE || - fdresult == FUNCDETAIL_WINDOWFUNC)) + else if (fdresult == FUNCDETAIL_AGGREGATE) + { + /* + * It's an aggregate; fetch needed info from the pg_aggregate entry. 
+ */ + HeapTuple tup; + Form_pg_aggregate classForm; + int catDirectArgs; + + tup = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tup)) /* should not happen */ + elog(ERROR, "cache lookup failed for aggregate %u", funcid); + classForm = (Form_pg_aggregate) GETSTRUCT(tup); + aggkind = classForm->aggkind; + catDirectArgs = classForm->aggnumdirectargs; + ReleaseSysCache(tup); + + /* Now check various disallowed cases. */ + if (AGGKIND_IS_ORDERED_SET(aggkind)) + { + int numAggregatedArgs; + int numDirectArgs; + + if (!agg_within_group) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("WITHIN GROUP is required for ordered-set aggregate %s", + NameListToString(funcname)), + parser_errposition(pstate, location))); + if (over) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("OVER is not supported for ordered-set aggregate %s", + NameListToString(funcname)), + parser_errposition(pstate, location))); + /* gram.y rejects DISTINCT + WITHIN GROUP */ + Assert(!agg_distinct); + /* gram.y rejects VARIADIC + WITHIN GROUP */ + Assert(!func_variadic); + + /* + * Since func_get_detail was working with an undifferentiated list + * of arguments, it might have selected an aggregate that doesn't + * really match because it requires a different division of direct + * and aggregated arguments. Check that the number of direct + * arguments is actually OK; if not, throw an "undefined function" + * error, similarly to the case where a misplaced ORDER BY is used + * in a regular aggregate call. 
+ */ + numAggregatedArgs = list_length(agg_order); + numDirectArgs = nargs - numAggregatedArgs; + Assert(numDirectArgs >= 0); + + if (!OidIsValid(vatype)) + { + /* Test is simple if aggregate isn't variadic */ + if (numDirectArgs != catDirectArgs) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", + func_signature_string(funcname, nargs, + argnames, + actual_arg_types)), + errhint("There is an ordered-set aggregate %s, but it requires %d direct arguments, not %d.", + NameListToString(funcname), + catDirectArgs, numDirectArgs), + parser_errposition(pstate, location))); + } + else + { + /* + * If it's variadic, we have two cases depending on whether + * the agg was "... ORDER BY VARIADIC" or "..., VARIADIC ORDER + * BY VARIADIC". It's the latter if catDirectArgs equals + * pronargs; to save a catalog lookup, we reverse-engineer + * pronargs from the info we got from func_get_detail. + */ + int pronargs; + + pronargs = nargs; + if (nvargs > 1) + pronargs -= nvargs - 1; + if (catDirectArgs < pronargs) + { + /* VARIADIC isn't part of direct args, so still easy */ + if (numDirectArgs != catDirectArgs) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", + func_signature_string(funcname, nargs, + argnames, + actual_arg_types)), + errhint("There is an ordered-set aggregate %s, but it requires %d direct arguments, not %d.", + NameListToString(funcname), + catDirectArgs, numDirectArgs), + parser_errposition(pstate, location))); + } + else + { + /* + * Both direct and aggregated args were declared variadic. + * For a standard ordered-set aggregate, it's okay as long + * as there aren't too few direct args. For a + * hypothetical-set aggregate, we assume that the + * hypothetical arguments are those that matched the + * variadic parameter; there must be just as many of them + * as there are aggregated arguments. 
+ */ + if (aggkind == AGGKIND_HYPOTHETICAL) + { + if (nvargs != 2 * numAggregatedArgs) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", + func_signature_string(funcname, nargs, + argnames, + actual_arg_types)), + errhint("To use the hypothetical-set aggregate %s, the number of hypothetical direct arguments (here %d) must match the number of ordering columns (here %d).", + NameListToString(funcname), + nvargs - numAggregatedArgs, numAggregatedArgs), + parser_errposition(pstate, location))); + } + else + { + if (nvargs <= numAggregatedArgs) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function %s does not exist", + func_signature_string(funcname, nargs, + argnames, + actual_arg_types)), + errhint("There is an ordered-set aggregate %s, but it requires at least %d direct arguments.", + NameListToString(funcname), + catDirectArgs), + parser_errposition(pstate, location))); + } + } + } + + /* Check type matching of hypothetical arguments */ + if (aggkind == AGGKIND_HYPOTHETICAL) + unify_hypothetical_args(pstate, fargs, numAggregatedArgs, + actual_arg_types, declared_arg_types); + } + else + { + /* Normal aggregate, so it can't have WITHIN GROUP */ + if (agg_within_group) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("%s is not an ordered-set aggregate, so it cannot have WITHIN GROUP", + NameListToString(funcname)), + parser_errposition(pstate, location))); + } + } + else if (fdresult == FUNCDETAIL_WINDOWFUNC) + { + /* + * True window functions must be called with a window definition. 
+ */ + if (!over) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("window function %s requires an OVER clause", + NameListToString(funcname)), + parser_errposition(pstate, location))); + /* And, per spec, WITHIN GROUP isn't allowed */ + if (agg_within_group) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("window function %s cannot have WITHIN GROUP", + NameListToString(funcname)), + parser_errposition(pstate, location))); + } + else { /* * Oops. Time to die. @@ -290,7 +492,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, errhint("Could not choose a best candidate function. " "You might need to add explicit type casts."), parser_errposition(pstate, location))); - else if (list_length(agg_order) > 1) + else if (list_length(agg_order) > 1 && !agg_within_group) { /* It's agg(x, ORDER BY y,z) ... perhaps misplaced ORDER BY */ ereport(ERROR, @@ -424,10 +626,12 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, aggref->aggfnoid = funcid; aggref->aggtype = rettype; /* aggcollid and inputcollid will be set by parse_collate.c */ - /* args, aggorder, aggdistinct will be set by transformAggregateCall */ + /* aggdirectargs and args will be set by transformAggregateCall */ + /* aggorder and aggdistinct will be set by transformAggregateCall */ aggref->aggfilter = agg_filter; aggref->aggstar = agg_star; aggref->aggvariadic = func_variadic; + aggref->aggkind = aggkind; /* agglevelsup will be set by transformAggregateCall */ aggref->location = location; @@ -435,7 +639,7 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, * Reject attempt to call a parameterless aggregate without (*) * syntax. This is mere pedantry but some folks insisted ... 
*/ - if (fargs == NIL && !agg_star) + if (fargs == NIL && !agg_star && !agg_within_group) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("%s(*) must be used to call a parameterless aggregate function", @@ -473,14 +677,8 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, /* window function */ WindowFunc *wfunc = makeNode(WindowFunc); - /* - * True window functions must be called with a window definition. - */ - if (!over) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("window function call requires an OVER clause"), - parser_errposition(pstate, location))); + Assert(over); /* lack of this was checked above */ + Assert(!agg_within_group); /* also checked above */ wfunc->winfnoid = funcid; wfunc->wintype = rettype; @@ -513,22 +711,21 @@ ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, parser_errposition(pstate, location))); /* - * Reject window functions which are not aggregates in the case of - * FILTER. + * ordered aggs not allowed in windows yet */ - if (!wfunc->winagg && agg_filter) + if (agg_order != NIL) ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("FILTER is not implemented in non-aggregate window functions"), + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("aggregate ORDER BY is not implemented for window functions"), parser_errposition(pstate, location))); /* - * ordered aggs not allowed in windows yet + * FILTER is not yet supported with true window functions */ - if (agg_order != NIL) + if (!wfunc->winagg && agg_filter) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("aggregate ORDER BY is not implemented for window functions"), + errmsg("FILTER is not implemented for non-aggregate window functions"), parser_errposition(pstate, location))); if (retset) @@ -1348,6 +1545,101 @@ func_get_detail(List *funcname, /* + * unify_hypothetical_args() + * + * Ensure that each hypothetical direct argument of a hypothetical-set + * aggregate has the same type as the 
corresponding aggregated argument. + * Modify the expressions in the fargs list, if necessary, and update + * actual_arg_types[]. + * + * If the agg declared its args non-ANY (even ANYELEMENT), we need only a + * sanity check that the declared types match; make_fn_arguments will coerce + * the actual arguments to match the declared ones. But if the declaration + * is ANY, nothing will happen in make_fn_arguments, so we need to fix any + * mismatch here. We use the same type resolution logic as UNION etc. + */ +static void +unify_hypothetical_args(ParseState *pstate, + List *fargs, + int numAggregatedArgs, + Oid *actual_arg_types, + Oid *declared_arg_types) +{ + Node *args[FUNC_MAX_ARGS]; + int numDirectArgs, + numNonHypotheticalArgs; + int i; + ListCell *lc; + + numDirectArgs = list_length(fargs) - numAggregatedArgs; + numNonHypotheticalArgs = numDirectArgs - numAggregatedArgs; + /* safety check (should only trigger with a misdeclared agg) */ + if (numNonHypotheticalArgs < 0) + elog(ERROR, "incorrect number of arguments to hypothetical-set aggregate"); + + /* Deconstruct fargs into an array for ease of subscripting */ + i = 0; + foreach(lc, fargs) + { + args[i++] = (Node *) lfirst(lc); + } + + /* Check each hypothetical arg and corresponding aggregated arg */ + for (i = numNonHypotheticalArgs; i < numDirectArgs; i++) + { + int aargpos = numDirectArgs + (i - numNonHypotheticalArgs); + Oid commontype; + + /* A mismatch means AggregateCreate didn't check properly ... */ + if (declared_arg_types[i] != declared_arg_types[aargpos]) + elog(ERROR, "hypothetical-set aggregate has inconsistent declared argument types"); + + /* No need to unify if make_fn_arguments will coerce */ + if (declared_arg_types[i] != ANYOID) + continue; + + /* + * Select common type, giving preference to the aggregated argument's + * type (we'd rather coerce the direct argument once than coerce all + * the aggregated values). 
+ */ + commontype = select_common_type(pstate, + list_make2(args[aargpos], args[i]), + "WITHIN GROUP", + NULL); + + /* + * Perform the coercions. We don't need to worry about NamedArgExprs + * here because they aren't supported with aggregates. + */ + args[i] = coerce_type(pstate, + args[i], + actual_arg_types[i], + commontype, -1, + COERCION_IMPLICIT, + COERCE_IMPLICIT_CAST, + -1); + actual_arg_types[i] = commontype; + args[aargpos] = coerce_type(pstate, + args[aargpos], + actual_arg_types[aargpos], + commontype, -1, + COERCION_IMPLICIT, + COERCE_IMPLICIT_CAST, + -1); + actual_arg_types[aargpos] = commontype; + } + + /* Reconstruct fargs from array */ + i = 0; + foreach(lc, fargs) + { + lfirst(lc) = args[i++]; + } +} + + +/* * make_fn_arguments() * * Given the actual argument expressions for a function, and the desired diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 41a89823681..1ae9fa033a8 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -21,7 +21,8 @@ OBJS = acl.o arrayfuncs.o array_selfuncs.o array_typanalyze.o \ enum.o float.o format_type.o \ geo_ops.o geo_selfuncs.o int.o int8.o json.o jsonfuncs.o like.o \ lockfuncs.o misc.o nabstime.o name.o numeric.o numutils.o \ - oid.o oracle_compat.o pseudotypes.o rangetypes.o rangetypes_gist.o \ + oid.o oracle_compat.o orderedsetaggs.o \ + pseudotypes.o rangetypes.o rangetypes_gist.o \ rowtypes.o regexp.o regproc.o ruleutils.o selfuncs.o \ tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \ network.o mac.o inet_cidr_ntop.o inet_net_pton.o \ diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c new file mode 100644 index 00000000000..28a484fd7f4 --- /dev/null +++ b/src/backend/utils/adt/orderedsetaggs.c @@ -0,0 +1,1346 @@ +/*------------------------------------------------------------------------- + * + * orderedsetaggs.c + * Ordered-set aggregate functions. 
+ * + * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/orderedsetaggs.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <math.h> + +#include "catalog/pg_aggregate.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_type.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/tlist.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/timestamp.h" +#include "utils/tuplesort.h" + + +/* + * Generic support for ordered-set aggregates + */ + +typedef struct OrderedSetAggState +{ + /* Aggref for this aggregate: */ + Aggref *aggref; + /* Sort object we're accumulating data in: */ + Tuplesortstate *sortstate; + /* Number of normal rows inserted into sortstate: */ + int64 number_of_rows; + + /* These fields are used only when accumulating tuples: */ + + /* Tuple descriptor for tuples inserted into sortstate: */ + TupleDesc tupdesc; + /* Tuple slot we can use for inserting/extracting tuples: */ + TupleTableSlot *tupslot; + + /* These fields are used only when accumulating datums: */ + + /* Info about datatype of datums being sorted: */ + Oid datumtype; + int16 typLen; + bool typByVal; + char typAlign; + /* Info about equality operator associated with sort operator: */ + Oid eqOperator; +} OrderedSetAggState; + +static void ordered_set_shutdown(Datum arg); + + +/* + * Set up working state for an ordered-set aggregate + */ +static OrderedSetAggState * +ordered_set_startup(FunctionCallInfo fcinfo, bool use_tuples) +{ + OrderedSetAggState *osastate; + Aggref *aggref; + ExprContext *peraggecontext; + MemoryContext aggcontext; + MemoryContext oldcontext; + List *sortlist; + int numSortCols; + + /* Must be called as aggregate; get the Agg node's 
query-lifespan context */ + if (AggCheckCallContext(fcinfo, &aggcontext) != AGG_CONTEXT_AGGREGATE) + elog(ERROR, "ordered-set aggregate called in non-aggregate context"); + /* Need the Aggref as well */ + aggref = AggGetAggref(fcinfo); + if (!aggref) + elog(ERROR, "ordered-set aggregate called in non-aggregate context"); + if (!AGGKIND_IS_ORDERED_SET(aggref->aggkind)) + elog(ERROR, "ordered-set aggregate support function called for non-ordered-set aggregate"); + /* Also get output exprcontext so we can register shutdown callback */ + peraggecontext = AggGetPerAggEContext(fcinfo); + if (!peraggecontext) + elog(ERROR, "ordered-set aggregate called in non-aggregate context"); + + /* Initialize working-state object in the aggregate-lifespan context */ + osastate = (OrderedSetAggState *) + MemoryContextAllocZero(aggcontext, sizeof(OrderedSetAggState)); + osastate->aggref = aggref; + + /* Extract the sort information */ + sortlist = aggref->aggorder; + numSortCols = list_length(sortlist); + + if (use_tuples) + { + bool ishypothetical = (aggref->aggkind == AGGKIND_HYPOTHETICAL); + AttrNumber *sortColIdx; + Oid *sortOperators; + Oid *sortCollations; + bool *sortNullsFirst; + ListCell *lc; + int i; + + if (ishypothetical) + numSortCols++; /* make space for flag column */ + /* these arrays are made in short-lived context */ + sortColIdx = (AttrNumber *) palloc(numSortCols * sizeof(AttrNumber)); + sortOperators = (Oid *) palloc(numSortCols * sizeof(Oid)); + sortCollations = (Oid *) palloc(numSortCols * sizeof(Oid)); + sortNullsFirst = (bool *) palloc(numSortCols * sizeof(bool)); + + i = 0; + foreach(lc, sortlist) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc); + TargetEntry *tle = get_sortgroupclause_tle(sortcl, aggref->args); + + /* the parser should have made sure of this */ + Assert(OidIsValid(sortcl->sortop)); + + sortColIdx[i] = tle->resno; + sortOperators[i] = sortcl->sortop; + sortCollations[i] = exprCollation((Node *) tle->expr); + sortNullsFirst[i] = 
sortcl->nulls_first; + i++; + } + + if (ishypothetical) + { + /* Add an integer flag column as the last sort column */ + sortColIdx[i] = list_length(aggref->args) + 1; + sortOperators[i] = Int4LessOperator; + sortCollations[i] = InvalidOid; + sortNullsFirst[i] = false; + i++; + } + + Assert(i == numSortCols); + + /* Now build the stuff we need in aggregate-lifespan context */ + oldcontext = MemoryContextSwitchTo(aggcontext); + + /* + * Get a tupledesc corresponding to the aggregated inputs (including + * sort expressions) of the agg. + */ + osastate->tupdesc = ExecTypeFromTL(aggref->args, false); + + /* If we need a flag column, hack the tupledesc to include that */ + if (ishypothetical) + { + TupleDesc newdesc; + int natts = osastate->tupdesc->natts; + + newdesc = CreateTemplateTupleDesc(natts + 1, false); + for (i = 1; i <= natts; i++) + TupleDescCopyEntry(newdesc, i, osastate->tupdesc, i); + + TupleDescInitEntry(newdesc, + (AttrNumber) ++natts, + "flag", + INT4OID, + -1, + 0); + + FreeTupleDesc(osastate->tupdesc); + osastate->tupdesc = newdesc; + } + + /* Initialize tuplesort object */ + osastate->sortstate = tuplesort_begin_heap(osastate->tupdesc, + numSortCols, + sortColIdx, + sortOperators, + sortCollations, + sortNullsFirst, + work_mem, false); + + /* Create slot we'll use to store/retrieve rows */ + osastate->tupslot = MakeSingleTupleTableSlot(osastate->tupdesc); + } + else + { + /* Sort single datums */ + SortGroupClause *sortcl; + TargetEntry *tle; + Oid sortColType; + Oid sortOperator; + Oid eqOperator; + Oid sortCollation; + bool sortNullsFirst; + + if (numSortCols != 1 || aggref->aggkind == AGGKIND_HYPOTHETICAL) + elog(ERROR, "ordered-set aggregate support function does not support multiple aggregated columns"); + + sortcl = (SortGroupClause *) linitial(sortlist); + tle = get_sortgroupclause_tle(sortcl, aggref->args); + + /* the parser should have made sure of this */ + Assert(OidIsValid(sortcl->sortop)); + + sortColType = exprType((Node *) tle->expr); 
+ sortOperator = sortcl->sortop; + eqOperator = sortcl->eqop; + sortCollation = exprCollation((Node *) tle->expr); + sortNullsFirst = sortcl->nulls_first; + + /* Save datatype info */ + osastate->datumtype = sortColType; + get_typlenbyvalalign(sortColType, + &osastate->typLen, + &osastate->typByVal, + &osastate->typAlign); + osastate->eqOperator = eqOperator; + + /* Now build the stuff we need in aggregate-lifespan context */ + oldcontext = MemoryContextSwitchTo(aggcontext); + + /* Initialize tuplesort object */ + osastate->sortstate = tuplesort_begin_datum(sortColType, + sortOperator, + sortCollation, + sortNullsFirst, + work_mem, false); + } + + /* Now register a shutdown callback to clean it all up */ + RegisterExprContextCallback(peraggecontext, + ordered_set_shutdown, + PointerGetDatum(osastate)); + + MemoryContextSwitchTo(oldcontext); + + return osastate; +} + +/* + * Clean up when evaluation of an ordered-set aggregate is complete. + * + * arg is the OrderedSetAggState pointer that ordered_set_startup passed when + * registering this callback. + * + * We don't need to bother freeing objects in the aggcontext memory context, + * since that will get reset anyway by nodeAgg.c, but we should take care to + * release any potential non-memory resources. + * + * This callback is arguably unnecessary, since we don't support use of + * ordered-set aggs in AGG_HASHED mode and there is currently no non-error + * code path in non-hashed modes wherein nodeAgg.c won't call the finalfn + * after calling the transfn one or more times. So in principle we could rely + * on the finalfn to delete the tuplesort etc. However, it's possible that + * such a code path might exist in future, and in any case it'd be + * notationally tedious and sometimes require extra data copying to ensure + * we always delete the tuplesort in the finalfn. + */ +static void +ordered_set_shutdown(Datum arg) +{ + OrderedSetAggState *osastate = (OrderedSetAggState *) DatumGetPointer(arg); + + /* Tuplesort object might have temp files. 
*/ + if (osastate->sortstate) + tuplesort_end(osastate->sortstate); + osastate->sortstate = NULL; + /* The tupleslot probably can't be holding a pin, but let's be safe. */ + if (osastate->tupslot) + ExecDropSingleTupleTableSlot(osastate->tupslot); + osastate->tupslot = NULL; +} + + +/* + * Generic transition function for ordered-set aggregates + * with a single input column in which we want to suppress nulls + * + * Argument 0 is the transition state (NULL on the first call, in which case + * the working state is created here); argument 1 is the input value. Only + * non-null inputs are stored in the tuplesort and counted in number_of_rows. + * The working-state pointer is returned as the new transition value. + */ +Datum +ordered_set_transition(PG_FUNCTION_ARGS) +{ + OrderedSetAggState *osastate; + + /* If first call, create the transition state workspace */ + if (PG_ARGISNULL(0)) + osastate = ordered_set_startup(fcinfo, false); + else + { + /* safety check */ + if (AggCheckCallContext(fcinfo, NULL) != AGG_CONTEXT_AGGREGATE) + elog(ERROR, "ordered-set aggregate called in non-aggregate context"); + osastate = (OrderedSetAggState *) PG_GETARG_POINTER(0); + } + + /* Load the datum into the tuplesort object, but only if it's not null */ + if (!PG_ARGISNULL(1)) + { + tuplesort_putdatum(osastate->sortstate, PG_GETARG_DATUM(1), false); + osastate->number_of_rows++; + } + + PG_RETURN_POINTER(osastate); +} + +/* + * Generic transition function for ordered-set aggregates + * with (potentially) multiple aggregated input columns + * + * Argument 0 is the transition state (NULL on the first call, in which case + * the working state is created here); every remaining argument becomes one + * column of the row that is stored. Unlike the single-column variant, rows + * are stored and counted even when some or all of their columns are null. + */ +Datum +ordered_set_transition_multi(PG_FUNCTION_ARGS) +{ + OrderedSetAggState *osastate; + TupleTableSlot *slot; + int nargs; + int i; + + /* If first call, create the transition state workspace */ + if (PG_ARGISNULL(0)) + osastate = ordered_set_startup(fcinfo, true); + else + { + /* safety check */ + if (AggCheckCallContext(fcinfo, NULL) != AGG_CONTEXT_AGGREGATE) + elog(ERROR, "ordered-set aggregate called in non-aggregate context"); + osastate = (OrderedSetAggState *) PG_GETARG_POINTER(0); + } + + /* Form a tuple from all the other inputs besides the transition value */ + slot = osastate->tupslot; + ExecClearTuple(slot); + nargs = PG_NARGS() - 1; + for (i = 0; i < nargs; i++) + { + slot->tts_values[i] = PG_GETARG_DATUM(i + 1); + 
slot->tts_isnull[i] = PG_ARGISNULL(i + 1); + } + if (osastate->aggref->aggkind == AGGKIND_HYPOTHETICAL) + { + /* Add a zero flag value to mark this row as a normal input row */ + slot->tts_values[i] = Int32GetDatum(0); + slot->tts_isnull[i] = false; + i++; + } + Assert(i == slot->tts_tupleDescriptor->natts); + ExecStoreVirtualTuple(slot); + + /* Load the row into the tuplesort object */ + tuplesort_puttupleslot(osastate->sortstate, slot); + osastate->number_of_rows++; + + PG_RETURN_POINTER(osastate); +} + + +/* + * percentile_disc(float8) within group(anyelement) - discrete percentile + */ +Datum +percentile_disc_final(PG_FUNCTION_ARGS) +{ + OrderedSetAggState *osastate; + double percentile; + Datum val; + bool isnull; + int64 rownum; + + /* safety check */ + if (AggCheckCallContext(fcinfo, NULL) != AGG_CONTEXT_AGGREGATE) + elog(ERROR, "ordered-set aggregate called in non-aggregate context"); + + /* Get and check the percentile argument */ + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + + percentile = PG_GETARG_FLOAT8(1); + + if (percentile < 0 || percentile > 1 || isnan(percentile)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("percentile value %g is not between 0 and 1", + percentile))); + + /* If there were no regular rows, the result is NULL */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + osastate = (OrderedSetAggState *) PG_GETARG_POINTER(0); + + /* number_of_rows could be zero if we only saw NULL input values */ + if (osastate->number_of_rows == 0) + PG_RETURN_NULL(); + + /* Finish the sort */ + tuplesort_performsort(osastate->sortstate); + + /*---------- + * We need the smallest K such that (K/N) >= percentile. + * N>0, therefore K >= N*percentile, therefore K = ceil(N*percentile). + * So we skip K-1 rows (if K>0) and return the next row fetched. 
+ *---------- + */ + rownum = (int64) ceil(percentile * osastate->number_of_rows); + Assert(rownum <= osastate->number_of_rows); + + if (rownum > 1) + { + if (!tuplesort_skiptuples(osastate->sortstate, rownum - 1, true)) + elog(ERROR, "missing row in percentile_disc"); + } + + if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull)) + elog(ERROR, "missing row in percentile_disc"); + + /* + * Note: we *cannot* clean up the tuplesort object here, because the value + * to be returned is allocated inside its sortcontext. We could use + * datumCopy to copy it out of there, but it doesn't seem worth the + * trouble, since the cleanup callback will clear the tuplesort later. + */ + + /* We shouldn't have stored any nulls, but do the right thing anyway */ + if (isnull) + PG_RETURN_NULL(); + else + PG_RETURN_DATUM(val); +} + + +/* + * For percentile_cont, we need a way to interpolate between consecutive + * values. Use a helper function for that, so that we can share the rest + * of the code between types. 
+ */ +typedef Datum (*LerpFunc) (Datum lo, Datum hi, double pct); + +static Datum +float8_lerp(Datum lo, Datum hi, double pct) +{ + double loval = DatumGetFloat8(lo); + double hival = DatumGetFloat8(hi); + + return Float8GetDatum(loval + (pct * (hival - loval))); +} + +static Datum +interval_lerp(Datum lo, Datum hi, double pct) +{ + Datum diff_result = DirectFunctionCall2(interval_mi, hi, lo); + Datum mul_result = DirectFunctionCall2(interval_mul, + diff_result, + Float8GetDatumFast(pct)); + + return DirectFunctionCall2(interval_pl, mul_result, lo); +} + +/* + * Continuous percentile: final-function logic shared by the + * percentile_cont variants. + * + * expect_type is the datatype the sorted datums are expected to have (this + * is verified only by an Assert); lerpfunc interpolates between two adjacent + * sorted values of that type. + * + * The result is NULL if the percentile argument is NULL, if there is no + * transition state, or if no non-null input rows were seen. A percentile + * outside 0..1 (or NaN) raises an error. Otherwise the target position is + * percentile * (N - 1), counting sorted rows from zero: if that is a whole + * number the row at that position is returned unchanged, else the result is + * interpolated between the rows immediately below and above it. + */ +static Datum +percentile_cont_final_common(FunctionCallInfo fcinfo, + Oid expect_type, + LerpFunc lerpfunc) +{ + OrderedSetAggState *osastate; + double percentile; + int64 first_row = 0; + int64 second_row = 0; + Datum val; + Datum first_val; + Datum second_val; + double proportion; + bool isnull; + + /* safety check */ + if (AggCheckCallContext(fcinfo, NULL) != AGG_CONTEXT_AGGREGATE) + elog(ERROR, "ordered-set aggregate called in non-aggregate context"); + + /* Get and check the percentile argument */ + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + + percentile = PG_GETARG_FLOAT8(1); + + if (percentile < 0 || percentile > 1 || isnan(percentile)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("percentile value %g is not between 0 and 1", + percentile))); + + /* If there were no regular rows, the result is NULL */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + osastate = (OrderedSetAggState *) PG_GETARG_POINTER(0); + + /* number_of_rows could be zero if we only saw NULL input values */ + if (osastate->number_of_rows == 0) + PG_RETURN_NULL(); + + Assert(expect_type == osastate->datumtype); + + /* Finish the sort */ + tuplesort_performsort(osastate->sortstate); + + first_row = floor(percentile * (osastate->number_of_rows - 1)); + second_row = ceil(percentile * (osastate->number_of_rows - 1)); + + Assert(first_row < osastate->number_of_rows); + + if 
(!tuplesort_skiptuples(osastate->sortstate, first_row, true)) + elog(ERROR, "missing row in percentile_cont"); + + if (!tuplesort_getdatum(osastate->sortstate, true, &first_val, &isnull)) + elog(ERROR, "missing row in percentile_cont"); + if (isnull) + PG_RETURN_NULL(); + + if (first_row == second_row) + { + val = first_val; + } + else + { + if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull)) + elog(ERROR, "missing row in percentile_cont"); + + if (isnull) + PG_RETURN_NULL(); + + proportion = (percentile * (osastate->number_of_rows - 1)) - first_row; + val = lerpfunc(first_val, second_val, proportion); + } + + /* + * Note: we *cannot* clean up the tuplesort object here, because the value + * to be returned may be allocated inside its sortcontext. We could use + * datumCopy to copy it out of there, but it doesn't seem worth the + * trouble, since the cleanup callback will clear the tuplesort later. + */ + + if (isnull) + PG_RETURN_NULL(); + else + PG_RETURN_DATUM(val); +} + +/* + * percentile_cont(float8) within group (float8) - continuous percentile + */ +Datum +percentile_cont_float8_final(PG_FUNCTION_ARGS) +{ + return percentile_cont_final_common(fcinfo, FLOAT8OID, float8_lerp); +} + +/* + * percentile_cont(float8) within group (interval) - continuous percentile + */ +Datum +percentile_cont_interval_final(PG_FUNCTION_ARGS) +{ + return percentile_cont_final_common(fcinfo, INTERVALOID, interval_lerp); +} + + +/* + * Support code for handling arrays of percentiles + * + * Note: in each pct_info entry, second_row should be equal to or + * exactly one more than first_row. 
+ */ +struct pct_info +{ + int64 first_row; /* first row to sample */ + int64 second_row; /* possible second row to sample */ + double proportion; /* interpolation fraction */ + int idx; /* index of this item in original array */ +}; + +/* + * Sort comparator to sort pct_infos by first_row then second_row + */ +static int +pct_info_cmp(const void *pa, const void *pb) +{ + const struct pct_info *a = (const struct pct_info *) pa; + const struct pct_info *b = (const struct pct_info *) pb; + + if (a->first_row != b->first_row) + return (a->first_row < b->first_row) ? -1 : 1; + if (a->second_row != b->second_row) + return (a->second_row < b->second_row) ? -1 : 1; + return 0; +} + +/* + * Construct array showing which rows to sample for percentiles. + */ +static struct pct_info * +setup_pct_info(int num_percentiles, + Datum *percentiles_datum, + bool *percentiles_null, + int64 rowcount, + bool continuous) +{ + struct pct_info *pct_info; + int i; + + pct_info = (struct pct_info *) palloc(num_percentiles * sizeof(struct pct_info)); + + for (i = 0; i < num_percentiles; i++) + { + pct_info[i].idx = i; + + if (percentiles_null[i]) + { + /* dummy entry for any NULL in array */ + pct_info[i].first_row = 0; + pct_info[i].second_row = 0; + pct_info[i].proportion = 0; + } + else + { + double p = DatumGetFloat8(percentiles_datum[i]); + + if (p < 0 || p > 1 || isnan(p)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("percentile value %g is not between 0 and 1", + p))); + + if (continuous) + { + pct_info[i].first_row = 1 + floor(p * (rowcount - 1)); + pct_info[i].second_row = 1 + ceil(p * (rowcount - 1)); + pct_info[i].proportion = (p * (rowcount - 1)) - floor(p * (rowcount - 1)); + } + else + { + /*---------- + * We need the smallest K such that (K/N) >= percentile. + * N>0, therefore K >= N*percentile, therefore + * K = ceil(N*percentile); but not less than 1. 
+ *---------- + */ + int64 row = (int64) ceil(p * rowcount); + + row = Max(1, row); + pct_info[i].first_row = row; + pct_info[i].second_row = row; + pct_info[i].proportion = 0; + } + } + } + + /* + * The parameter array wasn't necessarily in sorted order, but we need to + * visit the rows in order, so sort by first_row/second_row. + */ + qsort(pct_info, num_percentiles, sizeof(struct pct_info), pct_info_cmp); + + return pct_info; +} + +/* + * percentile_disc(float8[]) within group (anyelement) - discrete percentiles + */ +Datum +percentile_disc_multi_final(PG_FUNCTION_ARGS) +{ + OrderedSetAggState *osastate; + ArrayType *param; + Datum *percentiles_datum; + bool *percentiles_null; + int num_percentiles; + struct pct_info *pct_info; + Datum *result_datum; + bool *result_isnull; + int64 rownum = 0; + Datum val = (Datum) 0; + bool isnull = true; + int i; + + /* safety check */ + if (AggCheckCallContext(fcinfo, NULL) != AGG_CONTEXT_AGGREGATE) + elog(ERROR, "ordered-set aggregate called in non-aggregate context"); + + /* If there were no regular rows, the result is NULL */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + osastate = (OrderedSetAggState *) PG_GETARG_POINTER(0); + + /* number_of_rows could be zero if we only saw NULL input values */ + if (osastate->number_of_rows == 0) + PG_RETURN_NULL(); + + /* Deconstruct the percentile-array input */ + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + param = PG_GETARG_ARRAYTYPE_P(1); + + deconstruct_array(param, FLOAT8OID, + /* hard-wired info on type float8 */ + 8, FLOAT8PASSBYVAL, 'd', + &percentiles_datum, + &percentiles_null, + &num_percentiles); + + if (num_percentiles == 0) + PG_RETURN_POINTER(construct_empty_array(osastate->datumtype)); + + pct_info = setup_pct_info(num_percentiles, + percentiles_datum, + percentiles_null, + osastate->number_of_rows, + false); + + result_datum = (Datum *) palloc(num_percentiles * sizeof(Datum)); + result_isnull = (bool *) palloc(num_percentiles * sizeof(bool)); + + /* + * Start by 
dealing with any nulls in the param array - those are sorted + * to the front on row=0, so set the corresponding result indexes to null + */ + for (i = 0; i < num_percentiles; i++) + { + int idx = pct_info[i].idx; + + if (pct_info[i].first_row > 0) + break; + + result_datum[idx] = (Datum) 0; + result_isnull[idx] = true; + } + + /* + * If there's anything left after doing the nulls, then grind the input + * and extract the needed values + */ + if (i < num_percentiles) + { + /* Finish the sort */ + tuplesort_performsort(osastate->sortstate); + + for (; i < num_percentiles; i++) + { + int64 target_row = pct_info[i].first_row; + int idx = pct_info[i].idx; + + /* Advance to target row, if not already there */ + if (target_row > rownum) + { + if (!tuplesort_skiptuples(osastate->sortstate, target_row - rownum - 1, true)) + elog(ERROR, "missing row in percentile_disc"); + + if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull)) + elog(ERROR, "missing row in percentile_disc"); + + rownum = target_row; + } + + result_datum[idx] = val; + result_isnull[idx] = isnull; + } + } + + /* + * We could clean up the tuplesort object after forming the array, but + * probably not worth the trouble. 
+ */ + + /* We make the output array the same shape as the input */ + PG_RETURN_POINTER(construct_md_array(result_datum, result_isnull, + ARR_NDIM(param), + ARR_DIMS(param), + ARR_LBOUND(param), + osastate->datumtype, + osastate->typLen, + osastate->typByVal, + osastate->typAlign)); +} + +/* + * percentile_cont(float8[]) within group () - continuous percentiles + */ +static Datum +percentile_cont_multi_final_common(FunctionCallInfo fcinfo, + Oid expect_type, + int16 typLen, bool typByVal, char typAlign, + LerpFunc lerpfunc) +{ + OrderedSetAggState *osastate; + ArrayType *param; + Datum *percentiles_datum; + bool *percentiles_null; + int num_percentiles; + struct pct_info *pct_info; + Datum *result_datum; + bool *result_isnull; + int64 rownum = 0; + Datum first_val = (Datum) 0; + Datum second_val = (Datum) 0; + bool isnull; + int i; + + /* safety check */ + if (AggCheckCallContext(fcinfo, NULL) != AGG_CONTEXT_AGGREGATE) + elog(ERROR, "ordered-set aggregate called in non-aggregate context"); + + /* If there were no regular rows, the result is NULL */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + osastate = (OrderedSetAggState *) PG_GETARG_POINTER(0); + + /* number_of_rows could be zero if we only saw NULL input values */ + if (osastate->number_of_rows == 0) + PG_RETURN_NULL(); + + Assert(expect_type == osastate->datumtype); + + /* Deconstruct the percentile-array input */ + if (PG_ARGISNULL(1)) + PG_RETURN_NULL(); + param = PG_GETARG_ARRAYTYPE_P(1); + + deconstruct_array(param, FLOAT8OID, + /* hard-wired info on type float8 */ + 8, FLOAT8PASSBYVAL, 'd', + &percentiles_datum, + &percentiles_null, + &num_percentiles); + + if (num_percentiles == 0) + PG_RETURN_POINTER(construct_empty_array(osastate->datumtype)); + + pct_info = setup_pct_info(num_percentiles, + percentiles_datum, + percentiles_null, + osastate->number_of_rows, + true); + + result_datum = (Datum *) palloc(num_percentiles * sizeof(Datum)); + result_isnull = (bool *) palloc(num_percentiles * sizeof(bool)); 
+ + /* + * Start by dealing with any nulls in the param array - those are sorted + * to the front on row=0, so set the corresponding result indexes to null + */ + for (i = 0; i < num_percentiles; i++) + { + int idx = pct_info[i].idx; + + if (pct_info[i].first_row > 0) + break; + + result_datum[idx] = (Datum) 0; + result_isnull[idx] = true; + } + + /* + * If there's anything left after doing the nulls, then grind the input + * and extract the needed values + */ + if (i < num_percentiles) + { + /* Finish the sort */ + tuplesort_performsort(osastate->sortstate); + + for (; i < num_percentiles; i++) + { + int64 target_row = pct_info[i].first_row; + bool need_lerp = (pct_info[i].second_row > target_row); + int idx = pct_info[i].idx; + + /* Advance to first_row, if not already there */ + if (target_row > rownum) + { + if (!tuplesort_skiptuples(osastate->sortstate, target_row - rownum - 1, true)) + elog(ERROR, "missing row in percentile_cont"); + + if (!tuplesort_getdatum(osastate->sortstate, true, &first_val, &isnull) || isnull) + elog(ERROR, "missing row in percentile_cont"); + + rownum = target_row; + } + else + { + /* + * We are already at the target row, so we must previously + * have read its value into second_val. + */ + first_val = second_val; + } + + /* Fetch second_row if needed */ + if (need_lerp) + { + if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull) || isnull) + elog(ERROR, "missing row in percentile_cont"); + rownum++; + } + else + second_val = first_val; + + /* Compute appropriate result */ + if (need_lerp) + result_datum[idx] = lerpfunc(first_val, second_val, + pct_info[i].proportion); + else + result_datum[idx] = first_val; + + result_isnull[idx] = false; + } + } + + /* + * We could clean up the tuplesort object after forming the array, but + * probably not worth the trouble. 
+ */ + + /* We make the output array the same shape as the input */ + PG_RETURN_POINTER(construct_md_array(result_datum, result_isnull, + ARR_NDIM(param), + ARR_DIMS(param), ARR_LBOUND(param), + expect_type, + typLen, + typByVal, + typAlign)); +} + +/* + * percentile_cont(float8[]) within group (float8) - continuous percentiles + */ +Datum +percentile_cont_float8_multi_final(PG_FUNCTION_ARGS) +{ + return percentile_cont_multi_final_common(fcinfo, + FLOAT8OID, + /* hard-wired info on type float8 */ + 8, FLOAT8PASSBYVAL, 'd', + float8_lerp); +} + +/* + * percentile_cont(float8[]) within group (interval) - continuous percentiles + */ +Datum +percentile_cont_interval_multi_final(PG_FUNCTION_ARGS) +{ + return percentile_cont_multi_final_common(fcinfo, + INTERVALOID, + /* hard-wired info on type interval */ + 16, false, 'd', + interval_lerp); +} + + +/* + * mode() within group (anyelement) - most common value + */ +Datum +mode_final(PG_FUNCTION_ARGS) +{ + OrderedSetAggState *osastate; + Datum val; + bool isnull; + Datum mode_val = (Datum) 0; + int64 mode_freq = 0; + Datum last_val = (Datum) 0; + int64 last_val_freq = 0; + bool last_val_is_mode = false; + FmgrInfo equalfn; + bool shouldfree; + + /* safety check */ + if (AggCheckCallContext(fcinfo, NULL) != AGG_CONTEXT_AGGREGATE) + elog(ERROR, "ordered-set aggregate called in non-aggregate context"); + + /* If there were no regular rows, the result is NULL */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + osastate = (OrderedSetAggState *) PG_GETARG_POINTER(0); + + /* number_of_rows could be zero if we only saw NULL input values */ + if (osastate->number_of_rows == 0) + PG_RETURN_NULL(); + + /* Look up the equality function for the datatype */ + fmgr_info(get_opcode(osastate->eqOperator), &equalfn); + + shouldfree = !(osastate->typByVal); + + /* Finish the sort */ + tuplesort_performsort(osastate->sortstate); + + /* Scan tuples and count frequencies */ + while (tuplesort_getdatum(osastate->sortstate, true, &val, &isnull)) + 
{ + /* we don't expect any nulls, but ignore them if found */ + if (isnull) + continue; + + if (last_val_freq == 0) + { + /* first nonnull value - it's the mode for now */ + mode_val = last_val = val; + mode_freq = last_val_freq = 1; + last_val_is_mode = true; + } + else if (DatumGetBool(FunctionCall2(&equalfn, val, last_val))) + { + /* value equal to previous value, count it */ + if (last_val_is_mode) + mode_freq++; /* needn't maintain last_val_freq */ + else if (++last_val_freq > mode_freq) + { + /* last_val becomes new mode */ + if (shouldfree) + pfree(DatumGetPointer(mode_val)); + mode_val = last_val; + mode_freq = last_val_freq; + last_val_is_mode = true; + } + if (shouldfree) + pfree(DatumGetPointer(val)); + } + else + { + /* val should replace last_val */ + if (shouldfree && !last_val_is_mode) + pfree(DatumGetPointer(last_val)); + last_val = val; + last_val_freq = 1; + last_val_is_mode = false; + } + + CHECK_FOR_INTERRUPTS(); + } + + if (shouldfree && !last_val_is_mode) + pfree(DatumGetPointer(last_val)); + + /* + * Note: we *cannot* clean up the tuplesort object here, because the value + * to be returned is allocated inside its sortcontext. We could use + * datumCopy to copy it out of there, but it doesn't seem worth the + * trouble, since the cleanup callback will clear the tuplesort later. + */ + + if (mode_freq) + PG_RETURN_DATUM(mode_val); + else + PG_RETURN_NULL(); +} + + +/* + * Common code to sanity-check args for hypothetical-set functions. No need + * for friendly errors, these can only happen if someone's messing up the + * aggregate definitions. The checks are needed for security, however. 
+ */ +static void +hypothetical_check_argtypes(FunctionCallInfo fcinfo, int nargs, + TupleDesc tupdesc) +{ + int i; + + /* check that we have an int4 flag column */ + if (!tupdesc || + (nargs + 1) != tupdesc->natts || + tupdesc->attrs[nargs]->atttypid != INT4OID) + elog(ERROR, "type mismatch in hypothetical-set function"); + + /* check that direct args match in type with aggregated args */ + for (i = 0; i < nargs; i++) + { + if (get_fn_expr_argtype(fcinfo->flinfo, i + 1) != tupdesc->attrs[i]->atttypid) + elog(ERROR, "type mismatch in hypothetical-set function"); + } +} + +/* + * compute rank of hypothetical row + * + * flag should be -1 to sort hypothetical row ahead of its peers, or +1 + * to sort behind. + * total number of regular rows is returned into *number_of_rows. + */ +static int64 +hypothetical_rank_common(FunctionCallInfo fcinfo, int flag, + int64 *number_of_rows) +{ + int nargs = PG_NARGS() - 1; + int64 rank = 1; + OrderedSetAggState *osastate; + TupleTableSlot *slot; + int i; + + /* safety check */ + if (AggCheckCallContext(fcinfo, NULL) != AGG_CONTEXT_AGGREGATE) + elog(ERROR, "ordered-set aggregate called in non-aggregate context"); + + /* If there were no regular rows, the rank is always 1 */ + if (PG_ARGISNULL(0)) + { + *number_of_rows = 0; + return 1; + } + + osastate = (OrderedSetAggState *) PG_GETARG_POINTER(0); + *number_of_rows = osastate->number_of_rows; + + /* Adjust nargs to be the number of direct (or aggregated) args */ + if (nargs % 2 != 0) + elog(ERROR, "wrong number of arguments in hypothetical-set function"); + nargs /= 2; + + hypothetical_check_argtypes(fcinfo, nargs, osastate->tupdesc); + + /* insert the hypothetical row into the sort */ + slot = osastate->tupslot; + ExecClearTuple(slot); + for (i = 0; i < nargs; i++) + { + slot->tts_values[i] = PG_GETARG_DATUM(i + 1); + slot->tts_isnull[i] = PG_ARGISNULL(i + 1); + } + slot->tts_values[i] = Int32GetDatum(flag); + slot->tts_isnull[i] = false; + ExecStoreVirtualTuple(slot); + + 
tuplesort_puttupleslot(osastate->sortstate, slot); + + /* finish the sort */ + tuplesort_performsort(osastate->sortstate); + + /* iterate till we find the hypothetical row */ + while (tuplesort_gettupleslot(osastate->sortstate, true, slot)) + { + bool isnull; + Datum d = slot_getattr(slot, nargs + 1, &isnull); + + if (!isnull && DatumGetInt32(d) != 0) + break; + + rank++; + + CHECK_FOR_INTERRUPTS(); + } + + ExecClearTuple(slot); + + /* Might as well clean up the tuplesort object immediately */ + tuplesort_end(osastate->sortstate); + osastate->sortstate = NULL; + + return rank; +} + + +/* + * rank() - rank of hypothetical row + */ +Datum +hypothetical_rank_final(PG_FUNCTION_ARGS) +{ + int64 rank; + int64 rowcount; + + rank = hypothetical_rank_common(fcinfo, -1, &rowcount); + + PG_RETURN_INT64(rank); +} + +/* + * percent_rank() - percentile rank of hypothetical row + */ +Datum +hypothetical_percent_rank_final(PG_FUNCTION_ARGS) +{ + int64 rank; + int64 rowcount; + double result_val; + + rank = hypothetical_rank_common(fcinfo, -1, &rowcount); + + if (rowcount == 0) + PG_RETURN_FLOAT8(0); + + result_val = (double) (rank - 1) / (double) (rowcount); + + PG_RETURN_FLOAT8(result_val); +} + +/* + * cume_dist() - cumulative distribution of hypothetical row + */ +Datum +hypothetical_cume_dist_final(PG_FUNCTION_ARGS) +{ + int64 rank; + int64 rowcount; + double result_val; + + rank = hypothetical_rank_common(fcinfo, 1, &rowcount); + + result_val = (double) (rank) / (double) (rowcount + 1); + + PG_RETURN_FLOAT8(result_val); +} + +/* + * dense_rank() - rank of hypothetical row without gaps in ranking + */ +Datum +hypothetical_dense_rank_final(PG_FUNCTION_ARGS) +{ + int nargs = PG_NARGS() - 1; + int64 rank = 1; + int64 duplicate_count = 0; + OrderedSetAggState *osastate; + List *sortlist; + int numDistinctCols; + AttrNumber *sortColIdx; + FmgrInfo *equalfns; + TupleTableSlot *slot; + TupleTableSlot *extraslot; + TupleTableSlot *slot2; + MemoryContext tmpcontext; + ListCell *lc; + 
int i; + + /* safety check */ + if (AggCheckCallContext(fcinfo, NULL) != AGG_CONTEXT_AGGREGATE) + elog(ERROR, "ordered-set aggregate called in non-aggregate context"); + + /* If there were no regular rows, the rank is always 1 */ + if (PG_ARGISNULL(0)) + PG_RETURN_INT64(rank); + + osastate = (OrderedSetAggState *) PG_GETARG_POINTER(0); + + /* Adjust nargs to be the number of direct (or aggregated) args */ + if (nargs % 2 != 0) + elog(ERROR, "wrong number of arguments in hypothetical-set function"); + nargs /= 2; + + hypothetical_check_argtypes(fcinfo, nargs, osastate->tupdesc); + + /* + * Construct list of columns to compare for uniqueness. We can omit the + * flag column since we will only compare rows with flag == 0. + */ + sortlist = osastate->aggref->aggorder; + numDistinctCols = list_length(sortlist); + sortColIdx = (AttrNumber *) palloc(numDistinctCols * sizeof(AttrNumber)); + equalfns = (FmgrInfo *) palloc(numDistinctCols * sizeof(FmgrInfo)); + + i = 0; + foreach(lc, sortlist) + { + SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc); + TargetEntry *tle = get_sortgroupclause_tle(sortcl, + osastate->aggref->args); + + sortColIdx[i] = tle->resno; + fmgr_info(get_opcode(sortcl->eqop), &equalfns[i]); + i++; + } + + /* Get short-term context we can use for execTuplesMatch */ + tmpcontext = AggGetPerTupleEContext(fcinfo)->ecxt_per_tuple_memory; + + /* insert the hypothetical row into the sort */ + slot = osastate->tupslot; + ExecClearTuple(slot); + for (i = 0; i < nargs; i++) + { + slot->tts_values[i] = PG_GETARG_DATUM(i + 1); + slot->tts_isnull[i] = PG_ARGISNULL(i + 1); + } + slot->tts_values[i] = Int32GetDatum(-1); + slot->tts_isnull[i] = false; + ExecStoreVirtualTuple(slot); + + tuplesort_puttupleslot(osastate->sortstate, slot); + + /* finish the sort */ + tuplesort_performsort(osastate->sortstate); + + /* + * We alternate fetching into osastate->tupslot and extraslot so that we + * have the previous row available for comparisons. 
This is accomplished + * by swapping the slot pointer variables after each row. + */ + extraslot = MakeSingleTupleTableSlot(osastate->tupdesc); + slot2 = extraslot; + + /* iterate till we find the hypothetical row */ + while (tuplesort_gettupleslot(osastate->sortstate, true, slot)) + { + bool isnull; + Datum d = slot_getattr(slot, nargs + 1, &isnull); + TupleTableSlot *tmpslot; + + if (!isnull && DatumGetInt32(d) != 0) + break; + + /* count non-distinct tuples */ + if (!TupIsNull(slot2) && + execTuplesMatch(slot, slot2, + numDistinctCols, + sortColIdx, + equalfns, + tmpcontext)) + duplicate_count++; + + tmpslot = slot2; + slot2 = slot; + slot = tmpslot; + + rank++; + + CHECK_FOR_INTERRUPTS(); + } + + ExecClearTuple(slot); + ExecClearTuple(slot2); + + ExecDropSingleTupleTableSlot(extraslot); + + /* Might as well clean up the tuplesort object immediately */ + tuplesort_end(osastate->sortstate); + osastate->sortstate = NULL; + + rank = rank - duplicate_count; + + PG_RETURN_INT64(rank); +} diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 917130fd668..2433437749b 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -3586,8 +3586,7 @@ ri_HashCompareOp(Oid eq_opr, Oid typeid) * special cases such as RECORD; find_coercion_pathway * currently doesn't subsume these special cases. 
*/ - if (!IsPolymorphicType(lefttype) && - !IsBinaryCoercible(typeid, lefttype)) + if (!IsBinaryCoercible(typeid, lefttype)) elog(ERROR, "no conversion function from %s to %s", format_type_be(typeid), format_type_be(lefttype)); diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 86c0a582539..0d7cc8b76a0 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -22,6 +22,7 @@ #include "access/sysattr.h" #include "catalog/dependency.h" #include "catalog/indexing.h" +#include "catalog/pg_aggregate.h" #include "catalog/pg_authid.h" #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" @@ -40,6 +41,7 @@ #include "nodes/nodeFuncs.h" #include "optimizer/tlist.h" #include "parser/keywords.h" +#include "parser/parse_agg.h" #include "parser/parse_func.h" #include "parser/parse_oper.h" #include "parser/parser.h" @@ -2166,6 +2168,7 @@ print_function_arguments(StringInfo buf, HeapTuple proctup, Oid *argtypes; char **argnames; char *argmodes; + int insertorderbyat = -1; int argsprinted; int inputargno; int nlackdefaults; @@ -2199,6 +2202,23 @@ print_function_arguments(StringInfo buf, HeapTuple proctup, } } + /* Check for special treatment of ordered-set aggregates */ + if (proc->proisagg) + { + HeapTuple aggtup; + Form_pg_aggregate agg; + + aggtup = SearchSysCache1(AGGFNOID, + ObjectIdGetDatum(HeapTupleGetOid(proctup))); + if (!HeapTupleIsValid(aggtup)) + elog(ERROR, "cache lookup failed for aggregate %u", + HeapTupleGetOid(proctup)); + agg = (Form_pg_aggregate) GETSTRUCT(aggtup); + if (AGGKIND_IS_ORDERED_SET(agg->aggkind)) + insertorderbyat = agg->aggnumdirectargs; + ReleaseSysCache(aggtup); + } + argsprinted = 0; inputargno = 0; for (i = 0; i < numargs; i++) @@ -2243,8 +2263,15 @@ print_function_arguments(StringInfo buf, HeapTuple proctup, if (print_table_args != (argmode == PROARGMODE_TABLE)) continue; - if (argsprinted) + if (argsprinted == insertorderbyat) + { + if (argsprinted) + 
appendStringInfoChar(buf, ' '); + appendStringInfoString(buf, "ORDER BY "); + } + else if (argsprinted) appendStringInfoString(buf, ", "); + appendStringInfoString(buf, modename); if (argname && argname[0]) appendStringInfo(buf, "%s ", quote_identifier(argname)); @@ -2261,6 +2288,14 @@ print_function_arguments(StringInfo buf, HeapTuple proctup, deparse_expression(expr, NIL, false, false)); } argsprinted++; + + /* nasty hack: print the last arg twice for variadic ordered-set agg */ + if (argsprinted == insertorderbyat && i == numargs - 1) + { + i--; + /* aggs shouldn't have defaults anyway, but just to be sure ... */ + print_defaults = false; + } } return argsprinted; @@ -7493,31 +7528,13 @@ get_agg_expr(Aggref *aggref, deparse_context *context) { StringInfo buf = context->buf; Oid argtypes[FUNC_MAX_ARGS]; - List *arglist; int nargs; bool use_variadic; - ListCell *l; - - /* Extract the regular arguments, ignoring resjunk stuff for the moment */ - arglist = NIL; - nargs = 0; - foreach(l, aggref->args) - { - TargetEntry *tle = (TargetEntry *) lfirst(l); - Node *arg = (Node *) tle->expr; - Assert(!IsA(arg, NamedArgExpr)); - if (tle->resjunk) - continue; - if (nargs >= FUNC_MAX_ARGS) /* paranoia */ - ereport(ERROR, - (errcode(ERRCODE_TOO_MANY_ARGUMENTS), - errmsg("too many arguments"))); - argtypes[nargs] = exprType(arg); - arglist = lappend(arglist, arg); - nargs++; - } + /* Extract the argument types as seen by the parser */ + nargs = get_aggregate_argtypes(aggref, argtypes); + /* Print the aggregate name, schema-qualified if needed */ appendStringInfo(buf, "%s(%s", generate_function_name(aggref->aggfnoid, nargs, NIL, argtypes, @@ -7525,26 +7542,51 @@ get_agg_expr(Aggref *aggref, deparse_context *context) &use_variadic), (aggref->aggdistinct != NIL) ? 
"DISTINCT " : ""); - /* aggstar can be set only in zero-argument aggregates */ - if (aggref->aggstar) - appendStringInfoChar(buf, '*'); + if (AGGKIND_IS_ORDERED_SET(aggref->aggkind)) + { + /* + * Ordered-set aggregates do not use "*" syntax. Also, we needn't + * worry about inserting VARIADIC. So we can just dump the direct + * args as-is. + */ + Assert(!aggref->aggvariadic); + get_rule_expr((Node *) aggref->aggdirectargs, context, true); + Assert(aggref->aggorder != NIL); + appendStringInfoString(buf, ") WITHIN GROUP (ORDER BY "); + get_rule_orderby(aggref->aggorder, aggref->args, false, context); + } else { - nargs = 0; - foreach(l, arglist) + /* aggstar can be set only in zero-argument aggregates */ + if (aggref->aggstar) + appendStringInfoChar(buf, '*'); + else { - if (nargs++ > 0) - appendStringInfoString(buf, ", "); - if (use_variadic && lnext(l) == NULL) - appendStringInfoString(buf, "VARIADIC "); - get_rule_expr((Node *) lfirst(l), context, true); + ListCell *l; + int i; + + i = 0; + foreach(l, aggref->args) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + Node *arg = (Node *) tle->expr; + + Assert(!IsA(arg, NamedArgExpr)); + if (tle->resjunk) + continue; + if (i++ > 0) + appendStringInfoString(buf, ", "); + if (use_variadic && i == nargs) + appendStringInfoString(buf, "VARIADIC "); + get_rule_expr(arg, context, true); + } } - } - if (aggref->aggorder != NIL) - { - appendStringInfoString(buf, " ORDER BY "); - get_rule_orderby(aggref->aggorder, aggref->args, false, context); + if (aggref->aggorder != NIL) + { + appendStringInfoString(buf, " ORDER BY "); + get_rule_orderby(aggref->aggorder, aggref->args, false, context); + } } if (aggref->aggfilter != NULL) diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c index 586596258d3..5cc122b6867 100644 --- a/src/backend/utils/cache/lsyscache.c +++ b/src/backend/utils/cache/lsyscache.c @@ -1493,6 +1493,25 @@ get_func_signature(Oid funcid, Oid **argtypes, int *nargs) } /* + * 
get_func_variadictype + * Given procedure id, return the function's provariadic field. + */ +Oid +get_func_variadictype(Oid funcid) +{ + HeapTuple tp; + Oid result; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + result = ((Form_pg_proc) GETSTRUCT(tp))->provariadic; + ReleaseSysCache(tp); + return result; +} + +/* * get_func_retset * Given procedure id, return the function's proretset flag. */ diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index ea8af9f79b8..6221f6db7ed 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -1718,6 +1718,69 @@ tuplesort_getdatum(Tuplesortstate *state, bool forward, } /* + * Advance over N tuples in either forward or back direction, + * without returning any data. N==0 is a no-op. + * Returns TRUE if successful, FALSE if ran out of tuples. + */ +bool +tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples, bool forward) +{ + /* + * We don't actually support backwards skip yet, because no callers need + * it. The API is designed to allow for that later, though. + */ + Assert(forward); + Assert(ntuples >= 0); + + switch (state->status) + { + case TSS_SORTEDINMEM: + if (state->memtupcount - state->current >= ntuples) + { + state->current += ntuples; + return true; + } + state->current = state->memtupcount; + state->eof_reached = true; + + /* + * Complain if caller tries to retrieve more tuples than + * originally asked for in a bounded sort. This is because + * returning EOF here might be the wrong thing. + */ + if (state->bounded && state->current >= state->bound) + elog(ERROR, "retrieved too many tuples in a bounded sort"); + + return false; + + case TSS_SORTEDONTAPE: + case TSS_FINALMERGE: + + /* + * We could probably optimize these cases better, but for now it's + * not worth the trouble. 
+ */ + while (ntuples-- > 0) + { + SortTuple stup; + bool should_free; + + if (!tuplesort_gettuple_common(state, forward, + &stup, &should_free)) + return false; + if (should_free) + pfree(stup.tuple); + CHECK_FOR_INTERRUPTS(); + } + return true; + + default: + elog(ERROR, "invalid tuplesort state"); + return false; /* keep compiler quiet */ + } +} + +/* * tuplesort_merge_order - report merge order we'll use for given memory * (note: "merge order" just means the number of input tapes in the merge). * diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 837e7844312..1d639fbd474 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -11512,6 +11512,7 @@ dumpAgg(Archive *fout, AggInfo *agginfo) int i_aggtransfn; int i_aggfinalfn; int i_aggsortop; + int i_hypothetical; int i_aggtranstype; int i_aggtransspace; int i_agginitval; @@ -11519,6 +11520,7 @@ dumpAgg(Archive *fout, AggInfo *agginfo) const char *aggtransfn; const char *aggfinalfn; const char *aggsortop; + bool hypothetical; const char *aggtranstype; const char *aggtransspace; const char *agginitval; @@ -11543,6 +11545,7 @@ dumpAgg(Archive *fout, AggInfo *agginfo) appendPQExpBuffer(query, "SELECT aggtransfn, " "aggfinalfn, aggtranstype::pg_catalog.regtype, " "aggsortop::pg_catalog.regoperator, " + "(aggkind = 'h') as hypothetical, " "aggtransspace, agginitval, " "'t'::boolean AS convertok, " "pg_catalog.pg_get_function_arguments(p.oid) AS funcargs, " @@ -11557,6 +11560,7 @@ dumpAgg(Archive *fout, AggInfo *agginfo) appendPQExpBuffer(query, "SELECT aggtransfn, " "aggfinalfn, aggtranstype::pg_catalog.regtype, " "aggsortop::pg_catalog.regoperator, " + "false as hypothetical, " "0 AS aggtransspace, agginitval, " "'t'::boolean AS convertok, " "pg_catalog.pg_get_function_arguments(p.oid) AS funcargs, " @@ -11571,6 +11575,7 @@ dumpAgg(Archive *fout, AggInfo *agginfo) appendPQExpBuffer(query, "SELECT aggtransfn, " "aggfinalfn, aggtranstype::pg_catalog.regtype, " 
"aggsortop::pg_catalog.regoperator, " + "false as hypothetical, " "0 AS aggtransspace, agginitval, " "'t'::boolean AS convertok " "FROM pg_catalog.pg_aggregate a, pg_catalog.pg_proc p " @@ -11583,6 +11588,7 @@ dumpAgg(Archive *fout, AggInfo *agginfo) appendPQExpBuffer(query, "SELECT aggtransfn, " "aggfinalfn, aggtranstype::pg_catalog.regtype, " "0 AS aggsortop, " + "'f'::boolean as hypothetical, " "0 AS aggtransspace, agginitval, " "'t'::boolean AS convertok " "FROM pg_catalog.pg_aggregate a, pg_catalog.pg_proc p " @@ -11595,6 +11601,7 @@ dumpAgg(Archive *fout, AggInfo *agginfo) appendPQExpBuffer(query, "SELECT aggtransfn, aggfinalfn, " "format_type(aggtranstype, NULL) AS aggtranstype, " "0 AS aggsortop, " + "'f'::boolean as hypothetical, " "0 AS aggtransspace, agginitval, " "'t'::boolean AS convertok " "FROM pg_aggregate " @@ -11607,6 +11614,7 @@ dumpAgg(Archive *fout, AggInfo *agginfo) "aggfinalfn, " "(SELECT typname FROM pg_type WHERE oid = aggtranstype1) AS aggtranstype, " "0 AS aggsortop, " + "'f'::boolean as hypothetical, " "0 AS aggtransspace, agginitval1 AS agginitval, " "(aggtransfn2 = 0 and aggtranstype2 = 0 and agginitval2 is null) AS convertok " "FROM pg_aggregate " @@ -11619,6 +11627,7 @@ dumpAgg(Archive *fout, AggInfo *agginfo) i_aggtransfn = PQfnumber(res, "aggtransfn"); i_aggfinalfn = PQfnumber(res, "aggfinalfn"); i_aggsortop = PQfnumber(res, "aggsortop"); + i_hypothetical = PQfnumber(res, "hypothetical"); i_aggtranstype = PQfnumber(res, "aggtranstype"); i_aggtransspace = PQfnumber(res, "aggtransspace"); i_agginitval = PQfnumber(res, "agginitval"); @@ -11627,6 +11636,7 @@ dumpAgg(Archive *fout, AggInfo *agginfo) aggtransfn = PQgetvalue(res, 0, i_aggtransfn); aggfinalfn = PQgetvalue(res, 0, i_aggfinalfn); aggsortop = PQgetvalue(res, 0, i_aggsortop); + hypothetical = (PQgetvalue(res, 0, i_hypothetical)[0] == 't'); aggtranstype = PQgetvalue(res, 0, i_aggtranstype); aggtransspace = PQgetvalue(res, 0, i_aggtransspace); agginitval = PQgetvalue(res, 0, 
i_agginitval); @@ -11707,6 +11717,9 @@ dumpAgg(Archive *fout, AggInfo *agginfo) aggsortop); } + if (hypothetical) + appendPQExpBufferStr(details, ",\n HYPOTHETICAL"); + /* * DROP must be fully qualified in case same name appears in pg_catalog */ @@ -11743,7 +11756,7 @@ dumpAgg(Archive *fout, AggInfo *agginfo) /* * Since there is no GRANT ON AGGREGATE syntax, we have to make the ACL * command look like a function's GRANT; in particular this affects the - * syntax for zero-argument aggregates. + * syntax for zero-argument aggregates and ordered-set aggregates. */ free(aggsig); free(aggsig_tag); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index b7610e8c3f8..e8b60bda7e1 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201312131 +#define CATALOG_VERSION_NO 201312231 #endif diff --git a/src/include/catalog/pg_aggregate.h b/src/include/catalog/pg_aggregate.h index 034456a315d..f0eaf60a09b 100644 --- a/src/include/catalog/pg_aggregate.h +++ b/src/include/catalog/pg_aggregate.h @@ -28,6 +28,8 @@ * cpp turns this into typedef struct FormData_pg_aggregate * * aggfnoid pg_proc OID of the aggregate itself + * aggkind aggregate kind, see AGGKIND_ categories below + * aggnumdirectargs number of arguments that are "direct" arguments * aggtransfn transition function * aggfinalfn final function (0 if none) * aggsortop associated sort operator (0 if none) @@ -41,6 +43,8 @@ CATALOG(pg_aggregate,2600) BKI_WITHOUT_OIDS { regproc aggfnoid; + char aggkind; + int16 aggnumdirectargs; regproc aggtransfn; regproc aggfinalfn; Oid aggsortop; @@ -64,14 +68,31 @@ typedef FormData_pg_aggregate *Form_pg_aggregate; * ---------------- */ -#define Natts_pg_aggregate 7 -#define Anum_pg_aggregate_aggfnoid 1 -#define Anum_pg_aggregate_aggtransfn 2 -#define Anum_pg_aggregate_aggfinalfn 3 -#define Anum_pg_aggregate_aggsortop 4 -#define Anum_pg_aggregate_aggtranstype 5 
-#define Anum_pg_aggregate_aggtransspace 6 -#define Anum_pg_aggregate_agginitval 7 +#define Natts_pg_aggregate 9 +#define Anum_pg_aggregate_aggfnoid 1 +#define Anum_pg_aggregate_aggkind 2 +#define Anum_pg_aggregate_aggnumdirectargs 3 +#define Anum_pg_aggregate_aggtransfn 4 +#define Anum_pg_aggregate_aggfinalfn 5 +#define Anum_pg_aggregate_aggsortop 6 +#define Anum_pg_aggregate_aggtranstype 7 +#define Anum_pg_aggregate_aggtransspace 8 +#define Anum_pg_aggregate_agginitval 9 + +/* + * Symbolic values for aggkind column. We distinguish normal aggregates + * from ordered-set aggregates (which have two sets of arguments, namely + * direct and aggregated arguments) and from hypothetical-set aggregates + * (which are a subclass of ordered-set aggregates in which the last + * direct arguments have to match up in number and datatypes with the + * aggregated arguments). + */ +#define AGGKIND_NORMAL 'n' +#define AGGKIND_ORDERED_SET 'o' +#define AGGKIND_HYPOTHETICAL 'h' + +/* Use this macro to test for "ordered-set agg including hypothetical case" */ +#define AGGKIND_IS_ORDERED_SET(kind) ((kind) != AGGKIND_NORMAL) /* ---------------- @@ -80,175 +101,192 @@ typedef FormData_pg_aggregate *Form_pg_aggregate; */ /* avg */ -DATA(insert ( 2100 int8_avg_accum numeric_avg 0 2281 128 _null_ )); -DATA(insert ( 2101 int4_avg_accum int8_avg 0 1016 0 "{0,0}" )); -DATA(insert ( 2102 int2_avg_accum int8_avg 0 1016 0 "{0,0}" )); -DATA(insert ( 2103 numeric_avg_accum numeric_avg 0 2281 128 _null_ )); -DATA(insert ( 2104 float4_accum float8_avg 0 1022 0 "{0,0,0}" )); -DATA(insert ( 2105 float8_accum float8_avg 0 1022 0 "{0,0,0}" )); -DATA(insert ( 2106 interval_accum interval_avg 0 1187 0 "{0 second,0 second}" )); +DATA(insert ( 2100 n 0 int8_avg_accum numeric_avg 0 2281 128 _null_ )); +DATA(insert ( 2101 n 0 int4_avg_accum int8_avg 0 1016 0 "{0,0}" )); +DATA(insert ( 2102 n 0 int2_avg_accum int8_avg 0 1016 0 "{0,0}" )); +DATA(insert ( 2103 n 0 numeric_avg_accum numeric_avg 0 2281 128 _null_ 
)); +DATA(insert ( 2104 n 0 float4_accum float8_avg 0 1022 0 "{0,0,0}" )); +DATA(insert ( 2105 n 0 float8_accum float8_avg 0 1022 0 "{0,0,0}" )); +DATA(insert ( 2106 n 0 interval_accum interval_avg 0 1187 0 "{0 second,0 second}" )); /* sum */ -DATA(insert ( 2107 int8_avg_accum numeric_sum 0 2281 128 _null_ )); -DATA(insert ( 2108 int4_sum - 0 20 0 _null_ )); -DATA(insert ( 2109 int2_sum - 0 20 0 _null_ )); -DATA(insert ( 2110 float4pl - 0 700 0 _null_ )); -DATA(insert ( 2111 float8pl - 0 701 0 _null_ )); -DATA(insert ( 2112 cash_pl - 0 790 0 _null_ )); -DATA(insert ( 2113 interval_pl - 0 1186 0 _null_ )); -DATA(insert ( 2114 numeric_avg_accum numeric_sum 0 2281 128 _null_ )); +DATA(insert ( 2107 n 0 int8_avg_accum numeric_sum 0 2281 128 _null_ )); +DATA(insert ( 2108 n 0 int4_sum - 0 20 0 _null_ )); +DATA(insert ( 2109 n 0 int2_sum - 0 20 0 _null_ )); +DATA(insert ( 2110 n 0 float4pl - 0 700 0 _null_ )); +DATA(insert ( 2111 n 0 float8pl - 0 701 0 _null_ )); +DATA(insert ( 2112 n 0 cash_pl - 0 790 0 _null_ )); +DATA(insert ( 2113 n 0 interval_pl - 0 1186 0 _null_ )); +DATA(insert ( 2114 n 0 numeric_avg_accum numeric_sum 0 2281 128 _null_ )); /* max */ -DATA(insert ( 2115 int8larger - 413 20 0 _null_ )); -DATA(insert ( 2116 int4larger - 521 23 0 _null_ )); -DATA(insert ( 2117 int2larger - 520 21 0 _null_ )); -DATA(insert ( 2118 oidlarger - 610 26 0 _null_ )); -DATA(insert ( 2119 float4larger - 623 700 0 _null_ )); -DATA(insert ( 2120 float8larger - 674 701 0 _null_ )); -DATA(insert ( 2121 int4larger - 563 702 0 _null_ )); -DATA(insert ( 2122 date_larger - 1097 1082 0 _null_ )); -DATA(insert ( 2123 time_larger - 1112 1083 0 _null_ )); -DATA(insert ( 2124 timetz_larger - 1554 1266 0 _null_ )); -DATA(insert ( 2125 cashlarger - 903 790 0 _null_ )); -DATA(insert ( 2126 timestamp_larger - 2064 1114 0 _null_ )); -DATA(insert ( 2127 timestamptz_larger - 1324 1184 0 _null_ )); -DATA(insert ( 2128 interval_larger - 1334 1186 0 _null_ )); -DATA(insert ( 2129 text_larger - 666 
25 0 _null_ )); -DATA(insert ( 2130 numeric_larger - 1756 1700 0 _null_ )); -DATA(insert ( 2050 array_larger - 1073 2277 0 _null_ )); -DATA(insert ( 2244 bpchar_larger - 1060 1042 0 _null_ )); -DATA(insert ( 2797 tidlarger - 2800 27 0 _null_ )); -DATA(insert ( 3526 enum_larger - 3519 3500 0 _null_ )); +DATA(insert ( 2115 n 0 int8larger - 413 20 0 _null_ )); +DATA(insert ( 2116 n 0 int4larger - 521 23 0 _null_ )); +DATA(insert ( 2117 n 0 int2larger - 520 21 0 _null_ )); +DATA(insert ( 2118 n 0 oidlarger - 610 26 0 _null_ )); +DATA(insert ( 2119 n 0 float4larger - 623 700 0 _null_ )); +DATA(insert ( 2120 n 0 float8larger - 674 701 0 _null_ )); +DATA(insert ( 2121 n 0 int4larger - 563 702 0 _null_ )); +DATA(insert ( 2122 n 0 date_larger - 1097 1082 0 _null_ )); +DATA(insert ( 2123 n 0 time_larger - 1112 1083 0 _null_ )); +DATA(insert ( 2124 n 0 timetz_larger - 1554 1266 0 _null_ )); +DATA(insert ( 2125 n 0 cashlarger - 903 790 0 _null_ )); +DATA(insert ( 2126 n 0 timestamp_larger - 2064 1114 0 _null_ )); +DATA(insert ( 2127 n 0 timestamptz_larger - 1324 1184 0 _null_ )); +DATA(insert ( 2128 n 0 interval_larger - 1334 1186 0 _null_ )); +DATA(insert ( 2129 n 0 text_larger - 666 25 0 _null_ )); +DATA(insert ( 2130 n 0 numeric_larger - 1756 1700 0 _null_ )); +DATA(insert ( 2050 n 0 array_larger - 1073 2277 0 _null_ )); +DATA(insert ( 2244 n 0 bpchar_larger - 1060 1042 0 _null_ )); +DATA(insert ( 2797 n 0 tidlarger - 2800 27 0 _null_ )); +DATA(insert ( 3526 n 0 enum_larger - 3519 3500 0 _null_ )); /* min */ -DATA(insert ( 2131 int8smaller - 412 20 0 _null_ )); -DATA(insert ( 2132 int4smaller - 97 23 0 _null_ )); -DATA(insert ( 2133 int2smaller - 95 21 0 _null_ )); -DATA(insert ( 2134 oidsmaller - 609 26 0 _null_ )); -DATA(insert ( 2135 float4smaller - 622 700 0 _null_ )); -DATA(insert ( 2136 float8smaller - 672 701 0 _null_ )); -DATA(insert ( 2137 int4smaller - 562 702 0 _null_ )); -DATA(insert ( 2138 date_smaller - 1095 1082 0 _null_ )); -DATA(insert ( 2139 time_smaller - 
1110 1083 0 _null_ )); -DATA(insert ( 2140 timetz_smaller - 1552 1266 0 _null_ )); -DATA(insert ( 2141 cashsmaller - 902 790 0 _null_ )); -DATA(insert ( 2142 timestamp_smaller - 2062 1114 0 _null_ )); -DATA(insert ( 2143 timestamptz_smaller - 1322 1184 0 _null_ )); -DATA(insert ( 2144 interval_smaller - 1332 1186 0 _null_ )); -DATA(insert ( 2145 text_smaller - 664 25 0 _null_ )); -DATA(insert ( 2146 numeric_smaller - 1754 1700 0 _null_ )); -DATA(insert ( 2051 array_smaller - 1072 2277 0 _null_ )); -DATA(insert ( 2245 bpchar_smaller - 1058 1042 0 _null_ )); -DATA(insert ( 2798 tidsmaller - 2799 27 0 _null_ )); -DATA(insert ( 3527 enum_smaller - 3518 3500 0 _null_ )); +DATA(insert ( 2131 n 0 int8smaller - 412 20 0 _null_ )); +DATA(insert ( 2132 n 0 int4smaller - 97 23 0 _null_ )); +DATA(insert ( 2133 n 0 int2smaller - 95 21 0 _null_ )); +DATA(insert ( 2134 n 0 oidsmaller - 609 26 0 _null_ )); +DATA(insert ( 2135 n 0 float4smaller - 622 700 0 _null_ )); +DATA(insert ( 2136 n 0 float8smaller - 672 701 0 _null_ )); +DATA(insert ( 2137 n 0 int4smaller - 562 702 0 _null_ )); +DATA(insert ( 2138 n 0 date_smaller - 1095 1082 0 _null_ )); +DATA(insert ( 2139 n 0 time_smaller - 1110 1083 0 _null_ )); +DATA(insert ( 2140 n 0 timetz_smaller - 1552 1266 0 _null_ )); +DATA(insert ( 2141 n 0 cashsmaller - 902 790 0 _null_ )); +DATA(insert ( 2142 n 0 timestamp_smaller - 2062 1114 0 _null_ )); +DATA(insert ( 2143 n 0 timestamptz_smaller - 1322 1184 0 _null_ )); +DATA(insert ( 2144 n 0 interval_smaller - 1332 1186 0 _null_ )); +DATA(insert ( 2145 n 0 text_smaller - 664 25 0 _null_ )); +DATA(insert ( 2146 n 0 numeric_smaller - 1754 1700 0 _null_ )); +DATA(insert ( 2051 n 0 array_smaller - 1072 2277 0 _null_ )); +DATA(insert ( 2245 n 0 bpchar_smaller - 1058 1042 0 _null_ )); +DATA(insert ( 2798 n 0 tidsmaller - 2799 27 0 _null_ )); +DATA(insert ( 3527 n 0 enum_smaller - 3518 3500 0 _null_ )); /* count */ -DATA(insert ( 2147 int8inc_any - 0 20 0 "0" )); -DATA(insert ( 2803 int8inc - 0 
20 0 "0" )); +DATA(insert ( 2147 n 0 int8inc_any - 0 20 0 "0" )); +DATA(insert ( 2803 n 0 int8inc - 0 20 0 "0" )); /* var_pop */ -DATA(insert ( 2718 int8_accum numeric_var_pop 0 2281 128 _null_ )); -DATA(insert ( 2719 int4_accum numeric_var_pop 0 2281 128 _null_ )); -DATA(insert ( 2720 int2_accum numeric_var_pop 0 2281 128 _null_ )); -DATA(insert ( 2721 float4_accum float8_var_pop 0 1022 0 "{0,0,0}" )); -DATA(insert ( 2722 float8_accum float8_var_pop 0 1022 0 "{0,0,0}" )); -DATA(insert ( 2723 numeric_accum numeric_var_pop 0 2281 128 _null_ )); +DATA(insert ( 2718 n 0 int8_accum numeric_var_pop 0 2281 128 _null_ )); +DATA(insert ( 2719 n 0 int4_accum numeric_var_pop 0 2281 128 _null_ )); +DATA(insert ( 2720 n 0 int2_accum numeric_var_pop 0 2281 128 _null_ )); +DATA(insert ( 2721 n 0 float4_accum float8_var_pop 0 1022 0 "{0,0,0}" )); +DATA(insert ( 2722 n 0 float8_accum float8_var_pop 0 1022 0 "{0,0,0}" )); +DATA(insert ( 2723 n 0 numeric_accum numeric_var_pop 0 2281 128 _null_ )); /* var_samp */ -DATA(insert ( 2641 int8_accum numeric_var_samp 0 2281 128 _null_ )); -DATA(insert ( 2642 int4_accum numeric_var_samp 0 2281 128 _null_ )); -DATA(insert ( 2643 int2_accum numeric_var_samp 0 2281 128 _null_ )); -DATA(insert ( 2644 float4_accum float8_var_samp 0 1022 0 "{0,0,0}" )); -DATA(insert ( 2645 float8_accum float8_var_samp 0 1022 0 "{0,0,0}" )); -DATA(insert ( 2646 numeric_accum numeric_var_samp 0 2281 128 _null_ )); +DATA(insert ( 2641 n 0 int8_accum numeric_var_samp 0 2281 128 _null_ )); +DATA(insert ( 2642 n 0 int4_accum numeric_var_samp 0 2281 128 _null_ )); +DATA(insert ( 2643 n 0 int2_accum numeric_var_samp 0 2281 128 _null_ )); +DATA(insert ( 2644 n 0 float4_accum float8_var_samp 0 1022 0 "{0,0,0}" )); +DATA(insert ( 2645 n 0 float8_accum float8_var_samp 0 1022 0 "{0,0,0}" )); +DATA(insert ( 2646 n 0 numeric_accum numeric_var_samp 0 2281 128 _null_ )); /* variance: historical Postgres syntax for var_samp */ -DATA(insert ( 2148 int8_accum numeric_var_samp 0 2281 
128 _null_ )); -DATA(insert ( 2149 int4_accum numeric_var_samp 0 2281 128 _null_ )); -DATA(insert ( 2150 int2_accum numeric_var_samp 0 2281 128 _null_ )); -DATA(insert ( 2151 float4_accum float8_var_samp 0 1022 0 "{0,0,0}" )); -DATA(insert ( 2152 float8_accum float8_var_samp 0 1022 0 "{0,0,0}" )); -DATA(insert ( 2153 numeric_accum numeric_var_samp 0 2281 128 _null_ )); +DATA(insert ( 2148 n 0 int8_accum numeric_var_samp 0 2281 128 _null_ )); +DATA(insert ( 2149 n 0 int4_accum numeric_var_samp 0 2281 128 _null_ )); +DATA(insert ( 2150 n 0 int2_accum numeric_var_samp 0 2281 128 _null_ )); +DATA(insert ( 2151 n 0 float4_accum float8_var_samp 0 1022 0 "{0,0,0}" )); +DATA(insert ( 2152 n 0 float8_accum float8_var_samp 0 1022 0 "{0,0,0}" )); +DATA(insert ( 2153 n 0 numeric_accum numeric_var_samp 0 2281 128 _null_ )); /* stddev_pop */ -DATA(insert ( 2724 int8_accum numeric_stddev_pop 0 2281 128 _null_ )); -DATA(insert ( 2725 int4_accum numeric_stddev_pop 0 2281 128 _null_ )); -DATA(insert ( 2726 int2_accum numeric_stddev_pop 0 2281 128 _null_ )); -DATA(insert ( 2727 float4_accum float8_stddev_pop 0 1022 0 "{0,0,0}" )); -DATA(insert ( 2728 float8_accum float8_stddev_pop 0 1022 0 "{0,0,0}" )); -DATA(insert ( 2729 numeric_accum numeric_stddev_pop 0 2281 128 _null_ )); +DATA(insert ( 2724 n 0 int8_accum numeric_stddev_pop 0 2281 128 _null_ )); +DATA(insert ( 2725 n 0 int4_accum numeric_stddev_pop 0 2281 128 _null_ )); +DATA(insert ( 2726 n 0 int2_accum numeric_stddev_pop 0 2281 128 _null_ )); +DATA(insert ( 2727 n 0 float4_accum float8_stddev_pop 0 1022 0 "{0,0,0}" )); +DATA(insert ( 2728 n 0 float8_accum float8_stddev_pop 0 1022 0 "{0,0,0}" )); +DATA(insert ( 2729 n 0 numeric_accum numeric_stddev_pop 0 2281 128 _null_ )); /* stddev_samp */ -DATA(insert ( 2712 int8_accum numeric_stddev_samp 0 2281 128 _null_ )); -DATA(insert ( 2713 int4_accum numeric_stddev_samp 0 2281 128 _null_ )); -DATA(insert ( 2714 int2_accum numeric_stddev_samp 0 2281 128 _null_ )); -DATA(insert ( 2715 
float4_accum float8_stddev_samp 0 1022 0 "{0,0,0}" )); -DATA(insert ( 2716 float8_accum float8_stddev_samp 0 1022 0 "{0,0,0}" )); -DATA(insert ( 2717 numeric_accum numeric_stddev_samp 0 2281 128 _null_ )); +DATA(insert ( 2712 n 0 int8_accum numeric_stddev_samp 0 2281 128 _null_ )); +DATA(insert ( 2713 n 0 int4_accum numeric_stddev_samp 0 2281 128 _null_ )); +DATA(insert ( 2714 n 0 int2_accum numeric_stddev_samp 0 2281 128 _null_ )); +DATA(insert ( 2715 n 0 float4_accum float8_stddev_samp 0 1022 0 "{0,0,0}" )); +DATA(insert ( 2716 n 0 float8_accum float8_stddev_samp 0 1022 0 "{0,0,0}" )); +DATA(insert ( 2717 n 0 numeric_accum numeric_stddev_samp 0 2281 128 _null_ )); /* stddev: historical Postgres syntax for stddev_samp */ -DATA(insert ( 2154 int8_accum numeric_stddev_samp 0 2281 128 _null_ )); -DATA(insert ( 2155 int4_accum numeric_stddev_samp 0 2281 128 _null_ )); -DATA(insert ( 2156 int2_accum numeric_stddev_samp 0 2281 128 _null_ )); -DATA(insert ( 2157 float4_accum float8_stddev_samp 0 1022 0 "{0,0,0}" )); -DATA(insert ( 2158 float8_accum float8_stddev_samp 0 1022 0 "{0,0,0}" )); -DATA(insert ( 2159 numeric_accum numeric_stddev_samp 0 2281 128 _null_ )); +DATA(insert ( 2154 n 0 int8_accum numeric_stddev_samp 0 2281 128 _null_ )); +DATA(insert ( 2155 n 0 int4_accum numeric_stddev_samp 0 2281 128 _null_ )); +DATA(insert ( 2156 n 0 int2_accum numeric_stddev_samp 0 2281 128 _null_ )); +DATA(insert ( 2157 n 0 float4_accum float8_stddev_samp 0 1022 0 "{0,0,0}" )); +DATA(insert ( 2158 n 0 float8_accum float8_stddev_samp 0 1022 0 "{0,0,0}" )); +DATA(insert ( 2159 n 0 numeric_accum numeric_stddev_samp 0 2281 128 _null_ )); /* SQL2003 binary regression aggregates */ -DATA(insert ( 2818 int8inc_float8_float8 - 0 20 0 "0" )); -DATA(insert ( 2819 float8_regr_accum float8_regr_sxx 0 1022 0 "{0,0,0,0,0,0}" )); -DATA(insert ( 2820 float8_regr_accum float8_regr_syy 0 1022 0 "{0,0,0,0,0,0}" )); -DATA(insert ( 2821 float8_regr_accum float8_regr_sxy 0 1022 0 "{0,0,0,0,0,0}" )); 
-DATA(insert ( 2822 float8_regr_accum float8_regr_avgx 0 1022 0 "{0,0,0,0,0,0}" )); -DATA(insert ( 2823 float8_regr_accum float8_regr_avgy 0 1022 0 "{0,0,0,0,0,0}" )); -DATA(insert ( 2824 float8_regr_accum float8_regr_r2 0 1022 0 "{0,0,0,0,0,0}" )); -DATA(insert ( 2825 float8_regr_accum float8_regr_slope 0 1022 0 "{0,0,0,0,0,0}" )); -DATA(insert ( 2826 float8_regr_accum float8_regr_intercept 0 1022 0 "{0,0,0,0,0,0}" )); -DATA(insert ( 2827 float8_regr_accum float8_covar_pop 0 1022 0 "{0,0,0,0,0,0}" )); -DATA(insert ( 2828 float8_regr_accum float8_covar_samp 0 1022 0 "{0,0,0,0,0,0}" )); -DATA(insert ( 2829 float8_regr_accum float8_corr 0 1022 0 "{0,0,0,0,0,0}" )); +DATA(insert ( 2818 n 0 int8inc_float8_float8 - 0 20 0 "0" )); +DATA(insert ( 2819 n 0 float8_regr_accum float8_regr_sxx 0 1022 0 "{0,0,0,0,0,0}" )); +DATA(insert ( 2820 n 0 float8_regr_accum float8_regr_syy 0 1022 0 "{0,0,0,0,0,0}" )); +DATA(insert ( 2821 n 0 float8_regr_accum float8_regr_sxy 0 1022 0 "{0,0,0,0,0,0}" )); +DATA(insert ( 2822 n 0 float8_regr_accum float8_regr_avgx 0 1022 0 "{0,0,0,0,0,0}" )); +DATA(insert ( 2823 n 0 float8_regr_accum float8_regr_avgy 0 1022 0 "{0,0,0,0,0,0}" )); +DATA(insert ( 2824 n 0 float8_regr_accum float8_regr_r2 0 1022 0 "{0,0,0,0,0,0}" )); +DATA(insert ( 2825 n 0 float8_regr_accum float8_regr_slope 0 1022 0 "{0,0,0,0,0,0}" )); +DATA(insert ( 2826 n 0 float8_regr_accum float8_regr_intercept 0 1022 0 "{0,0,0,0,0,0}" )); +DATA(insert ( 2827 n 0 float8_regr_accum float8_covar_pop 0 1022 0 "{0,0,0,0,0,0}" )); +DATA(insert ( 2828 n 0 float8_regr_accum float8_covar_samp 0 1022 0 "{0,0,0,0,0,0}" )); +DATA(insert ( 2829 n 0 float8_regr_accum float8_corr 0 1022 0 "{0,0,0,0,0,0}" )); /* boolean-and and boolean-or */ -DATA(insert ( 2517 booland_statefunc - 58 16 0 _null_ )); -DATA(insert ( 2518 boolor_statefunc - 59 16 0 _null_ )); -DATA(insert ( 2519 booland_statefunc - 58 16 0 _null_ )); +DATA(insert ( 2517 n 0 booland_statefunc - 58 16 0 _null_ )); +DATA(insert ( 2518 n 0 
boolor_statefunc - 59 16 0 _null_ )); +DATA(insert ( 2519 n 0 booland_statefunc - 58 16 0 _null_ )); /* bitwise integer */ -DATA(insert ( 2236 int2and - 0 21 0 _null_ )); -DATA(insert ( 2237 int2or - 0 21 0 _null_ )); -DATA(insert ( 2238 int4and - 0 23 0 _null_ )); -DATA(insert ( 2239 int4or - 0 23 0 _null_ )); -DATA(insert ( 2240 int8and - 0 20 0 _null_ )); -DATA(insert ( 2241 int8or - 0 20 0 _null_ )); -DATA(insert ( 2242 bitand - 0 1560 0 _null_ )); -DATA(insert ( 2243 bitor - 0 1560 0 _null_ )); +DATA(insert ( 2236 n 0 int2and - 0 21 0 _null_ )); +DATA(insert ( 2237 n 0 int2or - 0 21 0 _null_ )); +DATA(insert ( 2238 n 0 int4and - 0 23 0 _null_ )); +DATA(insert ( 2239 n 0 int4or - 0 23 0 _null_ )); +DATA(insert ( 2240 n 0 int8and - 0 20 0 _null_ )); +DATA(insert ( 2241 n 0 int8or - 0 20 0 _null_ )); +DATA(insert ( 2242 n 0 bitand - 0 1560 0 _null_ )); +DATA(insert ( 2243 n 0 bitor - 0 1560 0 _null_ )); /* xml */ -DATA(insert ( 2901 xmlconcat2 - 0 142 0 _null_ )); +DATA(insert ( 2901 n 0 xmlconcat2 - 0 142 0 _null_ )); /* array */ -DATA(insert ( 2335 array_agg_transfn array_agg_finalfn 0 2281 0 _null_ )); +DATA(insert ( 2335 n 0 array_agg_transfn array_agg_finalfn 0 2281 0 _null_ )); /* text */ -DATA(insert ( 3538 string_agg_transfn string_agg_finalfn 0 2281 0 _null_ )); +DATA(insert ( 3538 n 0 string_agg_transfn string_agg_finalfn 0 2281 0 _null_ )); /* bytea */ -DATA(insert ( 3545 bytea_string_agg_transfn bytea_string_agg_finalfn 0 2281 0 _null_ )); +DATA(insert ( 3545 n 0 bytea_string_agg_transfn bytea_string_agg_finalfn 0 2281 0 _null_ )); /* json */ -DATA(insert ( 3175 json_agg_transfn json_agg_finalfn 0 2281 0 _null_ )); +DATA(insert ( 3175 n 0 json_agg_transfn json_agg_finalfn 0 2281 0 _null_ )); + +/* ordered-set and hypothetical-set aggregates */ +DATA(insert ( 3972 o 1 ordered_set_transition percentile_disc_final 0 2281 0 _null_ )); +DATA(insert ( 3974 o 1 ordered_set_transition percentile_cont_float8_final 0 2281 0 _null_ )); +DATA(insert ( 3976 o 1 
ordered_set_transition percentile_cont_interval_final 0 2281 0 _null_ )); +DATA(insert ( 3978 o 1 ordered_set_transition percentile_disc_multi_final 0 2281 0 _null_ )); +DATA(insert ( 3980 o 1 ordered_set_transition percentile_cont_float8_multi_final 0 2281 0 _null_ )); +DATA(insert ( 3982 o 1 ordered_set_transition percentile_cont_interval_multi_final 0 2281 0 _null_ )); +DATA(insert ( 3984 o 0 ordered_set_transition mode_final 0 2281 0 _null_ )); +DATA(insert ( 3986 h 1 ordered_set_transition_multi rank_final 0 2281 0 _null_ )); +DATA(insert ( 3988 h 1 ordered_set_transition_multi percent_rank_final 0 2281 0 _null_ )); +DATA(insert ( 3990 h 1 ordered_set_transition_multi cume_dist_final 0 2281 0 _null_ )); +DATA(insert ( 3992 h 1 ordered_set_transition_multi dense_rank_final 0 2281 0 _null_ )); + /* * prototypes for functions in pg_aggregate.c */ extern Oid AggregateCreate(const char *aggName, Oid aggNamespace, + char aggKind, int numArgs, + int numDirectArgs, oidvector *parameterTypes, Datum allParameterTypes, Datum parameterModes, Datum parameterNames, List *parameterDefaults, + Oid variadicArgType, List *aggtransfnName, List *aggfinalfnName, List *aggsortopName, diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h index 78efaa5f23e..a49cfdbdded 100644 --- a/src/include/catalog/pg_operator.h +++ b/src/include/catalog/pg_operator.h @@ -130,6 +130,7 @@ DATA(insert OID = 96 ( "=" PGNSP PGUID b t t 23 23 16 96 518 int4eq eqsel e DESCR("equal"); DATA(insert OID = 97 ( "<" PGNSP PGUID b f f 23 23 16 521 525 int4lt scalarltsel scalarltjoinsel )); DESCR("less than"); +#define Int4LessOperator 97 DATA(insert OID = 98 ( "=" PGNSP PGUID b t t 25 25 16 98 531 texteq eqsel eqjoinsel )); DESCR("equal"); #define TextEqualOperator 98 diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 0117500a8a6..05d7ba54072 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4756,10 +4756,65 @@ 
DESCR("SP-GiST support for quad tree over range"); DATA(insert OID = 3473 ( spg_range_quad_leaf_consistent PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 16 "2281 2281" _null_ _null_ _null_ _null_ spg_range_quad_leaf_consistent _null_ _null_ _null_ )); DESCR("SP-GiST support for quad tree over range"); - /* event triggers */ DATA(insert OID = 3566 ( pg_event_trigger_dropped_objects PGNSP PGUID 12 10 100 0 0 f f f f t t s 0 0 2249 "" "{26,26,23,25,25,25,25}" "{o,o,o,o,o,o,o}" "{classid, objid, objsubid, object_type, schema_name, object_name, object_identity}" _null_ pg_event_trigger_dropped_objects _null_ _null_ _null_ )); DESCR("list objects dropped by the current command"); + +/* generic transition functions for ordered-set aggregates */ +DATA(insert OID = 3970 ( ordered_set_transition PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 2281 "2281 2276" _null_ _null_ _null_ _null_ ordered_set_transition _null_ _null_ _null_ )); +DESCR("aggregate transition function"); +DATA(insert OID = 3971 ( ordered_set_transition_multi PGNSP PGUID 12 1 0 2276 0 f f f f f f i 2 0 2281 "2281 2276" "{2281,2276}" "{i,v}" _null_ _null_ ordered_set_transition_multi _null_ _null_ _null_ )); +DESCR("aggregate transition function"); + +/* inverse distribution aggregates (and their support functions) */ +DATA(insert OID = 3972 ( percentile_disc PGNSP PGUID 12 1 0 0 0 t f f f f f i 2 0 2283 "701 2283" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); +DESCR("discrete percentile"); +DATA(insert OID = 3973 ( percentile_disc_final PGNSP PGUID 12 1 0 0 0 f f f f f f i 3 0 2283 "2281 701 2283" _null_ _null_ _null_ _null_ percentile_disc_final _null_ _null_ _null_ )); +DESCR("aggregate final function"); +DATA(insert OID = 3974 ( percentile_cont PGNSP PGUID 12 1 0 0 0 t f f f f f i 2 0 701 "701 701" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); +DESCR("continuous distribution percentile"); +DATA(insert OID = 3975 ( percentile_cont_float8_final PGNSP PGUID 12 1 0 0 0 f f 
f f f f i 3 0 701 "2281 701 701" _null_ _null_ _null_ _null_ percentile_cont_float8_final _null_ _null_ _null_ )); +DESCR("aggregate final function"); +DATA(insert OID = 3976 ( percentile_cont PGNSP PGUID 12 1 0 0 0 t f f f f f i 2 0 1186 "701 1186" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); +DESCR("continuous distribution percentile"); +DATA(insert OID = 3977 ( percentile_cont_interval_final PGNSP PGUID 12 1 0 0 0 f f f f f f i 3 0 1186 "2281 701 1186" _null_ _null_ _null_ _null_ percentile_cont_interval_final _null_ _null_ _null_ )); +DESCR("aggregate final function"); +DATA(insert OID = 3978 ( percentile_disc PGNSP PGUID 12 1 0 0 0 t f f f f f i 2 0 2277 "1022 2283" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); +DESCR("multiple discrete percentiles"); +DATA(insert OID = 3979 ( percentile_disc_multi_final PGNSP PGUID 12 1 0 0 0 f f f f f f i 3 0 2277 "2281 1022 2283" _null_ _null_ _null_ _null_ percentile_disc_multi_final _null_ _null_ _null_ )); +DESCR("aggregate final function"); +DATA(insert OID = 3980 ( percentile_cont PGNSP PGUID 12 1 0 0 0 t f f f f f i 2 0 1022 "1022 701" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); +DESCR("multiple continuous percentiles"); +DATA(insert OID = 3981 ( percentile_cont_float8_multi_final PGNSP PGUID 12 1 0 0 0 f f f f f f i 3 0 1022 "2281 1022 701" _null_ _null_ _null_ _null_ percentile_cont_float8_multi_final _null_ _null_ _null_ )); +DESCR("aggregate final function"); +DATA(insert OID = 3982 ( percentile_cont PGNSP PGUID 12 1 0 0 0 t f f f f f i 2 0 1187 "1022 1186" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); +DESCR("multiple continuous percentiles"); +DATA(insert OID = 3983 ( percentile_cont_interval_multi_final PGNSP PGUID 12 1 0 0 0 f f f f f f i 3 0 1187 "2281 1022 1186" _null_ _null_ _null_ _null_ percentile_cont_interval_multi_final _null_ _null_ _null_ )); +DESCR("aggregate final function"); +DATA(insert OID = 3984 ( 
mode PGNSP PGUID 12 1 0 0 0 t f f f f f i 1 0 2283 "2283" _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); +DESCR("most common value"); +DATA(insert OID = 3985 ( mode_final PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 2283 "2281 2283" _null_ _null_ _null_ _null_ mode_final _null_ _null_ _null_ )); +DESCR("aggregate final function"); + +/* hypothetical-set aggregates (and their support functions) */ +DATA(insert OID = 3986 ( rank PGNSP PGUID 12 1 0 2276 0 t f f f f f i 1 0 20 "2276" "{2276}" "{v}" _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); +DESCR("rank of hypothetical row"); +DATA(insert OID = 3987 ( rank_final PGNSP PGUID 12 1 0 2276 0 f f f f f f i 2 0 20 "2281 2276" "{2281,2276}" "{i,v}" _null_ _null_ hypothetical_rank_final _null_ _null_ _null_ )); +DESCR("aggregate final function"); +DATA(insert OID = 3988 ( percent_rank PGNSP PGUID 12 1 0 2276 0 t f f f f f i 1 0 701 "2276" "{2276}" "{v}" _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); +DESCR("fractional rank of hypothetical row"); +DATA(insert OID = 3989 ( percent_rank_final PGNSP PGUID 12 1 0 2276 0 f f f f f f i 2 0 701 "2281 2276" "{2281,2276}" "{i,v}" _null_ _null_ hypothetical_percent_rank_final _null_ _null_ _null_ )); +DESCR("aggregate final function"); +DATA(insert OID = 3990 ( cume_dist PGNSP PGUID 12 1 0 2276 0 t f f f f f i 1 0 701 "2276" "{2276}" "{v}" _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); +DESCR("cumulative distribution of hypothetical row"); +DATA(insert OID = 3991 ( cume_dist_final PGNSP PGUID 12 1 0 2276 0 f f f f f f i 2 0 701 "2281 2276" "{2281,2276}" "{i,v}" _null_ _null_ hypothetical_cume_dist_final _null_ _null_ _null_ )); +DESCR("aggregate final function"); +DATA(insert OID = 3992 ( dense_rank PGNSP PGUID 12 1 0 2276 0 t f f f f f i 1 0 20 "2276" "{2276}" "{v}" _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); +DESCR("rank of hypothetical row without gaps"); +DATA(insert OID = 3993 ( dense_rank_final PGNSP PGUID 12 1 0 2276 0 f f f 
f f f i 2 0 20 "2281 2276" "{2281,2276}" "{i,v}" _null_ _null_ hypothetical_dense_rank_final _null_ _null_ _null_ )); +DESCR("aggregate final function"); + + /* * Symbolic values for provolatile column: these indicate whether the result * of a function is dependent *only* on the values of its explicit arguments, diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index f8ceb5da2eb..16817b68e77 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -63,6 +63,7 @@ extern void interpret_function_parameter_list(List *parameters, ArrayType **parameterModes, ArrayType **parameterNames, List **parameterDefaults, + Oid *variadicArgType, Oid *requiredResultType); /* commands/operatorcmds.c */ diff --git a/src/include/fmgr.h b/src/include/fmgr.h index 1f72e1bd48f..2b41746b56b 100644 --- a/src/include/fmgr.h +++ b/src/include/fmgr.h @@ -18,8 +18,12 @@ #ifndef FMGR_H #define FMGR_H -/* We don't want to include primnodes.h here, so make a stub reference */ +/* We don't want to include primnodes.h here, so make some stub references */ typedef struct Node *fmNodePtr; +typedef struct Aggref *fmAggrefPtr; + +/* Likewise, avoid including execnodes.h here */ +typedef struct ExprContext *fmExprContextPtr; /* Likewise, avoid including stringinfo.h here */ typedef struct StringInfoData *fmStringInfo; @@ -640,8 +644,8 @@ extern void **find_rendezvous_variable(const char *varName); /* * Support for aggregate functions * - * This is actually in executor/nodeAgg.c, but we declare it here since the - * whole point is for callers of it to not be overly friendly with nodeAgg. + * These are actually in executor/nodeAgg.c, but we declare them here since + * the whole point is for callers to not be overly friendly with nodeAgg. 
*/ /* AggCheckCallContext can return one of the following codes, or 0: */ @@ -650,6 +654,9 @@ extern void **find_rendezvous_variable(const char *varName); extern int AggCheckCallContext(FunctionCallInfo fcinfo, MemoryContext *aggcontext); +extern fmAggrefPtr AggGetAggref(FunctionCallInfo fcinfo); +extern fmExprContextPtr AggGetPerTupleEContext(FunctionCallInfo fcinfo); +extern fmExprContextPtr AggGetPerAggEContext(FunctionCallInfo fcinfo); /* * We allow plugin modules to hook function entry/exit. This is intended diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 5a4034729cf..2a7b36e148f 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -141,7 +141,7 @@ typedef struct ExprContext /* Link to containing EState (NULL if a standalone ExprContext) */ struct EState *ecxt_estate; - /* Functions to call back when ExprContext is shut down */ + /* Functions to call back when ExprContext is shut down or rescanned */ ExprContext_CB *ecxt_callbacks; } ExprContext; @@ -587,8 +587,9 @@ typedef struct WholeRowVarExprState typedef struct AggrefExprState { ExprState xprstate; - List *args; /* states of argument expressions */ - ExprState *aggfilter; /* FILTER expression */ + List *aggdirectargs; /* states of direct-argument expressions */ + List *args; /* states of aggregated-argument expressions */ + ExprState *aggfilter; /* state of FILTER expression, if any */ int aggno; /* ID number for agg within its plan node */ } AggrefExprState; @@ -1704,6 +1705,7 @@ typedef struct AggState AggStatePerAgg peragg; /* per-Aggref information */ MemoryContext aggcontext; /* memory context for long-lived data */ ExprContext *tmpcontext; /* econtext for input expressions */ + AggStatePerAgg curperagg; /* identifies currently active aggregate */ bool agg_done; /* indicates completion of Agg scan */ /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */ AggStatePerGroup pergroup; /* per-Aggref-per-group working state */ diff --git 
a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 6a5a8c5f2d7..e89d93034fb 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -281,7 +281,8 @@ typedef struct CollateClause /* * FuncCall - a function or aggregate invocation * - * agg_order (if not NIL) indicates we saw 'foo(... ORDER BY ...)'. + * agg_order (if not NIL) indicates we saw 'foo(... ORDER BY ...)', or if + * agg_within_group is true, it was 'foo(...) WITHIN GROUP (ORDER BY ...)'. * agg_star indicates we saw a 'foo(*)' construct, while agg_distinct * indicates we saw 'foo(DISTINCT ...)'. In any of these cases, the * construct *must* be an aggregate call. Otherwise, it might be either an @@ -298,6 +299,7 @@ typedef struct FuncCall List *args; /* the arguments (list of exprs) */ List *agg_order; /* ORDER BY (list of SortBy) */ Node *agg_filter; /* FILTER clause, if any */ + bool agg_within_group; /* ORDER BY appeared in WITHIN GROUP */ bool agg_star; /* argument was really '*' */ bool agg_distinct; /* arguments were labeled DISTINCT */ bool func_variadic; /* last argument was labeled VARIADIC */ diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 791853730b3..16144d45764 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -225,8 +225,9 @@ typedef struct Param /* * Aggref * - * The aggregate's args list is a targetlist, ie, a list of TargetEntry nodes - * (before Postgres 9.0 it was just bare expressions). The non-resjunk TLEs + * The aggregate's args list is a targetlist, ie, a list of TargetEntry nodes. + * + * For a normal (non-ordered-set) aggregate, the non-resjunk TargetEntries * represent the aggregate's regular arguments (if any) and resjunk TLEs can * be added at the end to represent ORDER BY expressions that are not also * arguments. As in a top-level Query, the TLEs can be marked with @@ -236,6 +237,12 @@ typedef struct Param * they are passed to the transition function. 
The grammar only allows a * simple "DISTINCT" specifier for the arguments, but we use the full * query-level representation to allow more code sharing. + * + * For an ordered-set aggregate, the args list represents the WITHIN GROUP + * (aggregated) arguments, all of which will be listed in the aggorder list. + * DISTINCT is not supported in this case, so aggdistinct will be NIL. + * The direct arguments appear in aggdirectargs (as a list of plain + * expressions, not TargetEntry nodes). */ typedef struct Aggref { @@ -244,12 +251,14 @@ typedef struct Aggref Oid aggtype; /* type Oid of result of the aggregate */ Oid aggcollid; /* OID of collation of result */ Oid inputcollid; /* OID of collation that function should use */ - List *args; /* arguments and sort expressions */ + List *aggdirectargs; /* direct arguments, if an ordered-set agg */ + List *args; /* aggregated arguments and sort expressions */ List *aggorder; /* ORDER BY (list of SortGroupClause) */ List *aggdistinct; /* DISTINCT (list of SortGroupClause) */ - Expr *aggfilter; /* FILTER expression */ + Expr *aggfilter; /* FILTER expression, if any */ bool aggstar; /* TRUE if argument list was really '*' */ bool aggvariadic; /* TRUE if VARIADIC was used in call */ + char aggkind; /* aggregate kind (see pg_aggregate.h) */ Index agglevelsup; /* > 0 if agg belongs to outer query */ int location; /* token location, or -1 if unknown */ } Aggref; @@ -265,7 +274,7 @@ typedef struct WindowFunc Oid wincollid; /* OID of collation of result */ Oid inputcollid; /* OID of collation that function should use */ List *args; /* arguments to the window function */ - Expr *aggfilter; /* FILTER expression */ + Expr *aggfilter; /* FILTER expression, if any */ Index winref; /* index of associated WindowClause */ bool winstar; /* TRUE if argument list was really '*' */ bool winagg; /* is function a simple aggregate? 
*/ diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index 6d7b5948cd9..a9219e0be95 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -47,15 +47,16 @@ typedef struct QualCost /* * Costing aggregate function execution requires these statistics about - * the aggregates to be executed by a given Agg node. Note that transCost - * includes the execution costs of the aggregates' input expressions. + * the aggregates to be executed by a given Agg node. Note that the costs + * include the execution costs of the aggregates' argument expressions as + * well as the aggregate functions themselves. */ typedef struct AggClauseCosts { int numAggs; /* total number of aggregate functions */ - int numOrderedAggs; /* number that use DISTINCT or ORDER BY */ + int numOrderedAggs; /* number w/ DISTINCT/ORDER BY/WITHIN GROUP */ QualCost transCost; /* total per-input-row execution costs */ - Cost finalCost; /* total costs of agg final functions */ + Cost finalCost; /* total per-aggregated-row costs */ Size transitionSpace; /* space for pass-by-ref transition data */ } AggClauseCosts; diff --git a/src/include/parser/kwlist.h b/src/include/parser/kwlist.h index 8bd34d6e8f0..ab2715629c0 100644 --- a/src/include/parser/kwlist.h +++ b/src/include/parser/kwlist.h @@ -412,6 +412,7 @@ PG_KEYWORD("where", WHERE, RESERVED_KEYWORD) PG_KEYWORD("whitespace", WHITESPACE_P, UNRESERVED_KEYWORD) PG_KEYWORD("window", WINDOW, RESERVED_KEYWORD) PG_KEYWORD("with", WITH, RESERVED_KEYWORD) +PG_KEYWORD("within", WITHIN, UNRESERVED_KEYWORD) PG_KEYWORD("without", WITHOUT, UNRESERVED_KEYWORD) PG_KEYWORD("work", WORK, UNRESERVED_KEYWORD) PG_KEYWORD("wrapper", WRAPPER, UNRESERVED_KEYWORD) diff --git a/src/include/parser/parse_agg.h b/src/include/parser/parse_agg.h index b6d9dd37b04..487d5cc8894 100644 --- a/src/include/parser/parse_agg.h +++ b/src/include/parser/parse_agg.h @@ -23,8 +23,17 @@ extern void transformWindowFuncCall(ParseState *pstate, WindowFunc 
*wfunc, extern void parseCheckAggregates(ParseState *pstate, Query *qry); +extern int get_aggregate_argtypes(Aggref *aggref, Oid *inputTypes); + +extern Oid resolve_aggregate_transtype(Oid aggfuncid, + Oid aggtranstype, + Oid *inputTypes, + int numArguments); + extern void build_aggregate_fnexprs(Oid *agg_input_types, int agg_num_inputs, + int agg_num_direct_inputs, + bool agg_ordered_set, bool agg_variadic, Oid agg_state_type, Oid agg_result_type, diff --git a/src/include/parser/parse_clause.h b/src/include/parser/parse_clause.h index 9bdb03347ad..18f338d3404 100644 --- a/src/include/parser/parse_clause.h +++ b/src/include/parser/parse_clause.h @@ -42,6 +42,9 @@ extern List *transformDistinctClause(ParseState *pstate, extern List *transformDistinctOnClause(ParseState *pstate, List *distinctlist, List **targetlist, List *sortClause); +extern List *addTargetToSortList(ParseState *pstate, TargetEntry *tle, + List *sortlist, List *targetlist, SortBy *sortby, + bool resolveUnknown); extern Index assignSortGroupRef(TargetEntry *tle, List *tlist); extern bool targetIsInSortList(TargetEntry *tle, Oid sortop, List *sortList); diff --git a/src/include/parser/parse_func.h b/src/include/parser/parse_func.h index d33eef3482c..f28fd9974c3 100644 --- a/src/include/parser/parse_func.h +++ b/src/include/parser/parse_func.h @@ -43,9 +43,7 @@ typedef enum extern Node *ParseFuncOrColumn(ParseState *pstate, List *funcname, List *fargs, - List *agg_order, Expr *agg_filter, - bool agg_star, bool agg_distinct, bool func_variadic, - WindowDef *over, bool is_column, int location); + FuncCall *fn, int location); extern FuncDetailCode func_get_detail(List *funcname, List *fargs, List *fargnames, diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 1bfd145da50..4ad900ed2cf 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -515,6 +515,21 @@ extern Datum oidvectorgt(PG_FUNCTION_ARGS); extern oidvector *buildoidvector(const Oid *oids, int 
n); extern Oid oidparse(Node *node); +/* orderedsetaggs.c */ +extern Datum ordered_set_transition(PG_FUNCTION_ARGS); +extern Datum ordered_set_transition_multi(PG_FUNCTION_ARGS); +extern Datum percentile_disc_final(PG_FUNCTION_ARGS); +extern Datum percentile_cont_float8_final(PG_FUNCTION_ARGS); +extern Datum percentile_cont_interval_final(PG_FUNCTION_ARGS); +extern Datum percentile_disc_multi_final(PG_FUNCTION_ARGS); +extern Datum percentile_cont_float8_multi_final(PG_FUNCTION_ARGS); +extern Datum percentile_cont_interval_multi_final(PG_FUNCTION_ARGS); +extern Datum mode_final(PG_FUNCTION_ARGS); +extern Datum hypothetical_rank_final(PG_FUNCTION_ARGS); +extern Datum hypothetical_percent_rank_final(PG_FUNCTION_ARGS); +extern Datum hypothetical_cume_dist_final(PG_FUNCTION_ARGS); +extern Datum hypothetical_dense_rank_final(PG_FUNCTION_ARGS); + /* pseudotypes.c */ extern Datum cstring_in(PG_FUNCTION_ARGS); extern Datum cstring_out(PG_FUNCTION_ARGS); diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h index 49f459ad580..dfba74c47d3 100644 --- a/src/include/utils/lsyscache.h +++ b/src/include/utils/lsyscache.h @@ -90,6 +90,7 @@ extern Oid get_func_namespace(Oid funcid); extern Oid get_func_rettype(Oid funcid); extern int get_func_nargs(Oid funcid); extern Oid get_func_signature(Oid funcid, Oid **argtypes, int *nargs); +extern Oid get_func_variadictype(Oid funcid); extern bool get_func_retset(Oid funcid); extern bool func_strict(Oid funcid); extern char func_volatile(Oid funcid); diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h index 25fa6de18e1..5f87254881e 100644 --- a/src/include/utils/tuplesort.h +++ b/src/include/utils/tuplesort.h @@ -99,6 +99,9 @@ extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward, extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward, Datum *val, bool *isNull); +extern bool tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples, + bool forward); + extern void 
tuplesort_end(Tuplesortstate *state); extern void tuplesort_get_stats(Tuplesortstate *state, diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 1a0ca5c5f3c..58df85470a6 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1311,6 +1311,262 @@ select aggfns(distinct a,b,c order by a,c using ~<~,b) filter (where a > 1) {"(2,2,bar)","(3,1,baz)"} (1 row) +-- ordered-set aggregates +select p, percentile_cont(p) within group (order by x::float8) +from generate_series(1,5) x, + (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p) +group by p order by p; + p | percentile_cont +------+----------------- + 0 | 1 + 0.1 | 1.4 + 0.25 | 2 + 0.4 | 2.6 + 0.5 | 3 + 0.6 | 3.4 + 0.75 | 4 + 0.9 | 4.6 + 1 | 5 +(9 rows) + +select p, percentile_cont(p order by p) within group (order by x) -- error +from generate_series(1,5) x, + (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p) +group by p order by p; +ERROR: cannot use multiple ORDER BY clauses with WITHIN GROUP +LINE 1: select p, percentile_cont(p order by p) within group (order ... 
+ ^ +select p, sum() within group (order by x::float8) -- error +from generate_series(1,5) x, + (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p) +group by p order by p; +ERROR: sum is not an ordered-set aggregate, so it cannot have WITHIN GROUP +LINE 1: select p, sum() within group (order by x::float8) + ^ +select p, percentile_cont(p,p) -- error +from generate_series(1,5) x, + (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p) +group by p order by p; +ERROR: WITHIN GROUP is required for ordered-set aggregate percentile_cont +LINE 1: select p, percentile_cont(p,p) + ^ +select percentile_cont(0.5) within group (order by b) from aggtest; + percentile_cont +------------------ + 53.4485001564026 +(1 row) + +select percentile_cont(0.5) within group (order by b), sum(b) from aggtest; + percentile_cont | sum +------------------+--------- + 53.4485001564026 | 431.773 +(1 row) + +select percentile_cont(0.5) within group (order by thousand) from tenk1; + percentile_cont +----------------- + 499.5 +(1 row) + +select percentile_disc(0.5) within group (order by thousand) from tenk1; + percentile_disc +----------------- + 499 +(1 row) + +select rank(3) within group (order by x) +from (values (1),(1),(2),(2),(3),(3),(4)) v(x); + rank +------ + 5 +(1 row) + +select cume_dist(3) within group (order by x) +from (values (1),(1),(2),(2),(3),(3),(4)) v(x); + cume_dist +----------- + 0.875 +(1 row) + +select percent_rank(3) within group (order by x) +from (values (1),(1),(2),(2),(3),(3),(4),(5)) v(x); + percent_rank +-------------- + 0.5 +(1 row) + +select dense_rank(3) within group (order by x) +from (values (1),(1),(2),(2),(3),(3),(4)) v(x); + dense_rank +------------ + 3 +(1 row) + +select percentile_disc(array[0,0.1,0.25,0.5,0.75,0.9,1]) within group (order by thousand) +from tenk1; + percentile_disc +---------------------------- + {0,99,249,499,749,899,999} +(1 row) + +select percentile_cont(array[0,0.25,0.5,0.75,1]) within group 
(order by thousand) +from tenk1; + percentile_cont +----------------------------- + {0,249.75,499.5,749.25,999} +(1 row) + +select percentile_disc(array[[null,1,0.5],[0.75,0.25,null]]) within group (order by thousand) +from tenk1; + percentile_disc +--------------------------------- + {{NULL,999,499},{749,249,NULL}} +(1 row) + +select percentile_cont(array[0,1,0.25,0.75,0.5,1]) within group (order by x) +from generate_series(1,6) x; + percentile_cont +----------------------- + {1,6,2.25,4.75,3.5,6} +(1 row) + +select ten, mode() within group (order by string4) from tenk1 group by ten; + ten | mode +-----+-------- + 0 | HHHHxx + 1 | OOOOxx + 2 | VVVVxx + 3 | OOOOxx + 4 | HHHHxx + 5 | HHHHxx + 6 | OOOOxx + 7 | AAAAxx + 8 | VVVVxx + 9 | VVVVxx +(10 rows) + +select percentile_disc(array[0.25,0.5,0.75]) within group (order by x) +from unnest('{fred,jim,fred,jack,jill,fred,jill,jim,jim,sheila,jim,sheila}'::text[]) u(x); + percentile_disc +----------------- + {fred,jill,jim} +(1 row) + +-- check collation propagates up in suitable cases: +select pg_collation_for(percentile_disc(1) within group (order by x collate "POSIX")) + from (values ('fred'),('jim')) v(x); + pg_collation_for +------------------ + "POSIX" +(1 row) + +-- ordered-set aggs created with CREATE AGGREGATE +select test_rank(3) within group (order by x) +from (values (1),(1),(2),(2),(3),(3),(4)) v(x); + test_rank +----------- + 5 +(1 row) + +select test_percentile_disc(0.5) within group (order by thousand) from tenk1; + test_percentile_disc +---------------------- + 499 +(1 row) + +-- ordered-set aggs can't use ungrouped vars in direct args: +select rank(x) within group (order by x) from generate_series(1,5) x; +ERROR: column "x.x" must appear in the GROUP BY clause or be used in an aggregate function +LINE 1: select rank(x) within group (order by x) from generate_serie... + ^ +DETAIL: Direct arguments of an ordered-set aggregate must use only grouped columns. 
+-- outer-level agg can't use a grouped arg of a lower level, either: +select array(select percentile_disc(a) within group (order by x) + from (values (0.3),(0.7)) v(a) group by a) + from generate_series(1,5) g(x); +ERROR: outer-level aggregate cannot contain a lower-level variable in its direct arguments +LINE 1: select array(select percentile_disc(a) within group (order b... + ^ +-- agg in the direct args is a grouping violation, too: +select rank(sum(x)) within group (order by x) from generate_series(1,5) x; +ERROR: aggregate function calls cannot be nested +LINE 1: select rank(sum(x)) within group (order by x) from generate_... + ^ +-- hypothetical-set type unification and argument-count failures: +select rank(3) within group (order by x) from (values ('fred'),('jim')) v(x); +ERROR: WITHIN GROUP types text and integer cannot be matched +LINE 1: select rank(3) within group (order by x) from (values ('fred... + ^ +select rank(3) within group (order by stringu1,stringu2) from tenk1; +ERROR: function rank(integer, name, name) does not exist +LINE 1: select rank(3) within group (order by stringu1,stringu2) fro... + ^ +HINT: To use the hypothetical-set aggregate rank, the number of hypothetical direct arguments (here 1) must match the number of ordering columns (here 2). +select rank('fred') within group (order by x) from generate_series(1,5) x; +ERROR: invalid input syntax for integer: "fred" +LINE 1: select rank('fred') within group (order by x) from generate_... + ^ +select rank('adam'::text collate "C") within group (order by x collate "POSIX") + from (values ('fred'),('jim')) v(x); +ERROR: collation mismatch between explicit collations "C" and "POSIX" +LINE 1: ...adam'::text collate "C") within group (order by x collate "P... 
+ ^ +-- hypothetical-set type unification successes: +select rank('adam'::varchar) within group (order by x) from (values ('fred'),('jim')) v(x); + rank +------ + 1 +(1 row) + +select rank('3') within group (order by x) from generate_series(1,5) x; + rank +------ + 3 +(1 row) + +-- divide by zero check +select percent_rank(0) within group (order by x) from generate_series(1,0) x; + percent_rank +-------------- + 0 +(1 row) + +-- deparse and multiple features: +create view aggordview1 as +select ten, + percentile_disc(0.5) within group (order by thousand) as p50, + percentile_disc(0.5) within group (order by thousand) filter (where hundred=1) as px, + rank(5,'AZZZZ',50) within group (order by hundred, string4 desc, hundred) + from tenk1 + group by ten order by ten; +select pg_get_viewdef('aggordview1'); + pg_get_viewdef +------------------------------------------------------------------------------------------------------------------------------- + SELECT tenk1.ten, + + percentile_disc((0.5)::double precision) WITHIN GROUP (ORDER BY tenk1.thousand) AS p50, + + percentile_disc((0.5)::double precision) WITHIN GROUP (ORDER BY tenk1.thousand) FILTER (WHERE (tenk1.hundred = 1)) AS px,+ + rank(5, 'AZZZZ'::name, 50) WITHIN GROUP (ORDER BY tenk1.hundred, tenk1.string4 DESC, tenk1.hundred) AS rank + + FROM tenk1 + + GROUP BY tenk1.ten + + ORDER BY tenk1.ten; +(1 row) + +select * from aggordview1 order by ten; + ten | p50 | px | rank +-----+-----+-----+------ + 0 | 490 | | 101 + 1 | 491 | 401 | 101 + 2 | 492 | | 101 + 3 | 493 | | 101 + 4 | 494 | | 101 + 5 | 495 | | 67 + 6 | 496 | | 1 + 7 | 497 | | 1 + 8 | 498 | | 1 + 9 | 499 | | 1 +(10 rows) + +drop view aggordview1; -- variadic aggregates select least_agg(q1,q2) from int8_tbl; least_agg diff --git a/src/test/regress/expected/create_aggregate.out b/src/test/regress/expected/create_aggregate.out index 9ecaea14992..ca908d91f45 100644 --- a/src/test/regress/expected/create_aggregate.out +++ 
b/src/test/regress/expected/create_aggregate.out @@ -66,3 +66,27 @@ returns anyelement language sql as create aggregate least_agg(variadic items anyarray) ( stype = anyelement, sfunc = least_accum ); +-- test ordered-set aggs using built-in support functions +create aggregate my_percentile_disc(float8 ORDER BY anyelement) ( + stype = internal, + sfunc = ordered_set_transition, + finalfunc = percentile_disc_final +); +create aggregate my_rank(VARIADIC "any" ORDER BY VARIADIC "any") ( + stype = internal, + sfunc = ordered_set_transition_multi, + finalfunc = rank_final, + hypothetical +); +alter aggregate my_percentile_disc(float8 ORDER BY anyelement) + rename to test_percentile_disc; +alter aggregate my_rank(VARIADIC "any" ORDER BY VARIADIC "any") + rename to test_rank; +\da test_* + List of aggregate functions + Schema | Name | Result data type | Argument data types | Description +--------+----------------------+------------------+----------------------------------------+------------- + public | test_percentile_disc | anyelement | double precision ORDER BY anyelement | + public | test_rank | bigint | VARIADIC "any" ORDER BY VARIADIC "any" | +(2 rows) + diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 10d65c0b287..292b6051432 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -23,6 +23,7 @@ SELECT ($1 = $2) OR EXISTS(select 1 from pg_catalog.pg_cast where castsource = $1 and casttarget = $2 and castmethod = 'b' and castcontext = 'i') OR + ($2 = 'pg_catalog.any'::pg_catalog.regtype) OR ($2 = 'pg_catalog.anyarray'::pg_catalog.regtype AND EXISTS(select 1 from pg_catalog.pg_type where oid = $1 and typelem != 0 and typlen = -1)) @@ -34,6 +35,7 @@ SELECT ($1 = $2) OR EXISTS(select 1 from pg_catalog.pg_cast where castsource = $1 and casttarget = $2 and castmethod = 'b') OR + ($2 = 'pg_catalog.any'::pg_catalog.regtype) OR ($2 = 'pg_catalog.anyarray'::pg_catalog.regtype 
AND EXISTS(select 1 from pg_catalog.pg_type where oid = $1 and typelem != 0 and typlen = -1)) @@ -702,7 +704,11 @@ SELECT * FROM funcdescs -- Look for illegal values in pg_aggregate fields. SELECT ctid, aggfnoid::oid FROM pg_aggregate as p1 -WHERE aggfnoid = 0 OR aggtransfn = 0 OR aggtranstype = 0 OR aggtransspace < 0; +WHERE aggfnoid = 0 OR aggtransfn = 0 OR + aggkind NOT IN ('n', 'o', 'h') OR + aggnumdirectargs < 0 OR + (aggkind = 'n' AND aggnumdirectargs > 0) OR + aggtranstype = 0 OR aggtransspace < 0; ctid | aggfnoid ------+---------- (0 rows) @@ -711,7 +717,7 @@ WHERE aggfnoid = 0 OR aggtransfn = 0 OR aggtranstype = 0 OR aggtransspace < 0; SELECT a.aggfnoid::oid, p.proname FROM pg_aggregate as a, pg_proc as p WHERE a.aggfnoid = p.oid AND - (NOT p.proisagg OR p.proretset); + (NOT p.proisagg OR p.proretset OR p.pronargs < a.aggnumdirectargs); aggfnoid | proname ----------+--------- (0 rows) @@ -742,7 +748,9 @@ FROM pg_aggregate AS a, pg_proc AS p, pg_proc AS ptr WHERE a.aggfnoid = p.oid AND a.aggtransfn = ptr.oid AND (ptr.proretset - OR NOT (ptr.pronargs = p.pronargs + 1) + OR NOT (ptr.pronargs = + CASE WHEN a.aggkind = 'n' THEN p.pronargs + 1 + ELSE greatest(p.pronargs - a.aggnumdirectargs, 1) + 1 END) OR NOT physically_coercible(ptr.prorettype, a.aggtranstype) OR NOT physically_coercible(a.aggtranstype, ptr.proargtypes[0]) OR (p.pronargs > 0 AND @@ -751,7 +759,7 @@ WHERE a.aggfnoid = p.oid AND NOT physically_coercible(p.proargtypes[1], ptr.proargtypes[2])) OR (p.pronargs > 2 AND NOT physically_coercible(p.proargtypes[2], ptr.proargtypes[3])) - -- we could carry the check further, but that's enough for now + -- we could carry the check further, but 3 args is enough for now ); aggfnoid | proname | oid | proname ----------+---------+-----+--------- @@ -762,10 +770,19 @@ SELECT a.aggfnoid::oid, p.proname, pfn.oid, pfn.proname FROM pg_aggregate AS a, pg_proc AS p, pg_proc AS pfn WHERE a.aggfnoid = p.oid AND a.aggfinalfn = pfn.oid AND - (pfn.proretset - OR NOT 
binary_coercible(pfn.prorettype, p.prorettype) - OR pfn.pronargs != 1 - OR NOT binary_coercible(a.aggtranstype, pfn.proargtypes[0])); + (pfn.proretset OR + NOT binary_coercible(pfn.prorettype, p.prorettype) OR + NOT binary_coercible(a.aggtranstype, pfn.proargtypes[0]) OR + CASE WHEN a.aggkind = 'n' THEN pfn.pronargs != 1 + ELSE pfn.pronargs != p.pronargs + 1 + OR (p.pronargs > 0 AND + NOT binary_coercible(p.proargtypes[0], pfn.proargtypes[1])) + OR (p.pronargs > 1 AND + NOT binary_coercible(p.proargtypes[1], pfn.proargtypes[2])) + OR (p.pronargs > 2 AND + NOT binary_coercible(p.proargtypes[2], pfn.proargtypes[3])) + -- we could carry the check further, but 3 args is enough for now + END); aggfnoid | proname | oid | proname ----------+---------+-----+--------- (0 rows) @@ -857,18 +874,20 @@ ORDER BY 1; count("any") | count() (1 row) --- For the same reason, we avoid creating built-in variadic aggregates. +-- For the same reason, built-in aggregates with default arguments are no good. SELECT oid, proname FROM pg_proc AS p -WHERE proisagg AND provariadic != 0; +WHERE proisagg AND proargdefaults IS NOT NULL; oid | proname -----+--------- (0 rows) --- For the same reason, built-in aggregates with default arguments are no good. -SELECT oid, proname -FROM pg_proc AS p -WHERE proisagg AND proargdefaults IS NOT NULL; +-- For the same reason, we avoid creating built-in variadic aggregates, except +-- that variadic ordered-set aggregates are OK (since they have special syntax +-- that is not subject to the misplaced ORDER BY issue). 
+SELECT p.oid, proname +FROM pg_proc AS p JOIN pg_aggregate AS a ON a.aggfnoid = p.oid +WHERE proisagg AND provariadic != 0 AND a.aggkind = 'n'; oid | proname -----+--------- (0 rows) diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index f99a07d9833..8096a6ffbec 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -492,6 +492,101 @@ select aggfns(distinct a,b,c order by a,c using ~<~,b) filter (where a > 1) from (values (1,3,'foo'),(0,null,null),(2,2,'bar'),(3,1,'baz')) v(a,b,c), generate_series(1,2) i; +-- ordered-set aggregates + +select p, percentile_cont(p) within group (order by x::float8) +from generate_series(1,5) x, + (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p) +group by p order by p; + +select p, percentile_cont(p order by p) within group (order by x) -- error +from generate_series(1,5) x, + (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p) +group by p order by p; + +select p, sum() within group (order by x::float8) -- error +from generate_series(1,5) x, + (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p) +group by p order by p; + +select p, percentile_cont(p,p) -- error +from generate_series(1,5) x, + (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p) +group by p order by p; + +select percentile_cont(0.5) within group (order by b) from aggtest; +select percentile_cont(0.5) within group (order by b), sum(b) from aggtest; +select percentile_cont(0.5) within group (order by thousand) from tenk1; +select percentile_disc(0.5) within group (order by thousand) from tenk1; +select rank(3) within group (order by x) +from (values (1),(1),(2),(2),(3),(3),(4)) v(x); +select cume_dist(3) within group (order by x) +from (values (1),(1),(2),(2),(3),(3),(4)) v(x); +select percent_rank(3) within group (order by x) +from (values (1),(1),(2),(2),(3),(3),(4),(5)) v(x); +select dense_rank(3) within 
group (order by x) +from (values (1),(1),(2),(2),(3),(3),(4)) v(x); + +select percentile_disc(array[0,0.1,0.25,0.5,0.75,0.9,1]) within group (order by thousand) +from tenk1; +select percentile_cont(array[0,0.25,0.5,0.75,1]) within group (order by thousand) +from tenk1; +select percentile_disc(array[[null,1,0.5],[0.75,0.25,null]]) within group (order by thousand) +from tenk1; +select percentile_cont(array[0,1,0.25,0.75,0.5,1]) within group (order by x) +from generate_series(1,6) x; + +select ten, mode() within group (order by string4) from tenk1 group by ten; + +select percentile_disc(array[0.25,0.5,0.75]) within group (order by x) +from unnest('{fred,jim,fred,jack,jill,fred,jill,jim,jim,sheila,jim,sheila}'::text[]) u(x); + +-- check collation propagates up in suitable cases: +select pg_collation_for(percentile_disc(1) within group (order by x collate "POSIX")) + from (values ('fred'),('jim')) v(x); + +-- ordered-set aggs created with CREATE AGGREGATE +select test_rank(3) within group (order by x) +from (values (1),(1),(2),(2),(3),(3),(4)) v(x); +select test_percentile_disc(0.5) within group (order by thousand) from tenk1; + +-- ordered-set aggs can't use ungrouped vars in direct args: +select rank(x) within group (order by x) from generate_series(1,5) x; + +-- outer-level agg can't use a grouped arg of a lower level, either: +select array(select percentile_disc(a) within group (order by x) + from (values (0.3),(0.7)) v(a) group by a) + from generate_series(1,5) g(x); + +-- agg in the direct args is a grouping violation, too: +select rank(sum(x)) within group (order by x) from generate_series(1,5) x; + +-- hypothetical-set type unification and argument-count failures: +select rank(3) within group (order by x) from (values ('fred'),('jim')) v(x); +select rank(3) within group (order by stringu1,stringu2) from tenk1; +select rank('fred') within group (order by x) from generate_series(1,5) x; +select rank('adam'::text collate "C") within group (order by x collate 
"POSIX") + from (values ('fred'),('jim')) v(x); +-- hypothetical-set type unification successes: +select rank('adam'::varchar) within group (order by x) from (values ('fred'),('jim')) v(x); +select rank('3') within group (order by x) from generate_series(1,5) x; + +-- divide by zero check +select percent_rank(0) within group (order by x) from generate_series(1,0) x; + +-- deparse and multiple features: +create view aggordview1 as +select ten, + percentile_disc(0.5) within group (order by thousand) as p50, + percentile_disc(0.5) within group (order by thousand) filter (where hundred=1) as px, + rank(5,'AZZZZ',50) within group (order by hundred, string4 desc, hundred) + from tenk1 + group by ten order by ten; + +select pg_get_viewdef('aggordview1'); +select * from aggordview1 order by ten; +drop view aggordview1; + -- variadic aggregates select least_agg(q1,q2) from int8_tbl; select least_agg(variadic array[q1,q2]) from int8_tbl; diff --git a/src/test/regress/sql/create_aggregate.sql b/src/test/regress/sql/create_aggregate.sql index 7ea23de0b6a..c76882a3984 100644 --- a/src/test/regress/sql/create_aggregate.sql +++ b/src/test/regress/sql/create_aggregate.sql @@ -80,3 +80,24 @@ returns anyelement language sql as create aggregate least_agg(variadic items anyarray) ( stype = anyelement, sfunc = least_accum ); + +-- test ordered-set aggs using built-in support functions +create aggregate my_percentile_disc(float8 ORDER BY anyelement) ( + stype = internal, + sfunc = ordered_set_transition, + finalfunc = percentile_disc_final +); + +create aggregate my_rank(VARIADIC "any" ORDER BY VARIADIC "any") ( + stype = internal, + sfunc = ordered_set_transition_multi, + finalfunc = rank_final, + hypothetical +); + +alter aggregate my_percentile_disc(float8 ORDER BY anyelement) + rename to test_percentile_disc; +alter aggregate my_rank(VARIADIC "any" ORDER BY VARIADIC "any") + rename to test_rank; + +\da test_* diff --git a/src/test/regress/sql/opr_sanity.sql 
b/src/test/regress/sql/opr_sanity.sql index 1c71c964a5b..5cf58d5e575 100644 --- a/src/test/regress/sql/opr_sanity.sql +++ b/src/test/regress/sql/opr_sanity.sql @@ -26,6 +26,7 @@ SELECT ($1 = $2) OR EXISTS(select 1 from pg_catalog.pg_cast where castsource = $1 and casttarget = $2 and castmethod = 'b' and castcontext = 'i') OR + ($2 = 'pg_catalog.any'::pg_catalog.regtype) OR ($2 = 'pg_catalog.anyarray'::pg_catalog.regtype AND EXISTS(select 1 from pg_catalog.pg_type where oid = $1 and typelem != 0 and typlen = -1)) @@ -38,6 +39,7 @@ SELECT ($1 = $2) OR EXISTS(select 1 from pg_catalog.pg_cast where castsource = $1 and casttarget = $2 and castmethod = 'b') OR + ($2 = 'pg_catalog.any'::pg_catalog.regtype) OR ($2 = 'pg_catalog.anyarray'::pg_catalog.regtype AND EXISTS(select 1 from pg_catalog.pg_type where oid = $1 and typelem != 0 and typlen = -1)) @@ -567,14 +569,18 @@ SELECT * FROM funcdescs SELECT ctid, aggfnoid::oid FROM pg_aggregate as p1 -WHERE aggfnoid = 0 OR aggtransfn = 0 OR aggtranstype = 0 OR aggtransspace < 0; +WHERE aggfnoid = 0 OR aggtransfn = 0 OR + aggkind NOT IN ('n', 'o', 'h') OR + aggnumdirectargs < 0 OR + (aggkind = 'n' AND aggnumdirectargs > 0) OR + aggtranstype = 0 OR aggtransspace < 0; -- Make sure the matching pg_proc entry is sensible, too. SELECT a.aggfnoid::oid, p.proname FROM pg_aggregate as a, pg_proc as p WHERE a.aggfnoid = p.oid AND - (NOT p.proisagg OR p.proretset); + (NOT p.proisagg OR p.proretset OR p.pronargs < a.aggnumdirectargs); -- Make sure there are no proisagg pg_proc entries without matches. 
@@ -598,7 +604,9 @@ FROM pg_aggregate AS a, pg_proc AS p, pg_proc AS ptr WHERE a.aggfnoid = p.oid AND a.aggtransfn = ptr.oid AND (ptr.proretset - OR NOT (ptr.pronargs = p.pronargs + 1) + OR NOT (ptr.pronargs = + CASE WHEN a.aggkind = 'n' THEN p.pronargs + 1 + ELSE greatest(p.pronargs - a.aggnumdirectargs, 1) + 1 END) OR NOT physically_coercible(ptr.prorettype, a.aggtranstype) OR NOT physically_coercible(a.aggtranstype, ptr.proargtypes[0]) OR (p.pronargs > 0 AND @@ -607,7 +615,7 @@ WHERE a.aggfnoid = p.oid AND NOT physically_coercible(p.proargtypes[1], ptr.proargtypes[2])) OR (p.pronargs > 2 AND NOT physically_coercible(p.proargtypes[2], ptr.proargtypes[3])) - -- we could carry the check further, but that's enough for now + -- we could carry the check further, but 3 args is enough for now ); -- Cross-check finalfn (if present) against its entry in pg_proc. @@ -616,10 +624,19 @@ SELECT a.aggfnoid::oid, p.proname, pfn.oid, pfn.proname FROM pg_aggregate AS a, pg_proc AS p, pg_proc AS pfn WHERE a.aggfnoid = p.oid AND a.aggfinalfn = pfn.oid AND - (pfn.proretset - OR NOT binary_coercible(pfn.prorettype, p.prorettype) - OR pfn.pronargs != 1 - OR NOT binary_coercible(a.aggtranstype, pfn.proargtypes[0])); + (pfn.proretset OR + NOT binary_coercible(pfn.prorettype, p.prorettype) OR + NOT binary_coercible(a.aggtranstype, pfn.proargtypes[0]) OR + CASE WHEN a.aggkind = 'n' THEN pfn.pronargs != 1 + ELSE pfn.pronargs != p.pronargs + 1 + OR (p.pronargs > 0 AND + NOT binary_coercible(p.proargtypes[0], pfn.proargtypes[1])) + OR (p.pronargs > 1 AND + NOT binary_coercible(p.proargtypes[1], pfn.proargtypes[2])) + OR (p.pronargs > 2 AND + NOT binary_coercible(p.proargtypes[2], pfn.proargtypes[3])) + -- we could carry the check further, but 3 args is enough for now + END); -- If transfn is strict then either initval should be non-NULL, or -- input type should match transtype so that the first non-null input @@ -685,18 +702,20 @@ WHERE p1.oid < p2.oid AND p1.proname = p2.proname AND 
array_dims(p1.proargtypes) != array_dims(p2.proargtypes) ORDER BY 1; --- For the same reason, we avoid creating built-in variadic aggregates. - -SELECT oid, proname -FROM pg_proc AS p -WHERE proisagg AND provariadic != 0; - -- For the same reason, built-in aggregates with default arguments are no good. SELECT oid, proname FROM pg_proc AS p WHERE proisagg AND proargdefaults IS NOT NULL; +-- For the same reason, we avoid creating built-in variadic aggregates, except +-- that variadic ordered-set aggregates are OK (since they have special syntax +-- that is not subject to the misplaced ORDER BY issue). + +SELECT p.oid, proname +FROM pg_proc AS p JOIN pg_aggregate AS a ON a.aggfnoid = p.oid +WHERE proisagg AND provariadic != 0 AND a.aggkind = 'n'; + -- **************** pg_opfamily **************** -- Look for illegal values in pg_opfamily fields |