aboutsummaryrefslogtreecommitdiff
path: root/doc/src
diff options
context:
space:
mode:
Diffstat (limited to 'doc/src')
-rw-r--r--doc/src/sgml/catalogs.sgml43
-rw-r--r--doc/src/sgml/ref/create_aggregate.sgml153
-rw-r--r--doc/src/sgml/xaggr.sgml197
3 files changed, 375 insertions, 18 deletions
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 0069573c459..c174e672adb 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -387,6 +387,24 @@
<entry>Final function (zero if none)</entry>
</row>
<row>
+ <entry><structfield>aggmtransfn</structfield></entry>
+ <entry><type>regproc</type></entry>
+ <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
+ <entry>Forward transition function for moving-aggregate mode (zero if none)</entry>
+ </row>
+ <row>
+ <entry><structfield>aggminvtransfn</structfield></entry>
+ <entry><type>regproc</type></entry>
+ <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
+ <entry>Inverse transition function for moving-aggregate mode (zero if none)</entry>
+ </row>
+ <row>
+ <entry><structfield>aggmfinalfn</structfield></entry>
+ <entry><type>regproc</type></entry>
+ <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
+ <entry>Final function for moving-aggregate mode (zero if none)</entry>
+ </row>
+ <row>
<entry><structfield>aggsortop</structfield></entry>
<entry><type>oid</type></entry>
<entry><literal><link linkend="catalog-pg-operator"><structname>pg_operator</structname></link>.oid</literal></entry>
@@ -406,6 +424,20 @@
data, or zero to use a default estimate</entry>
</row>
<row>
+ <entry><structfield>aggmtranstype</structfield></entry>
+ <entry><type>oid</type></entry>
+ <entry><literal><link linkend="catalog-pg-type"><structname>pg_type</structname></link>.oid</literal></entry>
+ <entry>Data type of the aggregate function's internal transition (state)
+ data for moving-aggregate mode (zero if none)</entry>
+ </row>
+ <row>
+ <entry><structfield>aggmtransspace</structfield></entry>
+ <entry><type>int4</type></entry>
+ <entry></entry>
+ <entry>Approximate average size (in bytes) of the transition state data
+ for moving-aggregate mode, or zero to use a default estimate</entry>
+ </row>
+ <row>
<entry><structfield>agginitval</structfield></entry>
<entry><type>text</type></entry>
<entry></entry>
@@ -416,6 +448,17 @@
value starts out null.
</entry>
</row>
+ <row>
+ <entry><structfield>aggminitval</structfield></entry>
+ <entry><type>text</type></entry>
+ <entry></entry>
+ <entry>
+ The initial value of the transition state for moving-aggregate mode.
+ This is a text field containing the initial value in its external
+ string representation. If this field is null, the transition state
+ value starts out null.
+ </entry>
+ </row>
</tbody>
</tgroup>
</table>
diff --git a/doc/src/sgml/ref/create_aggregate.sgml b/doc/src/sgml/ref/create_aggregate.sgml
index e5fc7186544..268acf3e84d 100644
--- a/doc/src/sgml/ref/create_aggregate.sgml
+++ b/doc/src/sgml/ref/create_aggregate.sgml
@@ -27,6 +27,12 @@ CREATE AGGREGATE <replaceable class="parameter">name</replaceable> ( [ <replacea
[ , SSPACE = <replaceable class="PARAMETER">state_data_size</replaceable> ]
[ , FINALFUNC = <replaceable class="PARAMETER">ffunc</replaceable> ]
[ , INITCOND = <replaceable class="PARAMETER">initial_condition</replaceable> ]
+ [ , MSFUNC = <replaceable class="PARAMETER">msfunc</replaceable> ]
+ [ , MINVFUNC = <replaceable class="PARAMETER">minvfunc</replaceable> ]
+ [ , MSTYPE = <replaceable class="PARAMETER">mstate_data_type</replaceable> ]
+ [ , MSSPACE = <replaceable class="PARAMETER">mstate_data_size</replaceable> ]
+ [ , MFINALFUNC = <replaceable class="PARAMETER">mffunc</replaceable> ]
+ [ , MINITCOND = <replaceable class="PARAMETER">minitial_condition</replaceable> ]
[ , SORTOP = <replaceable class="PARAMETER">sort_operator</replaceable> ]
)
@@ -49,6 +55,12 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> (
[ , SSPACE = <replaceable class="PARAMETER">state_data_size</replaceable> ]
[ , FINALFUNC = <replaceable class="PARAMETER">ffunc</replaceable> ]
[ , INITCOND = <replaceable class="PARAMETER">initial_condition</replaceable> ]
+ [ , MSFUNC = <replaceable class="PARAMETER">sfunc</replaceable> ]
+ [ , MINVFUNC = <replaceable class="PARAMETER">invfunc</replaceable> ]
+ [ , MSTYPE = <replaceable class="PARAMETER">state_data_type</replaceable> ]
+ [ , MSSPACE = <replaceable class="PARAMETER">state_data_size</replaceable> ]
+ [ , MFINALFUNC = <replaceable class="PARAMETER">ffunc</replaceable> ]
+ [ , MINITCOND = <replaceable class="PARAMETER">initial_condition</replaceable> ]
[ , SORTOP = <replaceable class="PARAMETER">sort_operator</replaceable> ]
)
</synopsis>
@@ -84,7 +96,7 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> (
</para>
<para>
- An aggregate function is made from one or two ordinary
+ A simple aggregate function is made from one or two ordinary
functions:
a state transition function
<replaceable class="PARAMETER">sfunc</replaceable>,
@@ -126,7 +138,7 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> (
values are ignored (the function is not called and the previous state value
is retained). If the initial state value is null, then at the first row
with all-nonnull input values, the first argument value replaces the state
- value, and the transition function is invoked at subsequent rows with
+ value, and the transition function is invoked at each subsequent row with
all-nonnull input values.
This is handy for implementing aggregates like <function>max</function>.
Note that this behavior is only available when
@@ -155,6 +167,18 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> (
</para>
<para>
+ An aggregate can optionally support <firstterm>moving-aggregate mode</>,
+ as described in <xref linkend="xaggr-moving-aggregates">. This requires
+ specifying the <literal>MSFUNC</>, <literal>MINVFUNC</>,
+ and <literal>MSTYPE</> parameters, and optionally
+ the <literal>MSPACE</>, <literal>MFINALFUNC</>,
+ and <literal>MINITCOND</> parameters. Except for <literal>MINVFUNC</>,
+ these parameters work like the corresponding simple-aggregate parameters
+ without <literal>M</>; they define a separate implementation of the
+ aggregate that includes an inverse transition function.
+ </para>
+
+ <para>
The syntax with <literal>ORDER BY</literal> in the parameter list creates
a special type of aggregate called an <firstterm>ordered-set
aggregate</firstterm>; or if <literal>HYPOTHETICAL</> is specified, then
@@ -197,8 +221,8 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1;
<para>
To be able to create an aggregate function, you must
have <literal>USAGE</literal> privilege on the argument types, the state
- type, and the return type, as well as <literal>EXECUTE</literal> privilege
- on the transition and final functions.
+ type(s), and the return type, as well as <literal>EXECUTE</literal>
+ privilege on the transition and final functions.
</para>
</refsect1>
@@ -360,6 +384,79 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1;
</varlistentry>
<varlistentry>
+ <term><replaceable class="PARAMETER">msfunc</replaceable></term>
+ <listitem>
+ <para>
+ The name of the forward state transition function to be called for each
+ input row in moving-aggregate mode. This is exactly like the regular
+ transition function, except that its first argument and result are of
+ type <replaceable>mstate_data_type</>, which might be different
+ from <replaceable>state_data_type</>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable class="PARAMETER">minvfunc</replaceable></term>
+ <listitem>
+ <para>
+ The name of the inverse state transition function to be used in
+ moving-aggregate mode. This function has the same argument and
+ result types as <replaceable>msfunc</>, but it is used to remove
+ a value from the current aggregate state, rather than add a value to
+ it. The inverse transition function must have the same strictness
+ attribute as the forward state transition function.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable class="PARAMETER">mstate_data_type</replaceable></term>
+ <listitem>
+ <para>
+ The data type for the aggregate's state value, when using
+ moving-aggregate mode.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable class="PARAMETER">mstate_data_size</replaceable></term>
+ <listitem>
+ <para>
+ The approximate average size (in bytes) of the aggregate's state
+ value, when using moving-aggregate mode. This works the same as
+ <replaceable>state_data_size</>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable class="PARAMETER">mffunc</replaceable></term>
+ <listitem>
+ <para>
+ The name of the final function called to compute the aggregate's
+ result after all input rows have been traversed, when using
+ moving-aggregate mode. This works the same as <replaceable>ffunc</>,
+ except that its input type is <replaceable>mstate_data_type</>.
+ The aggregate result type determined by <replaceable>mffunc</>
+ and <replaceable>mstate_data_type</> must match that determined by the
+ aggregate's regular implementation.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><replaceable class="PARAMETER">minitial_condition</replaceable></term>
+ <listitem>
+ <para>
+ The initial setting for the state value, when using moving-aggregate
+ mode. This works the same as <replaceable>initial_condition</>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><replaceable class="PARAMETER">sort_operator</replaceable></term>
<listitem>
<para>
@@ -398,6 +495,49 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1;
<title>Notes</title>
<para>
+ If an aggregate supports moving-aggregate mode, it will improve
+ calculation efficiency when the aggregate is used as a window function
+ for a window with moving frame start (that is, a frame start mode other
+ than <literal>UNBOUNDED PRECEDING</>). Conceptually, the forward
+ transition function adds input values to the aggregate's state when
+ they enter the window frame from the bottom, and the inverse transition
+ function removes them again when they leave the frame at the top. So,
+ when values are removed, they are always removed in the same order they
+ were added. Whenever the inverse transition function is invoked, it will
+ thus receive the earliest added but not yet removed argument value(s).
+ The inverse transition function can assume that at least one row will
+ remain in the current state after it removes the oldest row. (When this
+ would not be the case, the window function mechanism simply starts a
+ fresh aggregation, rather than using the inverse transition function.)
+ </para>
+
+ <para>
+ The forward transition function for moving-aggregate mode is not
+ allowed to return NULL as the new state value. If the inverse
+ transition function returns NULL, this is taken as an indication that
+ the inverse function cannot reverse the state calculation for this
+ particular input, and so the aggregate calculation will be redone from
+ scratch for the current frame starting position. This convention
+ allows moving-aggregate mode to be used in situations where there are
+ some infrequent cases that are impractical to reverse out of the
+ running state value.
+ </para>
+
+ <para>
+ If no moving-aggregate implementation is supplied,
+ the aggregate can still be used with moving frames,
+ but <productname>PostgreSQL</productname> will recompute the whole
+ aggregation whenever the start of the frame moves.
+ Note that whether or not the aggregate supports moving-aggregate
+ mode, <productname>PostgreSQL</productname> can handle a moving frame
+ end without recalculation; this is done by continuing to add new values
+ to the aggregate's state. It is assumed that the final function does
+ not damage the aggregate's state value, so that the aggregation can be
+ continued even after an aggregate result value has been obtained for
+ one set of frame boundaries.
+ </para>
+
+ <para>
The syntax for ordered-set aggregates allows <literal>VARIADIC</>
to be specified for both the last direct parameter and the last
aggregated (<literal>WITHIN GROUP</>) parameter. However, the
@@ -415,6 +555,11 @@ SELECT col FROM tab ORDER BY col USING sortop LIMIT 1;
ones; any preceding parameters represent additional direct arguments
that are not constrained to match the aggregated arguments.
</para>
+
+ <para>
+ Currently, ordered-set aggregates do not need to support
+ moving-aggregate mode, since they cannot be used as window functions.
+ </para>
</refsect1>
<refsect1>
diff --git a/doc/src/sgml/xaggr.sgml b/doc/src/sgml/xaggr.sgml
index e77ef12e5c3..cbbb0519115 100644
--- a/doc/src/sgml/xaggr.sgml
+++ b/doc/src/sgml/xaggr.sgml
@@ -132,6 +132,161 @@ CREATE AGGREGATE avg (float8)
</note>
<para>
+ Aggregate function calls in SQL allow <literal>DISTINCT</>
+ and <literal>ORDER BY</> options that control which rows are fed
+ to the aggregate's transition function and in what order. These
+ options are implemented behind the scenes and are not the concern
+ of the aggregate's support functions.
+ </para>
+
+ <para>
+ For further details see the
+ <xref linkend="sql-createaggregate">
+ command.
+ </para>
+
+ <sect2 id="xaggr-moving-aggregates">
+ <title>Moving-Aggregate Mode</title>
+
+ <indexterm>
+ <primary>moving-aggregate mode</primary>
+ </indexterm>
+
+ <indexterm>
+ <primary>aggregate function</primary>
+ <secondary>moving aggregate</secondary>
+ </indexterm>
+
+ <para>
+ Aggregate functions can optionally support <firstterm>moving-aggregate
+ mode</>, which allows substantially faster execution of aggregate
+ functions within windows with moving frame starting points.
+ (See <xref linkend="tutorial-window">
+ and <xref linkend="syntax-window-functions"> for information about use of
+ aggregate functions as window functions.)
+ The basic idea is that in addition to a normal <quote>forward</>
+ transition function, the aggregate provides an <firstterm>inverse
+ transition function</>, which allows rows to be removed from the
+ aggregate's running state value when they exit the window frame.
+ For example a <function>sum</> aggregate, which uses addition as the
+ forward transition function, would use subtraction as the inverse
+ transition function. Without an inverse transition function, the window
+ function mechanism must recalculate the aggregate from scratch each time
+ the frame starting point moves, resulting in run time proportional to the
+ number of input rows times the average frame length. With an inverse
+ transition function, the run time is only proportional to the number of
+ input rows.
+ </para>
+
+ <para>
+ The inverse transition function is passed the current state value and the
+ aggregate input value(s) for the earliest row included in the current
+ state. It must reconstruct what the state value would have been if the
+ given input value had never been aggregated, but only the rows following
+ it. This sometimes requires that the forward transition function keep
+ more state than is needed for plain aggregation mode. Therefore, the
+ moving-aggregate mode uses a completely separate implementation from the
+ plain mode: it has its own state data type, its own forward transition
+ function, and its own final function if needed. These can be the same as
+ the plain mode's data type and functions, if there is no need for extra
+ state.
+ </para>
+
+ <para>
+ As an example, we could extend the <function>sum</> aggregate given above
+ to support moving-aggregate mode like this:
+
+<programlisting>
+CREATE AGGREGATE sum (complex)
+(
+ sfunc = complex_add,
+ stype = complex,
+ initcond = '(0,0)',
+ msfunc = complex_add,
+ minvfunc = complex_sub,
+ mstype = complex,
+ minitcond = '(0,0)'
+);
+</programlisting>
+
+ The parameters whose names begin with <literal>m</> define the
+ moving-aggregate implementation. Except for the inverse transition
+ function <literal>minvfunc</>, they correspond to the plain-aggregate
+ parameters without <literal>m</>.
+ </para>
+
+ <para>
+ The forward transition function for moving-aggregate mode is not allowed
+ to return NULL as the new state value. If the inverse transition
+ function returns NULL, this is taken as an indication that the inverse
+ function cannot reverse the state calculation for this particular input,
+ and so the aggregate calculation will be redone from scratch for the
+ current frame starting position. This convention allows moving-aggregate
+ mode to be used in situations where there are some infrequent cases that
+ are impractical to reverse out of the running state value. The inverse
+ transition function can <quote>punt</> on these cases, and yet still come
+ out ahead so long as it can work for most cases. As an example, an
+ aggregate working with floating-point numbers might choose to punt when
+ a <literal>NaN</> (not a number) input has to be removed from the running
+ state value.
+ </para>
+
+ <para>
+ When writing moving-aggregate support functions, it is important to be
+ sure that the inverse transition function can reconstruct the correct
+ state value exactly. Otherwise there might be user-visible differences
+ in results depending on whether the moving-aggregate mode is used.
+ An example of an aggregate for which adding an inverse transition
+ function seems easy at first, yet where this requirement cannot be met
+ is <function>sum</> over <type>float4</> or <type>float8</> inputs. A
+ naive declaration of <function>sum(<type>float8</>)</function> could be
+
+<programlisting>
+CREATE AGGREGATE unsafe_sum (float8)
+(
+ stype = float8,
+ sfunc = float8pl,
+ mstype = float8,
+ msfunc = float8pl,
+ minvfunc = float8mi
+);
+</programlisting>
+
+ This aggregate, however, can give wildly different results than it would
+ have without the inverse transition function. For example, consider
+
+<programlisting>
+SELECT
+ unsafe_sum(x) OVER (ORDER BY n ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING)
+FROM (VALUES (1, 1.0e20::float8),
+ (2, 1.0::float8)) AS v (n,x);
+</programlisting>
+
+ This query returns <literal>0</> as its second result, rather than the
+ expected answer of <literal>1</>. The cause is the limited precision of
+ floating-point values: adding <literal>1</> to <literal>1e20</> results
+ in <literal>1e20</> again, and so subtracting <literal>1e20</> from that
+ yields <literal>0</>, not <literal>1</>. Note that this is a limitation
+ of floating-point arithmetic in general, not a limitation
+ of <productname>PostgreSQL</>.
+ </para>
+
+ </sect2>
+
+ <sect2 id="xaggr-polymorphic-aggregates">
+ <title>Polymorphic and Variadic Aggregates</title>
+
+ <indexterm>
+ <primary>aggregate function</primary>
+ <secondary>polymorphic</secondary>
+ </indexterm>
+
+ <indexterm>
+ <primary>aggregate function</primary>
+ <secondary>variadic</secondary>
+ </indexterm>
+
+ <para>
Aggregate functions can use polymorphic
state transition functions or final functions, so that the same functions
can be used to implement multiple aggregates.
@@ -189,8 +344,8 @@ SELECT attrelid::regclass, array_accum(atttypid::regtype)
by declaring its last argument as a <literal>VARIADIC</> array, in much
the same fashion as for regular functions; see
<xref linkend="xfunc-sql-variadic-functions">. The aggregate's transition
- function must have the same array type as its last argument. The
- transition function typically would also be marked <literal>VARIADIC</>,
+ function(s) must have the same array type as their last argument. The
+ transition function(s) typically would also be marked <literal>VARIADIC</>,
but this is not strictly required.
</para>
@@ -220,13 +375,15 @@ SELECT myaggregate(a, b, c ORDER BY a) FROM ...
</para>
</note>
- <para>
- Aggregate function calls in SQL allow <literal>DISTINCT</>
- and <literal>ORDER BY</> options that control which rows are fed
- to the aggregate's transition function and in what order. These
- options are implemented behind the scenes and are not the concern
- of the aggregate's support functions.
- </para>
+ </sect2>
+
+ <sect2 id="xaggr-ordered-set-aggregates">
+ <title>Ordered-Set Aggregates</title>
+
+ <indexterm>
+ <primary>aggregate function</primary>
+ <secondary>ordered set</secondary>
+ </indexterm>
<para>
The aggregates we have been describing so far are <quote>normal</>
@@ -312,6 +469,21 @@ SELECT percentile_disc(0.5) WITHIN GROUP (ORDER BY income) FROM households;
</para>
<para>
+ Currently, ordered-set aggregates cannot be used as window functions,
+ and therefore there is no need for them to support moving-aggregate mode.
+ </para>
+
+ </sect2>
+
+ <sect2 id="xaggr-support-functions">
+ <title>Support Functions for Aggregates</title>
+
+ <indexterm>
+ <primary>aggregate function</primary>
+ <secondary>support functions for</secondary>
+ </indexterm>
+
+ <para>
A function written in C can detect that it is being called as an
aggregate transition or final function by calling
<function>AggCheckCallContext</>, for example:
@@ -341,9 +513,6 @@ if (AggCheckCallContext(fcinfo, NULL))
source code.
</para>
- <para>
- For further details see the
- <xref linkend="sql-createaggregate">
- command.
- </para>
+ </sect2>
+
</sect1>