diff options
Diffstat (limited to 'doc/src/sgml')
-rw-r--r-- | doc/src/sgml/catalogs.sgml | 18 | ||||
-rw-r--r-- | doc/src/sgml/ref/create_aggregate.sgml | 223 | ||||
-rw-r--r-- | doc/src/sgml/ref/drop_aggregate.sgml | 6 | ||||
-rw-r--r-- | doc/src/sgml/xaggr.sgml | 136 |
4 files changed, 151 insertions, 232 deletions
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 91cea30605f..1613774f26b 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1,6 +1,6 @@ .\" This is -*-nroff-*- .\" XXX standard disclaimer belongs here.... -.\" $Header: /cvsroot/pgsql/doc/src/sgml/catalogs.sgml,v 2.6 2000/06/09 01:43:56 momjian Exp $ +.\" $Header: /cvsroot/pgsql/doc/src/sgml/catalogs.sgml,v 2.7 2000/07/17 03:04:40 tgl Exp $ .TH "SYSTEM CATALOGS" INTRO 03/13/94 PostgreSQL PostgreSQL .SH "Section 7 - System Catalogs" .de LS @@ -91,20 +91,16 @@ The following catalogs relate to the class/type system. * see DEFINE AGGREGATE for an explanation of transition functions */ pg_aggregate - NameData aggname /* aggregate name (e.g., "count") */ + NameData aggname /* aggregate name (e.g., "count") */ oid aggowner /* usesysid of creator */ - regproc aggtransfn1 /* first transition function */ - regproc aggtransfn2 /* second transition function */ + regproc aggtransfn /* transition function */ regproc aggfinalfn /* final function */ oid aggbasetype /* type of data on which aggregate operates */ - oid aggtranstype1 /* type returned by aggtransfn1 */ - oid aggtranstype2 /* type returned by aggtransfn2 */ - oid aggfinaltype /* type returned by aggfinalfn */ - text agginitval1 /* external format of initial - (starting) value of aggtransfn1 */ - text agginitval2 /* external format of initial - (starting) value of aggtransfn2 */ + oid aggtranstype /* type of aggregate's transition + (state) data */ + oid aggfinaltype /* type of aggregate's final result */ + text agginitval /* external format of initial state value */ .fi .nf M pg_am diff --git a/doc/src/sgml/ref/create_aggregate.sgml b/doc/src/sgml/ref/create_aggregate.sgml index 44291458d60..07b45c112aa 100644 --- a/doc/src/sgml/ref/create_aggregate.sgml +++ b/doc/src/sgml/ref/create_aggregate.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/ref/create_aggregate.sgml,v 1.9 2000/03/31 14:57:05 tgl Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/ref/create_aggregate.sgml,v 1.10 2000/07/17 03:04:41 tgl Exp $ Postgres documentation --> @@ -21,20 +21,18 @@ Postgres documentation </refnamediv> <refsynopsisdiv> <refsynopsisdivinfo> - <date>1999-07-20</date> + <date>2000-07-16</date> </refsynopsisdivinfo> <synopsis> -CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> ( BASETYPE = <replaceable class="PARAMETER">input_data_type</replaceable> - [ , SFUNC1 = <replaceable class="PARAMETER">sfunc1</replaceable>, STYPE1 = <replaceable class="PARAMETER">state1_type</replaceable> ] - [ , SFUNC2 = <replaceable class="PARAMETER">sfunc2</replaceable>, STYPE2 = <replaceable class="PARAMETER">state2_type</replaceable> ] +CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> ( BASETYPE = <replaceable class="PARAMETER">input_data_type</replaceable>, + SFUNC = <replaceable class="PARAMETER">sfunc</replaceable>, STYPE = <replaceable class="PARAMETER">state_type</replaceable> [ , FINALFUNC = <replaceable class="PARAMETER">ffunc</replaceable> ] - [ , INITCOND1 = <replaceable class="PARAMETER">initial_condition1</replaceable> ] - [ , INITCOND2 = <replaceable class="PARAMETER">initial_condition2</replaceable> ] ) + [ , INITCOND = <replaceable class="PARAMETER">initial_condition</replaceable> ] ) </synopsis> <refsect2 id="R2-SQL-CREATEAGGREGATE-1"> <refsect2info> - <date>1998-09-09</date> + <date>2000-07-16</date> </refsect2info> <title> Inputs @@ -55,57 +53,39 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> ( BASETYPE = <listitem> <para> The input data type on which this aggregate function operates. + This can be specified as ANY for an aggregate that does not + examine its input values + (an example is <function>count(*)</function>). </para> </listitem> </varlistentry> <varlistentry> - <term><replaceable class="PARAMETER">sfunc1</replaceable></term> + <term><replaceable class="PARAMETER">sfunc</replaceable></term> <listitem> <para> - A state transition function - to be called for every non-NULL input data value. - This must be a function of two arguments, the first being of - type <replaceable class="PARAMETER">state1_type</replaceable> + The name of the state transition function + to be called for each input data value. + This is normally a function of two arguments, the first being of + type <replaceable class="PARAMETER">state_type</replaceable> and the second of type <replaceable class="PARAMETER">input_data_type</replaceable>. - The function must return a value of - type <replaceable class="PARAMETER">state1_type</replaceable>. - This function takes the current state value 1 and the current - input data item, and returns the next state value 1. - </para> - </listitem> - </varlistentry> - - <varlistentry> - <term><replaceable class="PARAMETER">state1_type</replaceable></term> - <listitem> - <para> - The data type for the first state value of the aggregate. - </para> - </listitem> - </varlistentry> - - <varlistentry> - <term><replaceable class="PARAMETER">sfunc2</replaceable></term> - <listitem> - <para> - A state transition function - to be called for every non-NULL input data value. - This must be a function of one argument of - type <replaceable class="PARAMETER">state2_type</replaceable>, - returning a value of the same type. - This function takes the current state value 2 and - returns the next state value 2. + Alternatively, for an aggregate that does not examine its input + values, the function takes just one argument of + type <replaceable class="PARAMETER">state_type</replaceable>. + In either case the function must return a value of + type <replaceable class="PARAMETER">state_type</replaceable>. + This function takes the current state value and the current + input data item, and returns the next state value. </para> </listitem> </varlistentry> <varlistentry> - <term><replaceable class="PARAMETER">state2_type</replaceable></term> + <term><replaceable class="PARAMETER">state_type</replaceable></term> <listitem> <para> - The data type for the second state value of the aggregate. + The data type for the aggregate's state value. </para> </listitem> </varlistentry> @@ -114,35 +94,28 @@ CREATE AGGREGATE <replaceable class="PARAMETER">name</replaceable> ( BASETYPE = <term><replaceable class="PARAMETER">ffunc</replaceable></term> <listitem> <para> - The final function called to compute the aggregate's result - after all input data has been traversed. - If both state values are used, the final function must - take two arguments of types - <replaceable class="PARAMETER">state1_type</replaceable> - and - <replaceable class="PARAMETER">state2_type</replaceable>. - If only one state value is used, the final function must - take a single argument of that state value's type. + The name of the final function called to compute the aggregate's + result after all input data has been traversed. The function + must take a single argument of type + <replaceable class="PARAMETER">state_type</replaceable>. The output datatype of the aggregate is defined as the return type of this function. + If <replaceable class="PARAMETER">ffunc</replaceable> + is not specified, then the ending state value is used as the + aggregate's result, and the output type is + <replaceable class="PARAMETER">state_type</replaceable>. </para> </listitem> </varlistentry> <varlistentry> - <term><replaceable class="PARAMETER">initial_condition1</replaceable></term> + <term><replaceable class="PARAMETER">initial_condition</replaceable></term> <listitem> <para> - The initial value for state value 1. - </para> - </listitem> - </varlistentry> - - <varlistentry> - <term><replaceable class="PARAMETER">initial_condition2</replaceable></term> - <listitem> - <para> - The initial value for state value 2. + The initial setting for the state value. This must be a literal + constant in the form accepted for the datatype + <replaceable class="PARAMETER">state_type</replaceable>. + If not specified, the state value starts out NULL. </para> </listitem> </varlistentry> @@ -177,7 +150,7 @@ CREATE <refsect1 id="R1-SQL-CREATEAGGREGATE-1"> <refsect1info> - <date>1998-09-09</date> + <date>2000-07-16</date> </refsect1info> <title> Description @@ -199,65 +172,76 @@ CREATE of the same name and input data type as an aggregate. </para> <para> - An aggregate function is made from between one and three ordinary + An aggregate function is made from one or two ordinary functions: - two state transition functions, - <replaceable class="PARAMETER">sfunc1</replaceable> - and <replaceable class="PARAMETER">sfunc2</replaceable>, - and a final calculation function, + a state transition function + <replaceable class="PARAMETER">sfunc</replaceable>, + and an optional final calculation function <replaceable class="PARAMETER">ffunc</replaceable>. These are used as follows: <programlisting> -<replaceable class="PARAMETER">sfunc1</replaceable>( internal-state1, next-data-item ) ---> next-internal-state1 -<replaceable class="PARAMETER">sfunc2</replaceable>( internal-state2 ) ---> next-internal-state2 -<replaceable class="PARAMETER">ffunc</replaceable>(internal-state1, internal-state2) ---> aggregate-value +<replaceable class="PARAMETER">sfunc</replaceable>( internal-state, next-data-item ) ---> next-internal-state +<replaceable class="PARAMETER">ffunc</replaceable>( internal-state ) ---> aggregate-value </programlisting> </para> <para> - <productname>Postgres</productname> creates one or two temporary variables - (of data types <replaceable class="PARAMETER">stype1</replaceable> and/or - <replaceable class="PARAMETER">stype2</replaceable>) to hold the - current internal states of the aggregate. At each input data item, - the state transition function(s) are invoked to calculate new values - for the internal state values. After all the data has been processed, + <productname>Postgres</productname> creates a temporary variable + of data type <replaceable class="PARAMETER">stype</replaceable> + to hold the current internal state of the aggregate. At each input + data item, + the state transition function is invoked to calculate a new + internal state value. After all the data has been processed, the final function is invoked once to calculate the aggregate's output - value. + value. If there is no final function then the ending state value + is returned as-is. </para> + <para> - <replaceable class="PARAMETER">ffunc</replaceable> must be specified if - both transition functions are specified. If only one transition function - is used, then <replaceable class="PARAMETER">ffunc</replaceable> is - optional. The default behavior when - <replaceable class="PARAMETER">ffunc</replaceable> is not provided is - to return the ending value of the internal state value being used - (and, therefore, the aggregate's output type is the same as that - state value's type). - </para> + An aggregate function may provide an initial condition, + that is, an initial value for the internal state value. + This is specified and stored in the database as a field of type + <type>text</type>, but it must be a valid external representation + of a constant of the state value datatype. If it is not supplied + then the state value starts out NULL. + </para> - <para> - An aggregate function may also provide one or two initial conditions, - that is, initial values for the internal state values being used. - These are specified and stored in the database as fields of type - <type>text</type>, but they must be valid external representations - of constants of the state value datatypes. If - <replaceable class="PARAMETER">sfunc1</replaceable> is specified - without an <replaceable class="PARAMETER">initcond1</replaceable> value, - then the system does not call - <replaceable class="PARAMETER">sfunc1</replaceable> - at the first input item; instead, the internal state value 1 is - initialized with the first input value, and - <replaceable class="PARAMETER">sfunc1</replaceable> is called beginning - at the second input item. This is useful for aggregates like MIN and - MAX. Note that an aggregate using this feature will return NULL when - called with no input values. There is no comparable provision for - state value 2; if <replaceable class="PARAMETER">sfunc2</replaceable> is - specified then an <replaceable class="PARAMETER">initcond2</replaceable> is - required. + <para> + If the state transition function is declared "strict" in pg_proc, + then it cannot be called with NULL inputs. With such a transition + function, aggregate execution behaves as follows. NULL input values + are ignored (the function is not called and the previous state value + is retained). If the initial state value is NULL, then the first + non-NULL input value replaces the state value, and the transition + function is invoked beginning with the second non-NULL input value. + This is handy for implementing aggregates like <function>max</function>. + Note that this behavior is only available when + <replaceable class="PARAMETER">state_type</replaceable> + is the same as + <replaceable class="PARAMETER">input_data_type</replaceable>. + When these types are different, you must supply a non-NULL initial + condition or use a non-strict transition function. + </para> + + <para> + If the state transition function is not strict, then it will be called + unconditionally at each input value, and must deal with NULL inputs + and NULL transition values for itself. This allows the aggregate + author to have full control over the aggregate's handling of NULLs. + </para> + + <para> + If the final function is declared "strict", then it will not + be called when the ending state value is NULL; instead a NULL result + will be output automatically. (Of course this is just the normal + behavior of strict functions.) In any case the final function has + the option of returning NULL. For example, the final function for + <function>avg</function> returns NULL when it sees there were zero + input tuples. </para> <refsect2 id="R2-SQL-CREATEAGGREGATE-3"> <refsect2info> - <date>1998-09-09</date> + <date>2000-07-16</date> </refsect2info> <title> Notes @@ -272,29 +256,6 @@ CREATE in any order, not just the order illustrated above. </para> - <para> - It is possible to specify aggregate functions - that have varying combinations of state and final functions. - For example, the <function>count</function> aggregate requires - <replaceable class="PARAMETER">sfunc2</replaceable> - (an incrementing function) but not - <replaceable class="PARAMETER">sfunc1</replaceable> or - <replaceable class="PARAMETER">ffunc</replaceable>, - whereas the <function>sum</function> aggregate requires - <replaceable class="PARAMETER">sfunc1</replaceable> (an addition - function) but not <replaceable class="PARAMETER">sfunc2</replaceable> or - <replaceable class="PARAMETER">ffunc</replaceable>, and the - <function>avg</function> - aggregate requires - both state functions as - well as a <replaceable class="PARAMETER">ffunc</replaceable> (a division - function) to produce its - answer. In any case, at least one state function must be - defined, and any <replaceable class="PARAMETER">sfunc2</replaceable> must - have a corresponding - <replaceable class="PARAMETER">initcond2</replaceable>. - </para> - </refsect2> </refsect1> diff --git a/doc/src/sgml/ref/drop_aggregate.sgml b/doc/src/sgml/ref/drop_aggregate.sgml index 465f5d64215..8efc31a732e 100644 --- a/doc/src/sgml/ref/drop_aggregate.sgml +++ b/doc/src/sgml/ref/drop_aggregate.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/ref/drop_aggregate.sgml,v 1.7 2000/05/18 14:24:32 momjian Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/ref/drop_aggregate.sgml,v 1.8 2000/07/17 03:04:41 tgl Exp $ Postgres documentation --> @@ -49,7 +49,7 @@ DROP AGGREGATE <replaceable class="PARAMETER">name</replaceable> <replaceable cl <para> The type of an existing aggregate function. (Refer to the <citetitle>PostgreSQL User's Guide</citetitle> for - further information about data types). + further information about data types.) <comment>This should become a cross-reference rather than a hard-coded chapter number</comment> </para> @@ -80,7 +80,7 @@ DROP </varlistentry> <varlistentry> <term><computeroutput> -NOTICE RemoveAggregate: aggregate '<replaceable class="parameter">agg</replaceable>' for '<replaceable class="parameter">type</replaceable>' does not exist +ERROR: RemoveAggregate: aggregate '<replaceable class="parameter">agg</replaceable>' for '<replaceable class="parameter">type</replaceable>' does not exist </computeroutput></term> <listitem> <para> diff --git a/doc/src/sgml/xaggr.sgml b/doc/src/sgml/xaggr.sgml index 8d5cb93a2d9..c1e32f9b015 100644 --- a/doc/src/sgml/xaggr.sgml +++ b/doc/src/sgml/xaggr.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/xaggr.sgml,v 1.7 2000/03/31 03:27:41 thomas Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/xaggr.sgml,v 1.8 2000/07/17 03:04:40 tgl Exp $ --> <chapter id="xaggr"> @@ -16,39 +16,20 @@ $Header: /cvsroot/pgsql/doc/src/sgml/xaggr.sgml,v 1.7 2000/03/31 03:27:41 thomas an initial value for the state, and a state transition function. The state transition function is just an ordinary function that could also be used outside the - context of the aggregate. + context of the aggregate. A <firstterm>final function</firstterm> + can also be specified, in case the desired output of the aggregate + is different from the data that needs to be kept in the running + state value. </para> <para> - Actually, in order to make it easier to construct useful - aggregates from existing functions, an aggregate can have - one or two separate state values, one or two transition - functions to update those state values, and a - <firstterm>final function</firstterm> that computes the - actual aggregate result from the ending state values. + Thus, in addition to the input and result datatypes seen by a user + of the aggregate, there is an internal state-value datatype that + may be different from both the input and result types. </para> <para> - Thus there can be as many as four datatypes involved: - the type of the input data items, the type of the aggregate's - result, and the types of the two state values. Only the - input and result datatypes are seen by a user of the aggregate. - </para> - - <para> - Some state transition functions need to look at each successive - input to compute the next state value, while others ignore the - specific input value and simply update their internal state. - (The most useful example of the second kind is a running count - of the number of input items.) The <productname>Postgres</productname> - aggregate machinery defines <acronym>sfunc1</acronym> for - an aggregate as a function that is passed both the old state - value and the current input value, while <acronym>sfunc2</acronym> - is a function that is passed only the old state value. - </para> - - <para> - If we define an aggregate that uses only <acronym>sfunc1</acronym>, + If we define an aggregate that does not use a final function, we have an aggregate that computes a running function of the attribute values from each instance. "Sum" is an example of this kind of aggregate. "Sum" starts at @@ -60,10 +41,10 @@ $Header: /cvsroot/pgsql/doc/src/sgml/xaggr.sgml,v 1.7 2000/03/31 03:27:41 thomas <programlisting> CREATE AGGREGATE complex_sum ( - sfunc1 = complex_add, + sfunc = complex_add, basetype = complex, - stype1 = complex, - initcond1 = '(0,0)' + stype = complex, + initcond = '(0,0)' ); SELECT complex_sum(a) FROM test_complex; @@ -81,67 +62,48 @@ SELECT complex_sum(a) FROM test_complex; </para> <para> - If we define only <acronym>sfunc2</acronym>, we are - specifying an aggregate - that computes a running function that is independent of - the attribute values from each instance. - "Count" is the most common example of this kind of - aggregate. "Count" starts at zero and adds one to its - running total for each instance, ignoring the instance - value. Here, we use the built-in - <acronym>int4inc</acronym> routine to do - the work for us. This routine increments (adds one to) - its argument. - - <programlisting> -CREATE AGGREGATE my_count ( - sfunc2 = int4inc, -- add one - basetype = int4, - stype2 = int4, - initcond2 = '0' -); - -SELECT my_count(*) as emp_count from EMP; - - +----------+ - |emp_count | - +----------+ - |5 | - +----------+ - </programlisting> + The above definition of "Sum" will return zero (the initial + state condition) if there are no non-null input values. + Perhaps we want to return NULL in that case instead --- SQL92 + expects "Sum" to behave that way. We can do this simply by + omitting the "initcond" phrase, so that the initial state + condition is NULL. Ordinarily this would mean that the sfunc + would need to check for a NULL state-condition input, but for + "Sum" and some other simple aggregates like "Max" and "Min", + it's sufficient to insert the first non-null input value into + the state variable and then start applying the transition function + at the second non-null input value. <productname>Postgres</productname> + will do that automatically if the initial condition is NULL and + the transition function is marked "strict" (ie, not to be called + for NULL inputs). </para> <para> - "Average" is an example of an aggregate that requires - both a function to compute the running sum and a function - to compute the running count. When all of the - instances have been processed, the final answer for the - aggregate is the running sum divided by the running - count. We use the <acronym>int4pl</acronym> and - <acronym>int4inc</acronym> routines we used - before as well as the <productname>Postgres</productname> integer division - routine, <acronym>int4div</acronym>, to compute the division of the sum by - the count. - + Another bit of default behavior for a "strict" transition function + is that the previous state value is retained unchanged whenever a + NULL input value is encountered. Thus, NULLs are ignored. If you + need some other behavior for NULL inputs, just define your transition + function as non-strict, and code it to test for NULL inputs and do + whatever is needed. + </para> + + <para> + "Average" is a more complex example of an aggregate. It requires + two pieces of running state: the sum of the inputs and the count + of the number of inputs. The final result is obtained by dividing + these quantities. Average is typically implemented by using a + two-element array as the transition state value. For example, + the built-in implementation of <function>avg(float8)</function> + looks like: + <programlisting> -CREATE AGGREGATE my_average ( - sfunc1 = int4pl, -- sum - basetype = int4, - stype1 = int4, - sfunc2 = int4inc, -- count - stype2 = int4, - finalfunc = int4div, -- division - initcond1 = '0', - initcond2 = '0' +CREATE AGGREGATE avg ( + sfunc = float8_accum, + basetype = float8, + stype = _float8, + finalfunc = float8_avg, + initcond = '{0,0}' ); - -SELECT my_average(salary) as emp_average FROM EMP; - - +------------+ - |emp_average | - +------------+ - |1640 | - +------------+ </programlisting> </para> |