aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--contrib/intagg/int_aggregate.c117
-rw-r--r--contrib/intagg/int_aggregate.sql.in11
-rw-r--r--doc/src/sgml/xaggr.sgml17
-rw-r--r--doc/src/sgml/xfunc.sgml20
-rw-r--r--src/backend/executor/nodeAgg.c21
-rw-r--r--src/backend/utils/adt/int8.c49
6 files changed, 162 insertions, 73 deletions
diff --git a/contrib/intagg/int_aggregate.c b/contrib/intagg/int_aggregate.c
index cb0f1f3a452..35c883642b4 100644
--- a/contrib/intagg/int_aggregate.c
+++ b/contrib/intagg/int_aggregate.c
@@ -34,8 +34,12 @@
#include "utils/lsyscache.h"
-/* This is actually a postgres version of a one dimensional array */
-
+/*
+ * This is actually a postgres version of a one dimensional array.
+ * We cheat a little by using the lower-bound field as an indicator
+ * of the physically allocated size, while the dimensionality is the
+ * count of items accumulated so far.
+ */
typedef struct
{
ArrayType a;
@@ -56,7 +60,7 @@ typedef struct callContext
#define START_NUM 8 /* initial size of arrays */
#define PGARRAY_SIZE(n) (sizeof(PGARRAY) + (((n)-1)*sizeof(int4)))
-static PGARRAY *GetPGArray(PGARRAY *p, int fAdd);
+static PGARRAY *GetPGArray(PGARRAY *p, AggState *aggstate, bool fAdd);
static PGARRAY *ShrinkPGArray(PGARRAY *p);
Datum int_agg_state(PG_FUNCTION_ARGS);
@@ -68,72 +72,68 @@ PG_FUNCTION_INFO_V1(int_agg_final_array);
PG_FUNCTION_INFO_V1(int_enum);
/*
- * Manage the aggregation state of the array
+ * Manage the allocation state of the array
*
- * Need to specify a suitably long-lived memory context, or it will vanish!
- * PortalContext isn't really right, but it's close enough.
+ * Note that the array needs to be in a reasonably long-lived context,
+ * ie the Agg node's aggcontext.
*/
static PGARRAY *
-GetPGArray(PGARRAY *p, int fAdd)
+GetPGArray(PGARRAY *p, AggState *aggstate, bool fAdd)
{
if (!p)
{
/* New array */
int cb = PGARRAY_SIZE(START_NUM);
- p = (PGARRAY *) MemoryContextAlloc(PortalContext, cb);
+ p = (PGARRAY *) MemoryContextAlloc(aggstate->aggcontext, cb);
p->a.size = cb;
- p->a.ndim = 0;
+ p->a.ndim = 1;
p->a.flags = 0;
p->a.elemtype = INT4OID;
p->items = 0;
p->lower = START_NUM;
}
else if (fAdd)
- { /* Ensure array has space */
+ {
+ /* Ensure array has space for another item */
if (p->items >= p->lower)
{
- PGARRAY *pn;
- int n = p->lower + p->lower;
+ PGARRAY *pn;
+ int n = p->lower * 2;
int cbNew = PGARRAY_SIZE(n);
- pn = (PGARRAY *) repalloc(p, cbNew);
+ pn = (PGARRAY *) MemoryContextAlloc(aggstate->aggcontext, cbNew);
+ memcpy(pn, p, p->a.size);
pn->a.size = cbNew;
pn->lower = n;
- return pn;
+ /* do not pfree(p), because nodeAgg.c will */
+ p = pn;
}
}
return p;
}
-/* Shrinks the array to its actual size and moves it into the standard
- * memory allocation context, frees working memory
+/*
+ * Shrinks the array to its actual size and moves it into the standard
+ * memory allocation context
*/
static PGARRAY *
-ShrinkPGArray(PGARRAY * p)
+ShrinkPGArray(PGARRAY *p)
{
- PGARRAY *pnew = NULL;
+ PGARRAY *pnew;
+ /* get target size */
+ int cb = PGARRAY_SIZE(p->items);
+
+ /* use current transaction context */
+ pnew = palloc(cb);
+ memcpy(pnew, p, cb);
+
+ /* fix up the fields in the new array to match normal conventions */
+ pnew->a.size = cb;
+ pnew->lower = 1;
+
+ /* do not pfree(p), because nodeAgg.c will */
- if (p)
- {
- /* get target size */
- int cb = PGARRAY_SIZE(p->items);
-
- /* use current transaction context */
- pnew = palloc(cb);
-
- /*
- * Fix up the fields in the new structure, so Postgres understands
- */
- memcpy(pnew, p, cb);
- pnew->a.size = cb;
- pnew->a.ndim = 1;
- pnew->a.flags = 0;
- pnew->a.elemtype = INT4OID;
- pnew->lower = 1;
-
- pfree(p);
- }
return pnew;
}
@@ -144,11 +144,18 @@ int_agg_state(PG_FUNCTION_ARGS)
PGARRAY *state;
PGARRAY *p;
+ /*
+ * As of PG 8.1 we can actually verify that we are being used as an
+ * aggregate function, and so it is safe to scribble on our left input.
+ */
+ if (!(fcinfo->context && IsA(fcinfo->context, AggState)))
+ elog(ERROR, "int_agg_state may only be used as an aggregate");
+
if (PG_ARGISNULL(0))
- state = NULL;
+ state = NULL; /* first time through */
else
state = (PGARRAY *) PG_GETARG_POINTER(0);
- p = GetPGArray(state, 1);
+ p = GetPGArray(state, (AggState *) fcinfo->context, true);
if (!PG_ARGISNULL(1))
{
@@ -164,22 +171,38 @@ int_agg_state(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(p);
}
-/* This is the final function used for the integer aggregator. It returns all
+/*
+ * This is the final function used for the integer aggregator. It returns all
* the integers collected as a one dimensional integer array
*/
Datum
int_agg_final_array(PG_FUNCTION_ARGS)
{
- PGARRAY *state = (PGARRAY *) PG_GETARG_POINTER(0);
- PGARRAY *pnew = ShrinkPGArray(GetPGArray(state, 0));
+ PGARRAY *state;
+ PGARRAY *p;
+ PGARRAY *pnew;
- if (pnew)
- PG_RETURN_POINTER(pnew);
+ /*
+ * As of PG 8.1 we can actually verify that we are being used as an
+ * aggregate function, and so it is safe to scribble on our left input.
+ */
+ if (!(fcinfo->context && IsA(fcinfo->context, AggState)))
+ elog(ERROR, "int_agg_final_array may only be used as an aggregate");
+
+ if (PG_ARGISNULL(0))
+ state = NULL; /* zero items in aggregation */
else
- PG_RETURN_NULL();
+ state = (PGARRAY *) PG_GETARG_POINTER(0);
+ p = GetPGArray(state, (AggState *) fcinfo->context, false);
+
+ pnew = ShrinkPGArray(p);
+ PG_RETURN_POINTER(pnew);
}
-/* This function accepts an array, and returns one item for each entry in the array */
+/*
+ * This function accepts an array, and returns one item for each entry in the
+ * array
+ */
Datum
int_enum(PG_FUNCTION_ARGS)
{
diff --git a/contrib/intagg/int_aggregate.sql.in b/contrib/intagg/int_aggregate.sql.in
index caaf01afdb9..cc1cd927271 100644
--- a/contrib/intagg/int_aggregate.sql.in
+++ b/contrib/intagg/int_aggregate.sql.in
@@ -6,14 +6,14 @@ SET search_path = public;
CREATE OR REPLACE FUNCTION int_agg_state (int4[], int4)
RETURNS int4[]
AS 'MODULE_PATHNAME','int_agg_state'
-LANGUAGE 'C';
+LANGUAGE C;
-- Internal function for the aggregate
-- Is called at the end of the aggregation, and returns an array.
CREATE OR REPLACE FUNCTION int_agg_final_array (int4[])
RETURNS int4[]
AS 'MODULE_PATHNAME','int_agg_final_array'
-LANGUAGE 'C' STRICT;
+LANGUAGE C;
-- The aggregate function itself
-- uses the above functions to create an array of integers from an aggregation.
@@ -24,15 +24,10 @@ CREATE AGGREGATE int_array_aggregate (
FINALFUNC = int_agg_final_array
);
--- The aggregate component functions are not designed to be called
--- independently, so disable public access to them
-REVOKE ALL ON FUNCTION int_agg_state (int4[], int4) FROM PUBLIC;
-REVOKE ALL ON FUNCTION int_agg_final_array (int4[]) FROM PUBLIC;
-
-- The enumeration function
-- returns each element in a one dimensional integer array
-- as a row.
CREATE OR REPLACE FUNCTION int_array_enum(int4[])
RETURNS setof integer
AS 'MODULE_PATHNAME','int_enum'
-LANGUAGE 'C' IMMUTABLE STRICT;
+LANGUAGE C IMMUTABLE STRICT;
diff --git a/doc/src/sgml/xaggr.sgml b/doc/src/sgml/xaggr.sgml
index d9c41714dff..6a928f7b4f5 100644
--- a/doc/src/sgml/xaggr.sgml
+++ b/doc/src/sgml/xaggr.sgml
@@ -1,5 +1,5 @@
<!--
-$PostgreSQL: pgsql/doc/src/sgml/xaggr.sgml,v 1.26 2005/01/22 22:56:36 momjian Exp $
+$PostgreSQL: pgsql/doc/src/sgml/xaggr.sgml,v 1.27 2005/03/12 20:25:06 tgl Exp $
-->
<sect1 id="xaggr">
@@ -162,6 +162,21 @@ SELECT attrelid::regclass, array_accum(atttypid)
</para>
<para>
+ A function written in C can detect that it is being called as an
+ aggregate transition or final function by seeing if it was passed
+ an <structname>AggState</> node as the function call <quote>context</>,
+ for example by
+<programlisting>
+ if (fcinfo->context &amp;&amp; IsA(fcinfo->context, AggState))
+</programlisting>
+ One reason for checking this is that when it is true, the left input
+ must be a temporary transition value and can therefore safely be modified
+ in-place rather than allocating a new copy. (This is the <emphasis>only</>
+ case where it is safe for a function to modify a pass-by-reference input.)
+ See <literal>int8inc()></> for an example.
+ </para>
+
+ <para>
For further details see the
<xref linkend="sql-createaggregate" endterm="sql-createaggregate-title">
command.
diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml
index 72772140b36..0f051a36866 100644
--- a/doc/src/sgml/xfunc.sgml
+++ b/doc/src/sgml/xfunc.sgml
@@ -1,5 +1,5 @@
<!--
-$PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.99 2005/02/21 06:12:14 neilc Exp $
+$PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.100 2005/03/12 20:25:06 tgl Exp $
-->
<sect1 id="xfunc">
@@ -1188,10 +1188,10 @@ typedef struct
them in and out of <productname>PostgreSQL</productname> functions.
To return a value of such a type, allocate the right amount of
memory with <literal>palloc</literal>, fill in the allocated memory,
- and return a pointer to it. (You can also return an input value
- that has the same type as the return value directly by returning
- the pointer to the input value. <emphasis>Never</> modify the
- contents of a pass-by-reference input value, however.)
+ and return a pointer to it. (Also, if you just want to return the
+ same value as one of your input arguments that's of the same data type,
+ you can skip the extra <literal>palloc</literal> and just return the
+ pointer to the input value.)
</para>
<para>
@@ -1205,6 +1205,16 @@ typedef struct
itself.
</para>
+ <warning>
+ <para>
+ <emphasis>Never</> modify the contents of a pass-by-reference input
+ value. If you do so you are likely to corrupt on-disk data, since
+ the pointer you are given may well point directly into a disk buffer.
+ The sole exception to this rule is explained in
+ <xref linkend="xaggr">.
+ </para>
+ </warning>
+
<para>
As an example, we can define the type <type>text</type> as
follows:
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 63a687e9869..8f641cc843b 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -40,12 +40,28 @@
* is used to run finalize functions and compute the output tuple;
* this context can be reset once per output tuple.
*
+ * Beginning in PostgreSQL 8.1, the executor's AggState node is passed as
+ * the fmgr "context" value in all transfunc and finalfunc calls. It is
+ * not really intended that the transition functions will look into the
+ * AggState node, but they can use code like
+ * if (fcinfo->context && IsA(fcinfo->context, AggState))
+ * to verify that they are being called by nodeAgg.c and not as ordinary
+ * SQL functions. The main reason a transition function might want to know
+ * that is that it can avoid palloc'ing a fixed-size pass-by-ref transition
+ * value on every call: it can instead just scribble on and return its left
+ * input. Ordinarily it is completely forbidden for functions to modify
+ * pass-by-ref inputs, but in the aggregate case we know the left input is
+ * either the initial transition value or a previous function result, and
+ * in either case its value need not be preserved. See int8inc() for an
+ * example. Notice that advance_transition_function() is coded to avoid a
+ * data copy step when the previous transition value pointer is returned.
+ *
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.128 2005/01/28 19:33:56 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.129 2005/03/12 20:25:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -363,7 +379,7 @@ advance_transition_function(AggState *aggstate,
*/
/* MemSet(&fcinfo, 0, sizeof(fcinfo)); */
- fcinfo.context = NULL;
+ fcinfo.context = (void *) aggstate;
fcinfo.resultinfo = NULL;
fcinfo.isnull = false;
@@ -541,6 +557,7 @@ finalize_aggregate(AggState *aggstate,
FunctionCallInfoData fcinfo;
MemSet(&fcinfo, 0, sizeof(fcinfo));
+ fcinfo.context = (void *) aggstate;
fcinfo.flinfo = &peraggstate->finalfn;
fcinfo.nargs = 1;
fcinfo.arg[0] = pergroupstate->transValue;
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
index a7f76e31d96..c5c3d30d03d 100644
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.57 2004/12/31 22:01:22 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.58 2005/03/12 20:25:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -19,6 +19,7 @@
#include "funcapi.h"
#include "libpq/pqformat.h"
+#include "nodes/nodes.h"
#include "utils/int8.h"
@@ -649,17 +650,45 @@ int8mod(PG_FUNCTION_ARGS)
Datum
int8inc(PG_FUNCTION_ARGS)
{
- int64 arg = PG_GETARG_INT64(0);
- int64 result;
+ if (fcinfo->context && IsA(fcinfo->context, AggState))
+ {
+ /*
+ * Special case to avoid palloc overhead for COUNT(): when called
+ * from nodeAgg, we know that the argument is modifiable local
+ * storage, so just update it in-place.
+ *
+ * Note: this assumes int8 is a pass-by-ref type; if we ever support
+ * pass-by-val int8, this should be ifdef'd out when int8 is
+ * pass-by-val.
+ */
+ int64 *arg = (int64 *) PG_GETARG_POINTER(0);
+ int64 result;
- result = arg + 1;
- /* Overflow check */
- if (arg > 0 && result < 0)
- ereport(ERROR,
- (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("bigint out of range")));
+ result = *arg + 1;
+ /* Overflow check */
+ if (result < 0 && *arg > 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
- PG_RETURN_INT64(result);
+ *arg = result;
+ PG_RETURN_POINTER(arg);
+ }
+ else
+ {
+ /* Not called by nodeAgg, so just do it the dumb way */
+ int64 arg = PG_GETARG_INT64(0);
+ int64 result;
+
+ result = arg + 1;
+ /* Overflow check */
+ if (result < 0 && arg > 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+
+ PG_RETURN_INT64(result);
+ }
}
Datum