6 files changed, 162 insertions, 73 deletions
diff --git a/contrib/intagg/int_aggregate.c b/contrib/intagg/int_aggregate.c
index cb0f1f3a452..35c883642b4 100644
--- a/contrib/intagg/int_aggregate.c
+++ b/contrib/intagg/int_aggregate.c
@@ -34,8 +34,12 @@
 #include "utils/lsyscache.h"
 
 
-/* This is actually a postgres version of a one dimensional array */
-
+/*
+ * This is actually a postgres version of a one dimensional array.
+ * We cheat a little by using the lower-bound field as an indicator
+ * of the physically allocated size, while the dimensionality is the
+ * count of items accumulated so far.
+ */
 typedef struct
 {
 	ArrayType	a;
@@ -56,7 +60,7 @@ typedef struct callContext
 #define START_NUM	8			/* initial size of arrays */
 #define PGARRAY_SIZE(n) (sizeof(PGARRAY) + (((n)-1)*sizeof(int4)))
 
-static PGARRAY *GetPGArray(PGARRAY *p, int fAdd);
+static PGARRAY *GetPGArray(PGARRAY *p, AggState *aggstate, bool fAdd);
 static PGARRAY *ShrinkPGArray(PGARRAY *p);
 
 Datum		int_agg_state(PG_FUNCTION_ARGS);
@@ -68,72 +72,68 @@ PG_FUNCTION_INFO_V1(int_agg_final_array);
 PG_FUNCTION_INFO_V1(int_enum);
 
 /*
- * Manage the aggregation state of the array
+ * Manage the allocation state of the array
  *
- * Need to specify a suitably long-lived memory context, or it will vanish!
- * PortalContext isn't really right, but it's close enough.
+ * Note that the array needs to be in a reasonably long-lived context,
+ * ie the Agg node's aggcontext.
  */
 static PGARRAY *
-GetPGArray(PGARRAY *p, int fAdd)
+GetPGArray(PGARRAY *p, AggState *aggstate, bool fAdd)
 {
 	if (!p)
 	{
 		/* New array */
 		int			cb = PGARRAY_SIZE(START_NUM);
 
-		p = (PGARRAY *) MemoryContextAlloc(PortalContext, cb);
+		p = (PGARRAY *) MemoryContextAlloc(aggstate->aggcontext, cb);
 		p->a.size = cb;
-		p->a.ndim = 0;
+		p->a.ndim = 1;
 		p->a.flags = 0;
 		p->a.elemtype = INT4OID;
 		p->items = 0;
 		p->lower = START_NUM;
 	}
 	else if (fAdd)
-	{							/* Ensure array has space */
+	{
+		/* Ensure array has space for another item */
 		if (p->items >= p->lower)
 		{
-			PGARRAY    *pn;
-			int			n = p->lower + p->lower;
+			PGARRAY	   *pn;
+			int			n = p->lower * 2;
 			int			cbNew = PGARRAY_SIZE(n);
 
-			pn = (PGARRAY *) repalloc(p, cbNew);
+			pn = (PGARRAY *) MemoryContextAlloc(aggstate->aggcontext, cbNew);
+			memcpy(pn, p, p->a.size);
 			pn->a.size = cbNew;
 			pn->lower = n;
-			return pn;
+			/* do not pfree(p), because nodeAgg.c will */
+			p = pn;
 		}
 	}
 	return p;
 }
 
-/* Shrinks the array to its actual size and moves it into the standard
- * memory allocation context, frees working memory
+/*
+ * Shrinks the array to its actual size and moves it into the standard
+ * memory allocation context
  */
 static PGARRAY *
-ShrinkPGArray(PGARRAY * p)
+ShrinkPGArray(PGARRAY *p)
 {
-	PGARRAY    *pnew = NULL;
+	PGARRAY    *pnew;
+	/* get target size */
+	int			cb = PGARRAY_SIZE(p->items);
+
+	/* use current transaction context */
+	pnew = palloc(cb);
+	memcpy(pnew, p, cb);
+
+	/* fix up the fields in the new array to match normal conventions */
+	pnew->a.size = cb;
+	pnew->lower = 1;
+
+	/* do not pfree(p), because nodeAgg.c will */
 
-	if (p)
-	{
-		/* get target size */
-		int			cb = PGARRAY_SIZE(p->items);
-
-		/* use current transaction context */
-		pnew = palloc(cb);
-
-		/*
-		 * Fix up the fields in the new structure, so Postgres understands
-		 */
-		memcpy(pnew, p, cb);
-		pnew->a.size = cb;
-		pnew->a.ndim = 1;
-		pnew->a.flags = 0;
-		pnew->a.elemtype = INT4OID;
-		pnew->lower = 1;
-
-		pfree(p);
-	}
 	return pnew;
 }
 
@@ -144,11 +144,18 @@ int_agg_state(PG_FUNCTION_ARGS)
 	PGARRAY    *state;
 	PGARRAY    *p;
 
+	/*
+	 * As of PG 8.1 we can actually verify that we are being used as an
+	 * aggregate function, and so it is safe to scribble on our left input.
+	 */
+	if (!(fcinfo->context && IsA(fcinfo->context, AggState)))
+		elog(ERROR, "int_agg_state may only be used as an aggregate");
+
 	if (PG_ARGISNULL(0))
-		state = NULL;
+		state = NULL;			/* first time through */
 	else
 		state = (PGARRAY *) PG_GETARG_POINTER(0);
-	p = GetPGArray(state, 1);
+	p = GetPGArray(state, (AggState *) fcinfo->context, true);
 
 	if (!PG_ARGISNULL(1))
 	{
@@ -164,22 +171,38 @@ int_agg_state(PG_FUNCTION_ARGS)
 	PG_RETURN_POINTER(p);
 }
 
-/* This is the final function used for the integer aggregator. It returns all
+/*
+ * This is the final function used for the integer aggregator. It returns all
  * the integers collected as a one dimensional integer array
  */
 Datum
 int_agg_final_array(PG_FUNCTION_ARGS)
 {
-	PGARRAY    *state = (PGARRAY *) PG_GETARG_POINTER(0);
-	PGARRAY    *pnew = ShrinkPGArray(GetPGArray(state, 0));
+	PGARRAY    *state;
+	PGARRAY    *p;
+	PGARRAY    *pnew;
 
-	if (pnew)
-		PG_RETURN_POINTER(pnew);
+	/*
+	 * As of PG 8.1 we can actually verify that we are being used as an
+	 * aggregate function, and so it is safe to scribble on our left input.
+	 */
+	if (!(fcinfo->context && IsA(fcinfo->context, AggState)))
+		elog(ERROR, "int_agg_final_array may only be used as an aggregate");
+
+	if (PG_ARGISNULL(0))
+		state = NULL;			/* zero items in aggregation */
 	else
-		PG_RETURN_NULL();
+		state = (PGARRAY *) PG_GETARG_POINTER(0);
+	p = GetPGArray(state, (AggState *) fcinfo->context, false);
+
+	pnew = ShrinkPGArray(p);
+	PG_RETURN_POINTER(pnew);
 }
 
-/* This function accepts an array, and returns one item for each entry in the array */
+/*
+ * This function accepts an array, and returns one item for each entry in the
+ * array
+ */
 Datum
 int_enum(PG_FUNCTION_ARGS)
 {
diff --git a/contrib/intagg/int_aggregate.sql.in b/contrib/intagg/int_aggregate.sql.in
index caaf01afdb9..cc1cd927271 100644
--- a/contrib/intagg/int_aggregate.sql.in
+++ b/contrib/intagg/int_aggregate.sql.in
@@ -6,14 +6,14 @@ SET search_path = public;
 CREATE OR REPLACE FUNCTION int_agg_state (int4[], int4)
 RETURNS int4[]
 AS 'MODULE_PATHNAME','int_agg_state'
-LANGUAGE 'C';
+LANGUAGE C;
 
 -- Internal function for the aggregate
 -- Is called at the end of the aggregation, and returns an array.
 CREATE OR REPLACE FUNCTION int_agg_final_array (int4[])
 RETURNS int4[]
 AS 'MODULE_PATHNAME','int_agg_final_array'
-LANGUAGE 'C' STRICT;
+LANGUAGE C;
 
 -- The aggregate function itself
 -- uses the above functions to create an array of integers from an aggregation.
@@ -24,15 +24,10 @@ CREATE AGGREGATE int_array_aggregate (
 	FINALFUNC = int_agg_final_array
 );
 
--- The aggregate component functions are not designed to be called
--- independently, so disable public access to them
-REVOKE ALL ON FUNCTION int_agg_state (int4[], int4) FROM PUBLIC;
-REVOKE ALL ON FUNCTION int_agg_final_array (int4[]) FROM PUBLIC;
-
 -- The enumeration function
 -- returns each element in a one dimensional integer array
 -- as a row.
 CREATE OR REPLACE FUNCTION int_array_enum(int4[])
 RETURNS setof integer
 AS 'MODULE_PATHNAME','int_enum'
-LANGUAGE 'C' IMMUTABLE STRICT;
+LANGUAGE C IMMUTABLE STRICT;
diff --git a/doc/src/sgml/xaggr.sgml b/doc/src/sgml/xaggr.sgml
index d9c41714dff..6a928f7b4f5 100644
--- a/doc/src/sgml/xaggr.sgml
+++ b/doc/src/sgml/xaggr.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/xaggr.sgml,v 1.26 2005/01/22 22:56:36 momjian Exp $
+$PostgreSQL: pgsql/doc/src/sgml/xaggr.sgml,v 1.27 2005/03/12 20:25:06 tgl Exp $
 -->
 
  <sect1 id="xaggr">
@@ -162,6 +162,21 @@ SELECT attrelid::regclass, array_accum(atttypid)
   </para>
 
   <para>
+   A function written in C can detect that it is being called as an
+   aggregate transition or final function by seeing if it was passed
+   an <structname>AggState</> node as the function call <quote>context</>,
+   for example by
+<programlisting>
+        if (fcinfo->context &amp;&amp; IsA(fcinfo->context, AggState))
+</programlisting>
+   One reason for checking this is that when it is true, the left input
+   must be a temporary transition value and can therefore safely be modified
+   in-place rather than allocating a new copy.  (This is the <emphasis>only</>
+   case where it is safe for a function to modify a pass-by-reference input.)
+   See <literal>int8inc()></> for an example.
+  </para>
+
+  <para>
    For further details see the
    <xref linkend="sql-createaggregate" endterm="sql-createaggregate-title">
    command.
diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml
index 72772140b36..0f051a36866 100644
--- a/doc/src/sgml/xfunc.sgml
+++ b/doc/src/sgml/xfunc.sgml
@@ -1,5 +1,5 @@
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.99 2005/02/21 06:12:14 neilc Exp $
+$PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.100 2005/03/12 20:25:06 tgl Exp $
 -->
 
  <sect1 id="xfunc">
@@ -1188,10 +1188,10 @@ typedef struct
      them in and out of <productname>PostgreSQL</productname> functions.
      To return a value of such a type, allocate the right amount of
      memory with <literal>palloc</literal>, fill in the allocated memory,
-     and return a pointer to it.  (You can also return an input value
-     that has the same type as the return value directly by returning
-     the pointer to the input value.  <emphasis>Never</> modify the
-     contents of a pass-by-reference input value, however.)
+     and return a pointer to it.  (Also, if you just want to return the
+     same value as one of your input arguments that's of the same data type,
+     you can skip the extra <literal>palloc</literal> and just return the
+     pointer to the input value.)
     </para>
 
     <para>
@@ -1205,6 +1205,16 @@ typedef struct
      itself.
     </para>
 
+    <warning>
+     <para>
+      <emphasis>Never</> modify the contents of a pass-by-reference input
+      value.  If you do so you are likely to corrupt on-disk data, since
+      the pointer you are given may well point directly into a disk buffer.
+      The sole exception to this rule is explained in
+      <xref linkend="xaggr">.
+     </para>
+    </warning>
+
     <para>
      As an example, we can define the type <type>text</type> as
      follows:
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 63a687e9869..8f641cc843b 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -40,12 +40,28 @@
  *	  is used to run finalize functions and compute the output tuple;
  *	  this context can be reset once per output tuple.
  *
+ *	  Beginning in PostgreSQL 8.1, the executor's AggState node is passed as
+ *	  the fmgr "context" value in all transfunc and finalfunc calls.  It is
+ *	  not really intended that the transition functions will look into the
+ *	  AggState node, but they can use code like
+ *			if (fcinfo->context && IsA(fcinfo->context, AggState))
+ *	  to verify that they are being called by nodeAgg.c and not as ordinary
+ *	  SQL functions.  The main reason a transition function might want to know
+ *	  that is that it can avoid palloc'ing a fixed-size pass-by-ref transition
+ *	  value on every call: it can instead just scribble on and return its left
+ *	  input.  Ordinarily it is completely forbidden for functions to modify
+ *	  pass-by-ref inputs, but in the aggregate case we know the left input is
+ *	  either the initial transition value or a previous function result, and
+ *	  in either case its value need not be preserved.  See int8inc() for an
+ *	  example.  Notice that advance_transition_function() is coded to avoid a
+ *	  data copy step when the previous transition value pointer is returned.
+ *
  *
  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.128 2005/01/28 19:33:56 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.129 2005/03/12 20:25:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -363,7 +379,7 @@ advance_transition_function(AggState *aggstate,
 	 */
 
 	/* MemSet(&fcinfo, 0, sizeof(fcinfo)); */
-	fcinfo.context = NULL;
+	fcinfo.context = (void *) aggstate;
 	fcinfo.resultinfo = NULL;
 	fcinfo.isnull = false;
 
@@ -541,6 +557,7 @@ finalize_aggregate(AggState *aggstate,
 		FunctionCallInfoData fcinfo;
 
 		MemSet(&fcinfo, 0, sizeof(fcinfo));
+		fcinfo.context = (void *) aggstate;
 		fcinfo.flinfo = &peraggstate->finalfn;
 		fcinfo.nargs = 1;
 		fcinfo.arg[0] = pergroupstate->transValue;
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
index a7f76e31d96..c5c3d30d03d 100644
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.57 2004/12/31 22:01:22 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.58 2005/03/12 20:25:06 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,6 +19,7 @@
 
 #include "funcapi.h"
 #include "libpq/pqformat.h"
+#include "nodes/nodes.h"
 #include "utils/int8.h"
 
 
@@ -649,17 +650,45 @@ int8mod(PG_FUNCTION_ARGS)
 Datum
 int8inc(PG_FUNCTION_ARGS)
 {
-	int64		arg = PG_GETARG_INT64(0);
-	int64		result;
+	if (fcinfo->context && IsA(fcinfo->context, AggState))
+	{
+		/*
+		 * Special case to avoid palloc overhead for COUNT(): when called
+		 * from nodeAgg, we know that the argument is modifiable local
+		 * storage, so just update it in-place.
+		 *
+		 * Note: this assumes int8 is a pass-by-ref type; if we ever support
+		 * pass-by-val int8, this should be ifdef'd out when int8 is
+		 * pass-by-val.
+		 */
+		int64	   *arg = (int64 *) PG_GETARG_POINTER(0);
+		int64		result;
 
-	result = arg + 1;
-	/* Overflow check */
-	if (arg > 0 && result < 0)
-		ereport(ERROR,
-				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-				 errmsg("bigint out of range")));
+		result = *arg + 1;
+		/* Overflow check */
+		if (result < 0 && *arg > 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+					 errmsg("bigint out of range")));
 
-	PG_RETURN_INT64(result);
+		*arg = result;
+		PG_RETURN_POINTER(arg);
+	}
+	else
+	{
+		/* Not called by nodeAgg, so just do it the dumb way */
+		int64		arg = PG_GETARG_INT64(0);
+		int64		result;
+
+		result = arg + 1;
+		/* Overflow check */
+		if (result < 0 && arg > 0)
+			ereport(ERROR,
+					(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+					 errmsg("bigint out of range")));
+
+		PG_RETURN_INT64(result);
+	}
 }
 
 Datum