Adjust the API for aggregate function calls so that a C-coded function

can tell whether it is being used as an aggregate or not.  This allows
such a function to avoid re-pallocing a pass-by-reference transition
value; normally it would be unsafe for a function to scribble on an input,
but in the aggregate case it's safe to reuse the old transition value.
Make int8inc() do this.  This gets a useful improvement in the speed of
COUNT(*), at least on narrow tables (it seems to be swamped by I/O when
the table rows are wide).  Per a discussion in early December with
Neil Conway.  I also fixed int_aggregate.c to check this, thereby
turning it into something approaching a supportable technique instead
of being a crude hack.
This commit is contained in:
Tom Lane 2005-03-12 20:25:06 +00:00
parent de004e44e2
commit fa5e44017a
6 changed files with 159 additions and 70 deletions

View file

@ -34,8 +34,12 @@
#include "utils/lsyscache.h"
/* This is actually a postgres version of a one dimensional array */
/*
* This is actually a postgres version of a one dimensional array.
* We cheat a little by using the lower-bound field as an indicator
* of the physically allocated size, while the dimensionality is the
* count of items accumulated so far.
*/
typedef struct
{
ArrayType a;
@ -56,7 +60,7 @@ typedef struct callContext
#define START_NUM 8 /* initial size of arrays */
#define PGARRAY_SIZE(n) (sizeof(PGARRAY) + (((n)-1)*sizeof(int4)))
static PGARRAY *GetPGArray(PGARRAY *p, int fAdd);
static PGARRAY *GetPGArray(PGARRAY *p, AggState *aggstate, bool fAdd);
static PGARRAY *ShrinkPGArray(PGARRAY *p);
Datum int_agg_state(PG_FUNCTION_ARGS);
@ -68,72 +72,68 @@ PG_FUNCTION_INFO_V1(int_agg_final_array);
PG_FUNCTION_INFO_V1(int_enum);
/*
* Manage the aggregation state of the array
* Manage the allocation state of the array
*
* Need to specify a suitably long-lived memory context, or it will vanish!
* PortalContext isn't really right, but it's close enough.
* Note that the array needs to be in a reasonably long-lived context,
* ie the Agg node's aggcontext.
*/
static PGARRAY *
GetPGArray(PGARRAY *p, int fAdd)
GetPGArray(PGARRAY *p, AggState *aggstate, bool fAdd)
{
if (!p)
{
/* New array */
int cb = PGARRAY_SIZE(START_NUM);
p = (PGARRAY *) MemoryContextAlloc(PortalContext, cb);
p = (PGARRAY *) MemoryContextAlloc(aggstate->aggcontext, cb);
p->a.size = cb;
p->a.ndim = 0;
p->a.ndim = 1;
p->a.flags = 0;
p->a.elemtype = INT4OID;
p->items = 0;
p->lower = START_NUM;
}
else if (fAdd)
{ /* Ensure array has space */
{
/* Ensure array has space for another item */
if (p->items >= p->lower)
{
PGARRAY *pn;
int n = p->lower + p->lower;
PGARRAY *pn;
int n = p->lower * 2;
int cbNew = PGARRAY_SIZE(n);
pn = (PGARRAY *) repalloc(p, cbNew);
pn = (PGARRAY *) MemoryContextAlloc(aggstate->aggcontext, cbNew);
memcpy(pn, p, p->a.size);
pn->a.size = cbNew;
pn->lower = n;
return pn;
/* do not pfree(p), because nodeAgg.c will */
p = pn;
}
}
return p;
}
/* Shrinks the array to its actual size and moves it into the standard
* memory allocation context, frees working memory
/*
* Shrinks the array to its actual size and moves it into the standard
* memory allocation context
*/
static PGARRAY *
ShrinkPGArray(PGARRAY * p)
ShrinkPGArray(PGARRAY *p)
{
PGARRAY *pnew = NULL;
PGARRAY *pnew;
/* get target size */
int cb = PGARRAY_SIZE(p->items);
if (p)
{
/* get target size */
int cb = PGARRAY_SIZE(p->items);
/* use current transaction context */
pnew = palloc(cb);
memcpy(pnew, p, cb);
/* use current transaction context */
pnew = palloc(cb);
/* fix up the fields in the new array to match normal conventions */
pnew->a.size = cb;
pnew->lower = 1;
/*
* Fix up the fields in the new structure, so Postgres understands
*/
memcpy(pnew, p, cb);
pnew->a.size = cb;
pnew->a.ndim = 1;
pnew->a.flags = 0;
pnew->a.elemtype = INT4OID;
pnew->lower = 1;
/* do not pfree(p), because nodeAgg.c will */
pfree(p);
}
return pnew;
}
@ -144,11 +144,18 @@ int_agg_state(PG_FUNCTION_ARGS)
PGARRAY *state;
PGARRAY *p;
/*
* As of PG 8.1 we can actually verify that we are being used as an
* aggregate function, and so it is safe to scribble on our left input.
*/
if (!(fcinfo->context && IsA(fcinfo->context, AggState)))
elog(ERROR, "int_agg_state may only be used as an aggregate");
if (PG_ARGISNULL(0))
state = NULL;
state = NULL; /* first time through */
else
state = (PGARRAY *) PG_GETARG_POINTER(0);
p = GetPGArray(state, 1);
p = GetPGArray(state, (AggState *) fcinfo->context, true);
if (!PG_ARGISNULL(1))
{
@ -164,22 +171,38 @@ int_agg_state(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(p);
}
/* This is the final function used for the integer aggregator. It returns all
/*
* This is the final function used for the integer aggregator. It returns all
* the integers collected as a one dimensional integer array
*/
Datum
int_agg_final_array(PG_FUNCTION_ARGS)
{
PGARRAY *state = (PGARRAY *) PG_GETARG_POINTER(0);
PGARRAY *pnew = ShrinkPGArray(GetPGArray(state, 0));
PGARRAY *state;
PGARRAY *p;
PGARRAY *pnew;
if (pnew)
PG_RETURN_POINTER(pnew);
/*
* As of PG 8.1 we can actually verify that we are being used as an
* aggregate function, and so it is safe to scribble on our left input.
*/
if (!(fcinfo->context && IsA(fcinfo->context, AggState)))
elog(ERROR, "int_agg_final_array may only be used as an aggregate");
if (PG_ARGISNULL(0))
state = NULL; /* zero items in aggregation */
else
PG_RETURN_NULL();
state = (PGARRAY *) PG_GETARG_POINTER(0);
p = GetPGArray(state, (AggState *) fcinfo->context, false);
pnew = ShrinkPGArray(p);
PG_RETURN_POINTER(pnew);
}
/* This function accepts an array, and returns one item for each entry in the array */
/*
* This function accepts an array, and returns one item for each entry in the
* array
*/
Datum
int_enum(PG_FUNCTION_ARGS)
{

View file

@ -6,14 +6,14 @@ SET search_path = public;
CREATE OR REPLACE FUNCTION int_agg_state (int4[], int4)
RETURNS int4[]
AS 'MODULE_PATHNAME','int_agg_state'
LANGUAGE 'C';
LANGUAGE C;
-- Internal function for the aggregate
-- Is called at the end of the aggregation, and returns an array.
CREATE OR REPLACE FUNCTION int_agg_final_array (int4[])
RETURNS int4[]
AS 'MODULE_PATHNAME','int_agg_final_array'
LANGUAGE 'C' STRICT;
LANGUAGE C;
-- The aggregate function itself
-- uses the above functions to create an array of integers from an aggregation.
@ -24,15 +24,10 @@ CREATE AGGREGATE int_array_aggregate (
FINALFUNC = int_agg_final_array
);
-- The aggregate component functions are not designed to be called
-- independently, so disable public access to them
REVOKE ALL ON FUNCTION int_agg_state (int4[], int4) FROM PUBLIC;
REVOKE ALL ON FUNCTION int_agg_final_array (int4[]) FROM PUBLIC;
-- The enumeration function
-- returns each element in a one dimensional integer array
-- as a row.
CREATE OR REPLACE FUNCTION int_array_enum(int4[])
RETURNS setof integer
AS 'MODULE_PATHNAME','int_enum'
LANGUAGE 'C' IMMUTABLE STRICT;
LANGUAGE C IMMUTABLE STRICT;

View file

@ -1,5 +1,5 @@
<!--
$PostgreSQL: pgsql/doc/src/sgml/xaggr.sgml,v 1.26 2005/01/22 22:56:36 momjian Exp $
$PostgreSQL: pgsql/doc/src/sgml/xaggr.sgml,v 1.27 2005/03/12 20:25:06 tgl Exp $
-->
<sect1 id="xaggr">
@ -161,6 +161,21 @@ SELECT attrelid::regclass, array_accum(atttypid)
</programlisting>
</para>
<para>
A function written in C can detect that it is being called as an
aggregate transition or final function by seeing if it was passed
an <structname>AggState</> node as the function call <quote>context</>,
for example by
<programlisting>
if (fcinfo->context &amp;&amp; IsA(fcinfo->context, AggState))
</programlisting>
One reason for checking this is that when it is true, the left input
must be a temporary transition value and can therefore safely be modified
in-place rather than allocating a new copy. (This is the <emphasis>only</>
case where it is safe for a function to modify a pass-by-reference input.)
See <literal>int8inc()></> for an example.
</para>
<para>
For further details see the
<xref linkend="sql-createaggregate" endterm="sql-createaggregate-title">

View file

@ -1,5 +1,5 @@
<!--
$PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.99 2005/02/21 06:12:14 neilc Exp $
$PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.100 2005/03/12 20:25:06 tgl Exp $
-->
<sect1 id="xfunc">
@ -1188,10 +1188,10 @@ typedef struct
them in and out of <productname>PostgreSQL</productname> functions.
To return a value of such a type, allocate the right amount of
memory with <literal>palloc</literal>, fill in the allocated memory,
and return a pointer to it. (You can also return an input value
that has the same type as the return value directly by returning
the pointer to the input value. <emphasis>Never</> modify the
contents of a pass-by-reference input value, however.)
and return a pointer to it. (Also, if you just want to return the
same value as one of your input arguments that's of the same data type,
you can skip the extra <literal>palloc</literal> and just return the
pointer to the input value.)
</para>
<para>
@ -1205,6 +1205,16 @@ typedef struct
itself.
</para>
<warning>
<para>
<emphasis>Never</> modify the contents of a pass-by-reference input
value. If you do so you are likely to corrupt on-disk data, since
the pointer you are given may well point directly into a disk buffer.
The sole exception to this rule is explained in
<xref linkend="xaggr">.
</para>
</warning>
<para>
As an example, we can define the type <type>text</type> as
follows:

View file

@ -40,12 +40,28 @@
* is used to run finalize functions and compute the output tuple;
* this context can be reset once per output tuple.
*
* Beginning in PostgreSQL 8.1, the executor's AggState node is passed as
* the fmgr "context" value in all transfunc and finalfunc calls. It is
* not really intended that the transition functions will look into the
* AggState node, but they can use code like
* if (fcinfo->context && IsA(fcinfo->context, AggState))
* to verify that they are being called by nodeAgg.c and not as ordinary
* SQL functions. The main reason a transition function might want to know
* that is that it can avoid palloc'ing a fixed-size pass-by-ref transition
* value on every call: it can instead just scribble on and return its left
* input. Ordinarily it is completely forbidden for functions to modify
* pass-by-ref inputs, but in the aggregate case we know the left input is
* either the initial transition value or a previous function result, and
* in either case its value need not be preserved. See int8inc() for an
* example. Notice that advance_transition_function() is coded to avoid a
* data copy step when the previous transition value pointer is returned.
*
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.128 2005/01/28 19:33:56 tgl Exp $
* $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.129 2005/03/12 20:25:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -363,7 +379,7 @@ advance_transition_function(AggState *aggstate,
*/
/* MemSet(&fcinfo, 0, sizeof(fcinfo)); */
fcinfo.context = NULL;
fcinfo.context = (void *) aggstate;
fcinfo.resultinfo = NULL;
fcinfo.isnull = false;
@ -541,6 +557,7 @@ finalize_aggregate(AggState *aggstate,
FunctionCallInfoData fcinfo;
MemSet(&fcinfo, 0, sizeof(fcinfo));
fcinfo.context = (void *) aggstate;
fcinfo.flinfo = &peraggstate->finalfn;
fcinfo.nargs = 1;
fcinfo.arg[0] = pergroupstate->transValue;

View file

@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.57 2004/12/31 22:01:22 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.58 2005/03/12 20:25:06 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -19,6 +19,7 @@
#include "funcapi.h"
#include "libpq/pqformat.h"
#include "nodes/nodes.h"
#include "utils/int8.h"
@ -649,17 +650,45 @@ int8mod(PG_FUNCTION_ARGS)
Datum
int8inc(PG_FUNCTION_ARGS)
{
int64 arg = PG_GETARG_INT64(0);
int64 result;
if (fcinfo->context && IsA(fcinfo->context, AggState))
{
/*
* Special case to avoid palloc overhead for COUNT(): when called
* from nodeAgg, we know that the argument is modifiable local
* storage, so just update it in-place.
*
* Note: this assumes int8 is a pass-by-ref type; if we ever support
* pass-by-val int8, this should be ifdef'd out when int8 is
* pass-by-val.
*/
int64 *arg = (int64 *) PG_GETARG_POINTER(0);
int64 result;
result = arg + 1;
/* Overflow check */
if (arg > 0 && result < 0)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("bigint out of range")));
result = *arg + 1;
/* Overflow check */
if (result < 0 && *arg > 0)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("bigint out of range")));
PG_RETURN_INT64(result);
*arg = result;
PG_RETURN_POINTER(arg);
}
else
{
/* Not called by nodeAgg, so just do it the dumb way */
int64 arg = PG_GETARG_INT64(0);
int64 result;
result = arg + 1;
/* Overflow check */
if (result < 0 && arg > 0)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("bigint out of range")));
PG_RETURN_INT64(result);
}
}
Datum