diff --git a/contrib/intagg/int_aggregate.c b/contrib/intagg/int_aggregate.c index cb0f1f3a45..35c883642b 100644 --- a/contrib/intagg/int_aggregate.c +++ b/contrib/intagg/int_aggregate.c @@ -34,8 +34,12 @@ #include "utils/lsyscache.h" -/* This is actually a postgres version of a one dimensional array */ - +/* + * This is actually a postgres version of a one dimensional array. + * We cheat a little by using the lower-bound field as an indicator + * of the physically allocated size, while the dimensionality is the + * count of items accumulated so far. + */ typedef struct { ArrayType a; @@ -56,7 +60,7 @@ typedef struct callContext #define START_NUM 8 /* initial size of arrays */ #define PGARRAY_SIZE(n) (sizeof(PGARRAY) + (((n)-1)*sizeof(int4))) -static PGARRAY *GetPGArray(PGARRAY *p, int fAdd); +static PGARRAY *GetPGArray(PGARRAY *p, AggState *aggstate, bool fAdd); static PGARRAY *ShrinkPGArray(PGARRAY *p); Datum int_agg_state(PG_FUNCTION_ARGS); @@ -68,72 +72,68 @@ PG_FUNCTION_INFO_V1(int_agg_final_array); PG_FUNCTION_INFO_V1(int_enum); /* - * Manage the aggregation state of the array + * Manage the allocation state of the array * - * Need to specify a suitably long-lived memory context, or it will vanish! - * PortalContext isn't really right, but it's close enough. + * Note that the array needs to be in a reasonably long-lived context, + * ie the Agg node's aggcontext. */ static PGARRAY * -GetPGArray(PGARRAY *p, int fAdd) +GetPGArray(PGARRAY *p, AggState *aggstate, bool fAdd) { if (!p) { /* New array */ int cb = PGARRAY_SIZE(START_NUM); - p = (PGARRAY *) MemoryContextAlloc(PortalContext, cb); + p = (PGARRAY *) MemoryContextAlloc(aggstate->aggcontext, cb); p->a.size = cb; - p->a.ndim = 0; + p->a.ndim = 1; p->a.flags = 0; p->a.elemtype = INT4OID; p->items = 0; p->lower = START_NUM; } else if (fAdd) - { /* Ensure array has space */ + { + /* Ensure array has space for another item */ if (p->items >= p->lower) { - PGARRAY *pn; - int n = p->lower + p->lower; + PGARRAY *pn; + int n = p->lower * 2; int cbNew = PGARRAY_SIZE(n); - pn = (PGARRAY *) repalloc(p, cbNew); + pn = (PGARRAY *) MemoryContextAlloc(aggstate->aggcontext, cbNew); + memcpy(pn, p, p->a.size); pn->a.size = cbNew; pn->lower = n; - return pn; + /* do not pfree(p), because nodeAgg.c will */ + p = pn; } } return p; } -/* Shrinks the array to its actual size and moves it into the standard - * memory allocation context, frees working memory +/* + * Shrinks the array to its actual size and moves it into the standard + * memory allocation context */ static PGARRAY * -ShrinkPGArray(PGARRAY * p) +ShrinkPGArray(PGARRAY *p) { - PGARRAY *pnew = NULL; + PGARRAY *pnew; + /* get target size */ + int cb = PGARRAY_SIZE(p->items); - if (p) - { - /* get target size */ - int cb = PGARRAY_SIZE(p->items); + /* use current transaction context */ + pnew = palloc(cb); + memcpy(pnew, p, cb); - /* use current transaction context */ - pnew = palloc(cb); + /* fix up the fields in the new array to match normal conventions */ + pnew->a.size = cb; + pnew->lower = 1; - /* - * Fix up the fields in the new structure, so Postgres understands - */ - memcpy(pnew, p, cb); - pnew->a.size = cb; - pnew->a.ndim = 1; - pnew->a.flags = 0; - pnew->a.elemtype = INT4OID; - pnew->lower = 1; + /* do not pfree(p), because nodeAgg.c will */ - pfree(p); - } return pnew; } @@ -144,11 +144,18 @@ int_agg_state(PG_FUNCTION_ARGS) PGARRAY *state; PGARRAY *p; + /* + * As of PG 8.1 we can actually verify that we are being used as an + * aggregate function, and so it is safe to scribble on our left input. + */ + if (!(fcinfo->context && IsA(fcinfo->context, AggState))) + elog(ERROR, "int_agg_state may only be used as an aggregate"); + if (PG_ARGISNULL(0)) - state = NULL; + state = NULL; /* first time through */ else state = (PGARRAY *) PG_GETARG_POINTER(0); - p = GetPGArray(state, 1); + p = GetPGArray(state, (AggState *) fcinfo->context, true); if (!PG_ARGISNULL(1)) { @@ -164,22 +171,38 @@ int_agg_state(PG_FUNCTION_ARGS) PG_RETURN_POINTER(p); } -/* This is the final function used for the integer aggregator. It returns all +/* + * This is the final function used for the integer aggregator. It returns all * the integers collected as a one dimensional integer array */ Datum int_agg_final_array(PG_FUNCTION_ARGS) { - PGARRAY *state = (PGARRAY *) PG_GETARG_POINTER(0); - PGARRAY *pnew = ShrinkPGArray(GetPGArray(state, 0)); + PGARRAY *state; + PGARRAY *p; + PGARRAY *pnew; - if (pnew) - PG_RETURN_POINTER(pnew); + /* + * As of PG 8.1 we can actually verify that we are being used as an + * aggregate function, and so it is safe to scribble on our left input. + */ + if (!(fcinfo->context && IsA(fcinfo->context, AggState))) + elog(ERROR, "int_agg_final_array may only be used as an aggregate"); + + if (PG_ARGISNULL(0)) + state = NULL; /* zero items in aggregation */ else - PG_RETURN_NULL(); + state = (PGARRAY *) PG_GETARG_POINTER(0); + p = GetPGArray(state, (AggState *) fcinfo->context, false); + + pnew = ShrinkPGArray(p); + PG_RETURN_POINTER(pnew); } -/* This function accepts an array, and returns one item for each entry in the array */ +/* + * This function accepts an array, and returns one item for each entry in the + * array + */ Datum int_enum(PG_FUNCTION_ARGS) { diff --git a/contrib/intagg/int_aggregate.sql.in b/contrib/intagg/int_aggregate.sql.in index caaf01afdb..cc1cd92727 100644 --- a/contrib/intagg/int_aggregate.sql.in +++ b/contrib/intagg/int_aggregate.sql.in @@ -6,14 +6,14 @@ SET search_path = public; CREATE OR REPLACE FUNCTION int_agg_state (int4[], int4) RETURNS int4[] AS 'MODULE_PATHNAME','int_agg_state' -LANGUAGE 'C'; +LANGUAGE C; -- Internal function for the aggregate -- Is called at the end of the aggregation, and returns an array. CREATE OR REPLACE FUNCTION int_agg_final_array (int4[]) RETURNS int4[] AS 'MODULE_PATHNAME','int_agg_final_array' -LANGUAGE 'C' STRICT; +LANGUAGE C; -- The aggregate function itself -- uses the above functions to create an array of integers from an aggregation. @@ -24,15 +24,10 @@ CREATE AGGREGATE int_array_aggregate ( FINALFUNC = int_agg_final_array ); --- The aggregate component functions are not designed to be called --- independently, so disable public access to them -REVOKE ALL ON FUNCTION int_agg_state (int4[], int4) FROM PUBLIC; -REVOKE ALL ON FUNCTION int_agg_final_array (int4[]) FROM PUBLIC; - -- The enumeration function -- returns each element in a one dimensional integer array -- as a row. CREATE OR REPLACE FUNCTION int_array_enum(int4[]) RETURNS setof integer AS 'MODULE_PATHNAME','int_enum' -LANGUAGE 'C' IMMUTABLE STRICT; +LANGUAGE C IMMUTABLE STRICT; diff --git a/doc/src/sgml/xaggr.sgml b/doc/src/sgml/xaggr.sgml index d9c41714df..6a928f7b4f 100644 --- a/doc/src/sgml/xaggr.sgml +++ b/doc/src/sgml/xaggr.sgml @@ -1,5 +1,5 @@ @@ -161,6 +161,21 @@ SELECT attrelid::regclass, array_accum(atttypid) + + A function written in C can detect that it is being called as an + aggregate transition or final function by seeing if it was passed + an AggState node as the function call context, + for example by + + if (fcinfo->context && IsA(fcinfo->context, AggState)) + + One reason for checking this is that when it is true, the left input + must be a temporary transition value and can therefore safely be modified + in-place rather than allocating a new copy. (This is the only + case where it is safe for a function to modify a pass-by-reference input.) + See int8inc()> for an example. + + For further details see the diff --git a/doc/src/sgml/xfunc.sgml b/doc/src/sgml/xfunc.sgml index 72772140b3..0f051a3686 100644 --- a/doc/src/sgml/xfunc.sgml +++ b/doc/src/sgml/xfunc.sgml @@ -1,5 +1,5 @@ @@ -1188,10 +1188,10 @@ typedef struct them in and out of PostgreSQL functions. To return a value of such a type, allocate the right amount of memory with palloc, fill in the allocated memory, - and return a pointer to it. (You can also return an input value - that has the same type as the return value directly by returning - the pointer to the input value. Never modify the - contents of a pass-by-reference input value, however.) + and return a pointer to it. (Also, if you just want to return the + same value as one of your input arguments that's of the same data type, + you can skip the extra palloc and just return the + pointer to the input value.) @@ -1205,6 +1205,16 @@ typedef struct itself. + + + Never modify the contents of a pass-by-reference input + value. If you do so you are likely to corrupt on-disk data, since + the pointer you are given may well point directly into a disk buffer. + The sole exception to this rule is explained in + . + + + As an example, we can define the type text as follows: diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 63a687e986..8f641cc843 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -40,12 +40,28 @@ * is used to run finalize functions and compute the output tuple; * this context can be reset once per output tuple. * + * Beginning in PostgreSQL 8.1, the executor's AggState node is passed as + * the fmgr "context" value in all transfunc and finalfunc calls. It is + * not really intended that the transition functions will look into the + * AggState node, but they can use code like + * if (fcinfo->context && IsA(fcinfo->context, AggState)) + * to verify that they are being called by nodeAgg.c and not as ordinary + * SQL functions. The main reason a transition function might want to know + * that is that it can avoid palloc'ing a fixed-size pass-by-ref transition + * value on every call: it can instead just scribble on and return its left + * input. Ordinarily it is completely forbidden for functions to modify + * pass-by-ref inputs, but in the aggregate case we know the left input is + * either the initial transition value or a previous function result, and + * in either case its value need not be preserved. See int8inc() for an + * example. Notice that advance_transition_function() is coded to avoid a + * data copy step when the previous transition value pointer is returned. + * * * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.128 2005/01/28 19:33:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeAgg.c,v 1.129 2005/03/12 20:25:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -363,7 +379,7 @@ advance_transition_function(AggState *aggstate, */ /* MemSet(&fcinfo, 0, sizeof(fcinfo)); */ - fcinfo.context = NULL; + fcinfo.context = (void *) aggstate; fcinfo.resultinfo = NULL; fcinfo.isnull = false; @@ -541,6 +557,7 @@ finalize_aggregate(AggState *aggstate, FunctionCallInfoData fcinfo; MemSet(&fcinfo, 0, sizeof(fcinfo)); + fcinfo.context = (void *) aggstate; fcinfo.flinfo = &peraggstate->finalfn; fcinfo.nargs = 1; fcinfo.arg[0] = pergroupstate->transValue; diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index a7f76e31d9..c5c3d30d03 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.57 2004/12/31 22:01:22 pgsql Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/int8.c,v 1.58 2005/03/12 20:25:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,7 @@ #include "funcapi.h" #include "libpq/pqformat.h" +#include "nodes/nodes.h" #include "utils/int8.h" @@ -649,17 +650,45 @@ int8mod(PG_FUNCTION_ARGS) Datum int8inc(PG_FUNCTION_ARGS) { - int64 arg = PG_GETARG_INT64(0); - int64 result; + if (fcinfo->context && IsA(fcinfo->context, AggState)) + { + /* + * Special case to avoid palloc overhead for COUNT(): when called + * from nodeAgg, we know that the argument is modifiable local + * storage, so just update it in-place. + * + * Note: this assumes int8 is a pass-by-ref type; if we ever support + * pass-by-val int8, this should be ifdef'd out when int8 is + * pass-by-val. + */ + int64 *arg = (int64 *) PG_GETARG_POINTER(0); + int64 result; - result = arg + 1; - /* Overflow check */ - if (arg > 0 && result < 0) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("bigint out of range"))); + result = *arg + 1; + /* Overflow check */ + if (result < 0 && *arg > 0) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); - PG_RETURN_INT64(result); + *arg = result; + PG_RETURN_POINTER(arg); + } + else + { + /* Not called by nodeAgg, so just do it the dumb way */ + int64 arg = PG_GETARG_INT64(0); + int64 result; + + result = arg + 1; + /* Overflow check */ + if (result < 0 && arg > 0) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + + PG_RETURN_INT64(result); + } } Datum