Implement the width_bucket() function, per SQL2003. This commit only adds

a variant of the function for the 'numeric' datatype; it would be possible
to add additional variants for other datatypes, but I haven't done so yet.

This commit includes regression tests and minimal documentation; if we
want developers to actually use this function in applications, we'll
probably need to document what it does more fully.
This commit is contained in:
Neil Conway 2004-05-14 21:42:30 +00:00
parent 19a495caaa
commit 0079547bcb
10 changed files with 272 additions and 23 deletions

View file

@ -1,4 +1,4 @@
<!-- $PostgreSQL: pgsql/doc/src/sgml/errcodes.sgml,v 1.4 2004/05/14 18:04:02 neilc Exp $ -->
<!-- $PostgreSQL: pgsql/doc/src/sgml/errcodes.sgml,v 1.5 2004/05/14 21:42:27 neilc Exp $ -->
<appendix id="errcodes-appendix">
<title><productname>PostgreSQL</productname> Error Codes</title>
@ -310,6 +310,11 @@
<entry>INTERVAL FIELD OVERFLOW</entry>
</row>
<row>
<entry><literal>2201G</literal></entry>
<entry>INVALID ARGUMENT FOR WIDTH BUCKET FUNCTION</entry>
</row>
<row>
<entry><literal>22018</literal></entry>
<entry>INVALID CHARACTER VALUE FOR CAST</entry>

View file

@ -1,5 +1,5 @@
<!--
$PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.201 2004/05/10 22:44:42 tgl Exp $
$PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.202 2004/05/14 21:42:27 neilc Exp $
PostgreSQL documentation
-->
@ -751,6 +751,16 @@ PostgreSQL documentation
<entry><literal>42.43</literal></entry>
</row>
<row>
<entry><literal><function>width_bucket</function>(<parameter>op</parameter> <type>numeric</type>, <parameter>b1</parameter> <type>numeric</type>, <parameter>b2</parameter> <type>numeric</type>, <parameter>count</parameter> <type>integer</type>)</literal></entry>
<entry><type>integer</type></entry>
<entry>return the bucket to which <parameter>operand</> would
be assigned in an equidepth histogram with <parameter>count</>
buckets, an upper bound of <parameter>b1</>, and a lower bound
of <parameter>b2</></entry>
<entry><literal>width_bucket(5.35, 0.024, 10.06, 5)</literal></entry>
<entry><literal>3</literal></entry>
</row>
</tbody>
</tgroup>
</table>

View file

@ -1,5 +1,5 @@
<!--
$PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.82 2004/05/10 22:44:43 tgl Exp $
$PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.83 2004/05/14 21:42:27 neilc Exp $
-->
<sect1 id="xfunc">
@ -56,7 +56,7 @@ $PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.82 2004/05/10 22:44:43 tgl Exp $
</para>
<para>
It's easiest to define <acronym>SQL</acronym>
It's easiest to define <acronym>SQL</acronym>
functions, so we'll start by discussing those.
Most of the concepts presented for <acronym>SQL</acronym> functions
will carry over to the other types of functions.
@ -64,12 +64,12 @@ $PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.82 2004/05/10 22:44:43 tgl Exp $
<para>
Throughout this chapter, it can be useful to look at the reference
page of the <command>CREATE FUNCTION</command> command to
understand the examples better.
Some examples from this chapter
can be found in <filename>funcs.sql</filename>
and <filename>funcs.c</filename> in the <filename>src/tutorial</>
directory in the <productname>PostgreSQL</productname> source distribution.
page of the <xref linkend="sql-createfunction"> command to
understand the examples better. Some examples from this chapter
can be found in <filename>funcs.sql</filename> and
<filename>funcs.c</filename> in the <filename>src/tutorial</>
directory in the <productname>PostgreSQL</productname> source
distribution.
</para>
</sect1>

View file

@ -14,7 +14,7 @@
* Copyright (c) 1998-2003, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.73 2004/05/07 00:24:58 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.74 2004/05/14 21:42:28 neilc Exp $
*
*-------------------------------------------------------------------------
*/
@ -252,6 +252,7 @@ static Numeric make_result(NumericVar *var);
static void apply_typmod(NumericVar *var, int32 typmod);
static int32 numericvar_to_int4(NumericVar *var);
static bool numericvar_to_int8(NumericVar *var, int64 *result);
static void int8_to_numericvar(int64 val, NumericVar *var);
static double numeric_to_double_no_overflow(Numeric num);
@ -285,6 +286,8 @@ static void sub_abs(NumericVar *var1, NumericVar *var2, NumericVar *result);
static void round_var(NumericVar *var, int rscale);
static void trunc_var(NumericVar *var, int rscale);
static void strip_var(NumericVar *var);
static void compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
NumericVar *count_var, NumericVar *result_var);
/* ----------------------------------------------------------------------
@ -803,6 +806,125 @@ numeric_floor(PG_FUNCTION_ARGS)
PG_RETURN_NUMERIC(res);
}
/*
* width_bucket_numeric() -
*
* 'bound1' and 'bound2' are the lower and upper bounds of the
* histogram's range, respectively. 'count' is the number of buckets
* in the histogram. width_bucket() returns an integer indicating the
* bucket number that 'operand' belongs in for an equiwidth histogram
* with the specified characteristics. An operand smaller than the
* lower bound is assigned to bucket 0. An operand greater than the
* upper bound is assigned to an additional bucket (with number
* count+1).
*/
Datum
width_bucket_numeric(PG_FUNCTION_ARGS)
{
Numeric operand = PG_GETARG_NUMERIC(0);
Numeric bound1 = PG_GETARG_NUMERIC(1);
Numeric bound2 = PG_GETARG_NUMERIC(2);
int32 count = PG_GETARG_INT32(3);
NumericVar count_var;
NumericVar result_var;
int32 result;
if (count <= 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
errmsg("count must be greater than zero")));
init_var(&result_var);
init_var(&count_var);
/* Convert 'count' to a numeric, for ease of use later */
int8_to_numericvar((int64) count, &count_var);
switch (cmp_numerics(bound1, bound2))
{
case 0:
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
errmsg("lower bound cannot equal upper bound")));
/* bound1 < bound2 */
case -1:
if (cmp_numerics(operand, bound1) < 0)
set_var_from_var(&const_zero, &result_var);
else if (cmp_numerics(operand, bound2) >= 0)
add_var(&count_var, &const_one, &result_var);
else
compute_bucket(operand, bound1, bound2,
&count_var, &result_var);
break;
/* bound1 > bound2 */
case 1:
if (cmp_numerics(operand, bound1) > 0)
set_var_from_var(&const_zero, &result_var);
else if (cmp_numerics(operand, bound2) <= 0)
add_var(&count_var, &const_one, &result_var);
else
compute_bucket(operand, bound1, bound2,
&count_var, &result_var);
break;
}
result = numericvar_to_int4(&result_var);
free_var(&count_var);
free_var(&result_var);
PG_RETURN_INT32(result);
}
/*
* compute_bucket() -
*
* If 'operand' is not outside the bucket range, determine the correct
* bucket for it to go. The calculations performed by this function
* are derived directly from the SQL2003 spec.
*/
static void
compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
NumericVar *count_var, NumericVar *result_var)
{
NumericVar bound1_var;
NumericVar bound2_var;
NumericVar operand_var;
init_var(&bound1_var);
init_var(&bound2_var);
init_var(&operand_var);
set_var_from_num(bound1, &bound1_var);
set_var_from_num(bound2, &bound2_var);
set_var_from_num(operand, &operand_var);
if (cmp_var(&bound1_var, &bound2_var) < 0)
{
sub_var(&operand_var, &bound1_var, &operand_var);
sub_var(&bound2_var, &bound1_var, &bound2_var);
div_var(&operand_var, &bound2_var, result_var,
select_div_scale(&operand_var, &bound2_var));
}
else
{
sub_var(&bound1_var, &operand_var, &operand_var);
sub_var(&bound1_var, &bound2_var, &bound1_var);
div_var(&operand_var, &bound1_var, result_var,
select_div_scale(&operand_var, &bound1_var));
}
mul_var(result_var, count_var, result_var,
result_var->dscale + count_var->dscale);
add_var(result_var, &const_one, result_var);
floor_var(result_var, result_var);
free_var(&bound1_var);
free_var(&bound2_var);
free_var(&operand_var);
}
/* ----------------------------------------------------------------------
*
@ -1612,7 +1734,6 @@ numeric_int4(PG_FUNCTION_ARGS)
{
Numeric num = PG_GETARG_NUMERIC(0);
NumericVar x;
int64 val;
int32 result;
/* XXX would it be better to return NULL? */
@ -1621,17 +1742,30 @@ numeric_int4(PG_FUNCTION_ARGS)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot convert NaN to integer")));
/* Convert to variable format and thence to int8 */
/* Convert to variable format, then convert to int4 */
init_var(&x);
set_var_from_num(num, &x);
result = numericvar_to_int4(&x);
free_var(&x);
PG_RETURN_INT32(result);
}
if (!numericvar_to_int8(&x, &val))
/*
* Given a NumericVar, convert it to an int32. If the NumericVar
* exceeds the range of an int32, raise the appropriate error via
* ereport(). The input NumericVar is *not* free'd.
*/
static int32
numericvar_to_int4(NumericVar *var)
{
int32 result;
int64 val;
if (!numericvar_to_int8(var, &val))
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("integer out of range")));
free_var(&x);
/* Down-convert to int4 */
result = (int32) val;
@ -1641,10 +1775,9 @@ numeric_int4(PG_FUNCTION_ARGS)
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("integer out of range")));
PG_RETURN_INT32(result);
return result;
}
Datum
int8_numeric(PG_FUNCTION_ARGS)
{

View file

@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.229 2004/05/10 22:44:49 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.230 2004/05/14 21:42:28 neilc Exp $
*
*-------------------------------------------------------------------------
*/
@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 200405101
#define CATALOG_VERSION_NO 200405141
#endif

View file

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.328 2004/05/07 16:57:16 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.329 2004/05/14 21:42:28 neilc Exp $
*
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
@ -2508,6 +2508,8 @@ DATA(insert OID = 1745 ( float4 PGNSP PGUID 12 f f t f i 1 700 "1700" _null_
DESCR("(internal)");
DATA(insert OID = 1746 ( float8 PGNSP PGUID 12 f f t f i 1 701 "1700" _null_ numeric_float8 - _null_ ));
DESCR("(internal)");
DATA(insert OID = 2170 ( width_bucket PGNSP PGUID 12 f f t f i 4 23 "1700 1700 1700 23" _null_ width_bucket_numeric - _null_ ));
DESCR("bucket number of operand in equidepth histogram");
DATA(insert OID = 1747 ( time_pl_interval PGNSP PGUID 12 f f t f i 2 1083 "1083 1186" _null_ time_pl_interval - _null_ ));
DESCR("plus");

View file

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.237 2004/05/05 04:48:47 tgl Exp $
* $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.238 2004/05/14 21:42:30 neilc Exp $
*
*-------------------------------------------------------------------------
*/
@ -758,6 +758,7 @@ extern Datum int8_sum(PG_FUNCTION_ARGS);
extern Datum int2_avg_accum(PG_FUNCTION_ARGS);
extern Datum int4_avg_accum(PG_FUNCTION_ARGS);
extern Datum int8_avg(PG_FUNCTION_ARGS);
extern Datum width_bucket_numeric(PG_FUNCTION_ARGS);
/* ri_triggers.c */
extern Datum RI_FKey_check_ins(PG_FUNCTION_ARGS);

View file

@ -11,7 +11,7 @@
*
* Copyright (c) 2003, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/utils/errcodes.h,v 1.9 2004/05/14 18:04:02 neilc Exp $
* $PostgreSQL: pgsql/src/include/utils/errcodes.h,v 1.10 2004/05/14 21:42:30 neilc Exp $
*
*-------------------------------------------------------------------------
*/
@ -116,6 +116,7 @@
#define ERRCODE_ESCAPE_CHARACTER_CONFLICT MAKE_SQLSTATE('2','2', '0','0','B')
#define ERRCODE_INDICATOR_OVERFLOW MAKE_SQLSTATE('2','2', '0','2','2')
#define ERRCODE_INTERVAL_FIELD_OVERFLOW MAKE_SQLSTATE('2','2', '0','1','5')
#define ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION MAKE_SQLSTATE('2','2', '0', '1', 'G')
#define ERRCODE_INVALID_CHARACTER_VALUE_FOR_CAST MAKE_SQLSTATE('2','2', '0','1','8')
#define ERRCODE_INVALID_DATETIME_FORMAT MAKE_SQLSTATE('2','2', '0','0','7')
#define ERRCODE_INVALID_ESCAPE_CHARACTER MAKE_SQLSTATE('2','2', '0','1','9')

View file

@ -730,6 +730,57 @@ SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round;
(7 rows)
DROP TABLE ceil_floor_round;
-- Testing for width_bucket()
-- NULL result
SELECT width_bucket(NULL, NULL, NULL, NULL);
width_bucket
--------------
(1 row)
-- errors
SELECT width_bucket(5.0, 3.0, 4.0, 0);
ERROR: count must be greater than zero
SELECT width_bucket(5.0, 3.0, 4.0, -5);
ERROR: count must be greater than zero
SELECT width_bucket(3.0, 3.0, 3.0, 888);
ERROR: lower bound cannot equal upper bound
-- normal operation
CREATE TABLE width_bucket_test (operand numeric);
COPY width_bucket_test FROM stdin;
SELECT
operand,
width_bucket(operand, 0, 10, 5) AS wb_1,
width_bucket(operand, 10, 0, 5) AS wb_2,
width_bucket(operand, 2, 8, 4) AS wb_3,
width_bucket(operand, 5.0, 5.5, 20) AS wb_4,
width_bucket(operand, -25, 25, 10) AS wb_5
FROM width_bucket_test;
operand | wb_1 | wb_2 | wb_3 | wb_4 | wb_5
------------------+------+------+------+------+------
-5.2 | 0 | 6 | 0 | 0 | 4
-0.0000000000001 | 0 | 6 | 0 | 0 | 5
0.0000000000001 | 1 | 5 | 0 | 0 | 6
1 | 1 | 5 | 0 | 0 | 6
1.99999999999999 | 1 | 5 | 0 | 0 | 6
2 | 2 | 5 | 1 | 0 | 6
2.00000000000001 | 2 | 4 | 1 | 0 | 6
3 | 2 | 4 | 1 | 0 | 6
4 | 3 | 4 | 2 | 0 | 6
4.5 | 3 | 3 | 2 | 0 | 6
5 | 3 | 3 | 3 | 1 | 7
5.5 | 3 | 3 | 3 | 21 | 7
6 | 4 | 3 | 3 | 21 | 7
7 | 4 | 2 | 4 | 21 | 7
8 | 5 | 2 | 5 | 21 | 7
9 | 5 | 1 | 5 | 21 | 7
9.99999999999999 | 5 | 1 | 5 | 21 | 7
10 | 6 | 1 | 5 | 21 | 8
10.0000000000001 | 6 | 0 | 5 | 21 | 8
NaN | 6 | 0 | 5 | 21 | 11
(20 rows)
DROP TABLE width_bucket_test;
-- TO_CHAR()
--
SELECT '' AS to_char_1, to_char(val, '9G999G999G999G999G999')

View file

@ -667,6 +667,52 @@ INSERT INTO ceil_floor_round VALUES ('-0.000001');
SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round;
DROP TABLE ceil_floor_round;
-- Testing for width_bucket()
-- NULL result
SELECT width_bucket(NULL, NULL, NULL, NULL);
-- errors
SELECT width_bucket(5.0, 3.0, 4.0, 0);
SELECT width_bucket(5.0, 3.0, 4.0, -5);
SELECT width_bucket(3.0, 3.0, 3.0, 888);
-- normal operation
CREATE TABLE width_bucket_test (operand numeric);
COPY width_bucket_test FROM stdin;
-5.2
-0.0000000000001
0.0000000000001
1
1.99999999999999
2
2.00000000000001
3
4
4.5
5
5.5
6
7
8
9
9.99999999999999
10
10.0000000000001
NaN
\.
SELECT
operand,
width_bucket(operand, 0, 10, 5) AS wb_1,
width_bucket(operand, 10, 0, 5) AS wb_2,
width_bucket(operand, 2, 8, 4) AS wb_3,
width_bucket(operand, 5.0, 5.5, 20) AS wb_4,
width_bucket(operand, -25, 25, 10) AS wb_5
FROM width_bucket_test;
DROP TABLE width_bucket_test;
-- TO_CHAR()
--
SELECT '' AS to_char_1, to_char(val, '9G999G999G999G999G999')