Fix regex, LIKE, and some other second-rank text-manipulation functions
to not cause needless copying of text datums that have 1-byte headers.

Patch by Greg Stark, in response to a performance gripe from Guillaume Smet
and ITAGAKI Takahiro.
This commit is contained in:
Tom Lane 2007-09-21 22:52:52 +00:00
parent cc59049daf
commit 7583f9a7ca
6 changed files with 212 additions and 192 deletions

View file

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.52 2007/06/01 15:33:18 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/hash/hashfunc.c,v 1.53 2007/09/21 22:52:52 tgl Exp $
*
* NOTES
* These functions are stored in pg_amproc. For each operator class
@ -157,7 +157,7 @@ hashname(PG_FUNCTION_ARGS)
Datum
hashtext(PG_FUNCTION_ARGS)
{
text *key = PG_GETARG_TEXT_P(0);
text *key = PG_GETARG_TEXT_PP(0);
Datum result;
/*
@ -165,8 +165,8 @@ hashtext(PG_FUNCTION_ARGS)
* it as a separate function in case we someday want to do something
* different in non-C locales. (See also hashbpchar, if so.)
*/
result = hash_any((unsigned char *) VARDATA(key),
VARSIZE(key) - VARHDRSZ);
result = hash_any((unsigned char *) VARDATA_ANY(key),
VARSIZE_ANY_EXHDR(key));
/* Avoid leaking memory for toasted inputs */
PG_FREE_IF_COPY(key, 0);
@ -181,11 +181,11 @@ hashtext(PG_FUNCTION_ARGS)
Datum
hashvarlena(PG_FUNCTION_ARGS)
{
struct varlena *key = PG_GETARG_VARLENA_P(0);
struct varlena *key = PG_GETARG_VARLENA_PP(0);
Datum result;
result = hash_any((unsigned char *) VARDATA(key),
VARSIZE(key) - VARHDRSZ);
result = hash_any((unsigned char *) VARDATA_ANY(key),
VARSIZE_ANY_EXHDR(key));
/* Avoid leaking memory for toasted inputs */
PG_FREE_IF_COPY(key, 0);

View file

@ -11,7 +11,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.69 2007/06/02 02:03:42 adunstan Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.70 2007/09/21 22:52:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -135,6 +135,7 @@ Generic_Text_IC_like(text *str, text *pat)
/* Force inputs to lower case to achieve case insensitivity */
str = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(str)));
pat = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(pat)));
/* lower's result is never packed, so OK to use old macros here */
s = VARDATA(str);
slen = (VARSIZE(str) - VARHDRSZ);
p = VARDATA(pat);
@ -151,7 +152,7 @@ Datum
namelike(PG_FUNCTION_ARGS)
{
Name str = PG_GETARG_NAME(0);
text *pat = PG_GETARG_TEXT_P(1);
text *pat = PG_GETARG_TEXT_PP(1);
bool result;
char *s,
*p;
@ -160,8 +161,8 @@ namelike(PG_FUNCTION_ARGS)
s = NameStr(*str);
slen = strlen(s);
p = VARDATA(pat);
plen = (VARSIZE(pat) - VARHDRSZ);
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
@ -172,7 +173,7 @@ Datum
namenlike(PG_FUNCTION_ARGS)
{
Name str = PG_GETARG_NAME(0);
text *pat = PG_GETARG_TEXT_P(1);
text *pat = PG_GETARG_TEXT_PP(1);
bool result;
char *s,
*p;
@ -181,8 +182,8 @@ namenlike(PG_FUNCTION_ARGS)
s = NameStr(*str);
slen = strlen(s);
p = VARDATA(pat);
plen = (VARSIZE(pat) - VARHDRSZ);
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
@ -192,18 +193,18 @@ namenlike(PG_FUNCTION_ARGS)
Datum
textlike(PG_FUNCTION_ARGS)
{
text *str = PG_GETARG_TEXT_P(0);
text *pat = PG_GETARG_TEXT_P(1);
text *str = PG_GETARG_TEXT_PP(0);
text *pat = PG_GETARG_TEXT_PP(1);
bool result;
char *s,
*p;
int slen,
plen;
s = VARDATA(str);
slen = (VARSIZE(str) - VARHDRSZ);
p = VARDATA(pat);
plen = (VARSIZE(pat) - VARHDRSZ);
s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str);
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
@ -213,18 +214,18 @@ textlike(PG_FUNCTION_ARGS)
Datum
textnlike(PG_FUNCTION_ARGS)
{
text *str = PG_GETARG_TEXT_P(0);
text *pat = PG_GETARG_TEXT_P(1);
text *str = PG_GETARG_TEXT_PP(0);
text *pat = PG_GETARG_TEXT_PP(1);
bool result;
char *s,
*p;
int slen,
plen;
s = VARDATA(str);
slen = (VARSIZE(str) - VARHDRSZ);
p = VARDATA(pat);
plen = (VARSIZE(pat) - VARHDRSZ);
s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str);
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
@ -234,18 +235,18 @@ textnlike(PG_FUNCTION_ARGS)
Datum
bytealike(PG_FUNCTION_ARGS)
{
bytea *str = PG_GETARG_BYTEA_P(0);
bytea *pat = PG_GETARG_BYTEA_P(1);
bytea *str = PG_GETARG_BYTEA_PP(0);
bytea *pat = PG_GETARG_BYTEA_PP(1);
bool result;
char *s,
*p;
int slen,
plen;
s = VARDATA(str);
slen = (VARSIZE(str) - VARHDRSZ);
p = VARDATA(pat);
plen = (VARSIZE(pat) - VARHDRSZ);
s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str);
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
result = (SB_MatchText(s, slen, p, plen) == LIKE_TRUE);
@ -255,18 +256,18 @@ bytealike(PG_FUNCTION_ARGS)
Datum
byteanlike(PG_FUNCTION_ARGS)
{
bytea *str = PG_GETARG_BYTEA_P(0);
bytea *pat = PG_GETARG_BYTEA_P(1);
bytea *str = PG_GETARG_BYTEA_PP(0);
bytea *pat = PG_GETARG_BYTEA_PP(1);
bool result;
char *s,
*p;
int slen,
plen;
s = VARDATA(str);
slen = (VARSIZE(str) - VARHDRSZ);
p = VARDATA(pat);
plen = (VARSIZE(pat) - VARHDRSZ);
s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str);
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
result = (SB_MatchText(s, slen, p, plen) != LIKE_TRUE);
@ -281,7 +282,7 @@ Datum
nameiclike(PG_FUNCTION_ARGS)
{
Name str = PG_GETARG_NAME(0);
text *pat = PG_GETARG_TEXT_P(1);
text *pat = PG_GETARG_TEXT_PP(1);
bool result;
text *strtext;
@ -296,7 +297,7 @@ Datum
nameicnlike(PG_FUNCTION_ARGS)
{
Name str = PG_GETARG_NAME(0);
text *pat = PG_GETARG_TEXT_P(1);
text *pat = PG_GETARG_TEXT_PP(1);
bool result;
text *strtext;
@ -310,8 +311,8 @@ nameicnlike(PG_FUNCTION_ARGS)
Datum
texticlike(PG_FUNCTION_ARGS)
{
text *str = PG_GETARG_TEXT_P(0);
text *pat = PG_GETARG_TEXT_P(1);
text *str = PG_GETARG_TEXT_PP(0);
text *pat = PG_GETARG_TEXT_PP(1);
bool result;
result = (Generic_Text_IC_like(str, pat) == LIKE_TRUE);
@ -322,8 +323,8 @@ texticlike(PG_FUNCTION_ARGS)
Datum
texticnlike(PG_FUNCTION_ARGS)
{
text *str = PG_GETARG_TEXT_P(0);
text *pat = PG_GETARG_TEXT_P(1);
text *str = PG_GETARG_TEXT_PP(0);
text *pat = PG_GETARG_TEXT_PP(1);
bool result;
result = (Generic_Text_IC_like(str, pat) != LIKE_TRUE);
@ -338,8 +339,8 @@ texticnlike(PG_FUNCTION_ARGS)
Datum
like_escape(PG_FUNCTION_ARGS)
{
text *pat = PG_GETARG_TEXT_P(0);
text *esc = PG_GETARG_TEXT_P(1);
text *pat = PG_GETARG_TEXT_PP(0);
text *esc = PG_GETARG_TEXT_PP(1);
text *result;
if (pg_database_encoding_max_length() == 1)
@ -357,8 +358,8 @@ like_escape(PG_FUNCTION_ARGS)
Datum
like_escape_bytea(PG_FUNCTION_ARGS)
{
bytea *pat = PG_GETARG_BYTEA_P(0);
bytea *esc = PG_GETARG_BYTEA_P(1);
bytea *pat = PG_GETARG_BYTEA_PP(0);
bytea *esc = PG_GETARG_BYTEA_PP(1);
bytea *result = SB_do_like_escape((text *)pat, (text *)esc);
PG_RETURN_BYTEA_P((bytea *)result);

View file

@ -17,7 +17,7 @@
* Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.16 2007/06/02 02:03:42 adunstan Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.17 2007/09/21 22:52:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -242,10 +242,10 @@ do_like_escape(text *pat, text *esc)
elen;
bool afterescape;
p = VARDATA(pat);
plen = (VARSIZE(pat) - VARHDRSZ);
e = VARDATA(esc);
elen = (VARSIZE(esc) - VARHDRSZ);
p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat);
e = VARDATA_ANY(esc);
elen = VARSIZE_ANY_EXHDR(esc);
/*
* Worst-case pattern growth is 2x --- unlikely, but it's hardly worth
@ -279,14 +279,14 @@ do_like_escape(text *pat, text *esc)
errmsg("invalid escape string"),
errhint("Escape string must be empty or one character.")));
e = VARDATA(esc);
e = VARDATA_ANY(esc);
/*
* If specified escape is '\', just copy the pattern as-is.
*/
if (*e == '\\')
{
memcpy(result, pat, VARSIZE(pat));
memcpy(result, pat, VARSIZE_ANY(pat));
return result;
}

View file

@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.71 2007/09/18 17:41:17 adunstan Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.72 2007/09/21 22:52:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -63,7 +63,7 @@ static text *dotrim(const char *string, int stringlen,
static wchar_t *
texttowcs(const text *txt)
{
int nbytes = VARSIZE(txt) - VARHDRSZ;
int nbytes = VARSIZE_ANY_EXHDR(txt);
char *workstr;
wchar_t *result;
size_t ncodes;
@ -77,7 +77,7 @@ texttowcs(const text *txt)
/* Need a null-terminated version of the input */
workstr = (char *) palloc(nbytes + 1);
memcpy(workstr, VARDATA(txt), nbytes);
memcpy(workstr, VARDATA_ANY(txt), nbytes);
workstr[nbytes] = '\0';
/* Output workspace cannot have more codes than input bytes */
@ -164,7 +164,7 @@ wcstotext(const wchar_t *str, int ncodes)
static wchar_t *
win32_utf8_texttowcs(const text *txt)
{
int nbytes = VARSIZE(txt) - VARHDRSZ;
int nbytes = VARSIZE_ANY_EXHDR(txt);
wchar_t *result;
int r;
@ -184,13 +184,13 @@ win32_utf8_texttowcs(const text *txt)
else
{
/* Do the conversion */
r = MultiByteToWideChar(CP_UTF8, 0, VARDATA(txt), nbytes,
r = MultiByteToWideChar(CP_UTF8, 0, VARDATA_ANY(txt), nbytes,
result, nbytes);
if (!r) /* assume it's NO_UNICODE_TRANSLATION */
{
/* see notes above about error reporting */
pg_verifymbstr(VARDATA(txt), nbytes, false);
pg_verifymbstr(VARDATA_ANY(txt), nbytes, false);
ereport(ERROR,
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
errmsg("invalid multibyte character for locale"),
@ -287,7 +287,7 @@ wstring_upper(char *str)
out_text = wcstotext(workspace, i);
nbytes = VARSIZE(out_text) - VARHDRSZ;
nbytes = VARSIZE(out_text) - VARHDRSZ;
result = palloc(nbytes + 1);
memcpy(result, VARDATA(out_text), nbytes);
@ -361,7 +361,7 @@ lower(PG_FUNCTION_ARGS)
*/
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
{
text *string = PG_GETARG_TEXT_P(0);
text *string = PG_GETARG_TEXT_PP(0);
text *result;
wchar_t *workspace;
int i;
@ -427,7 +427,7 @@ upper(PG_FUNCTION_ARGS)
*/
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
{
text *string = PG_GETARG_TEXT_P(0);
text *string = PG_GETARG_TEXT_PP(0);
text *result;
wchar_t *workspace;
int i;
@ -496,7 +496,7 @@ initcap(PG_FUNCTION_ARGS)
*/
if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
{
text *string = PG_GETARG_TEXT_P(0);
text *string = PG_GETARG_TEXT_PP(0);
text *result;
wchar_t *workspace;
int wasalnum = 0;
@ -567,12 +567,13 @@ initcap(PG_FUNCTION_ARGS)
Datum
lpad(PG_FUNCTION_ARGS)
{
text *string1 = PG_GETARG_TEXT_P(0);
text *string1 = PG_GETARG_TEXT_PP(0);
int32 len = PG_GETARG_INT32(1);
text *string2 = PG_GETARG_TEXT_P(2);
text *string2 = PG_GETARG_TEXT_PP(2);
text *ret;
char *ptr1,
*ptr2,
*ptr2start,
*ptr2end,
*ptr_ret;
int m,
@ -585,15 +586,15 @@ lpad(PG_FUNCTION_ARGS)
if (len < 0)
len = 0;
s1len = VARSIZE(string1) - VARHDRSZ;
s1len = VARSIZE_ANY_EXHDR(string1);
if (s1len < 0)
s1len = 0; /* shouldn't happen */
s2len = VARSIZE(string2) - VARHDRSZ;
s2len = VARSIZE_ANY_EXHDR(string2);
if (s2len < 0)
s2len = 0; /* shouldn't happen */
s1len = pg_mbstrlen_with_len(VARDATA(string1), s1len);
s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
if (s1len > len)
s1len = len; /* truncate string1 to len chars */
@ -613,7 +614,7 @@ lpad(PG_FUNCTION_ARGS)
m = len - s1len;
ptr2 = VARDATA(string2);
ptr2 = ptr2start = VARDATA_ANY(string2);
ptr2end = ptr2 + s2len;
ptr_ret = VARDATA(ret);
@ -625,10 +626,10 @@ lpad(PG_FUNCTION_ARGS)
ptr_ret += mlen;
ptr2 += mlen;
if (ptr2 == ptr2end) /* wrap around at end of s2 */
ptr2 = VARDATA(string2);
ptr2 = ptr2start;
}
ptr1 = VARDATA(string1);
ptr1 = VARDATA_ANY(string1);
while (s1len--)
{
@ -664,12 +665,13 @@ lpad(PG_FUNCTION_ARGS)
Datum
rpad(PG_FUNCTION_ARGS)
{
text *string1 = PG_GETARG_TEXT_P(0);
text *string1 = PG_GETARG_TEXT_PP(0);
int32 len = PG_GETARG_INT32(1);
text *string2 = PG_GETARG_TEXT_P(2);
text *string2 = PG_GETARG_TEXT_PP(2);
text *ret;
char *ptr1,
*ptr2,
*ptr2start,
*ptr2end,
*ptr_ret;
int m,
@ -682,15 +684,15 @@ rpad(PG_FUNCTION_ARGS)
if (len < 0)
len = 0;
s1len = VARSIZE(string1) - VARHDRSZ;
s1len = VARSIZE_ANY_EXHDR(string1);
if (s1len < 0)
s1len = 0; /* shouldn't happen */
s2len = VARSIZE(string2) - VARHDRSZ;
s2len = VARSIZE_ANY_EXHDR(string2);
if (s2len < 0)
s2len = 0; /* shouldn't happen */
s1len = pg_mbstrlen_with_len(VARDATA(string1), s1len);
s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
if (s1len > len)
s1len = len; /* truncate string1 to len chars */
@ -709,7 +711,7 @@ rpad(PG_FUNCTION_ARGS)
ret = (text *) palloc(VARHDRSZ + bytelen);
m = len - s1len;
ptr1 = VARDATA(string1);
ptr1 = VARDATA_ANY(string1);
ptr_ret = VARDATA(ret);
while (s1len--)
@ -721,7 +723,7 @@ rpad(PG_FUNCTION_ARGS)
ptr1 += mlen;
}
ptr2 = VARDATA(string2);
ptr2 = ptr2start = VARDATA_ANY(string2);
ptr2end = ptr2 + s2len;
while (m--)
@ -732,7 +734,7 @@ rpad(PG_FUNCTION_ARGS)
ptr_ret += mlen;
ptr2 += mlen;
if (ptr2 == ptr2end) /* wrap around at end of s2 */
ptr2 = VARDATA(string2);
ptr2 = ptr2start;
}
SET_VARSIZE(ret, ptr_ret - (char *) ret);
@ -759,12 +761,12 @@ rpad(PG_FUNCTION_ARGS)
Datum
btrim(PG_FUNCTION_ARGS)
{
text *string = PG_GETARG_TEXT_P(0);
text *set = PG_GETARG_TEXT_P(1);
text *string = PG_GETARG_TEXT_PP(0);
text *set = PG_GETARG_TEXT_PP(1);
text *ret;
ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
VARDATA(set), VARSIZE(set) - VARHDRSZ,
ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
true, true);
PG_RETURN_TEXT_P(ret);
@ -779,10 +781,10 @@ btrim(PG_FUNCTION_ARGS)
Datum
btrim1(PG_FUNCTION_ARGS)
{
text *string = PG_GETARG_TEXT_P(0);
text *string = PG_GETARG_TEXT_PP(0);
text *ret;
ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
" ", 1,
true, true);
@ -969,26 +971,33 @@ dotrim(const char *string, int stringlen,
Datum
byteatrim(PG_FUNCTION_ARGS)
{
bytea *string = PG_GETARG_BYTEA_P(0);
bytea *set = PG_GETARG_BYTEA_P(1);
bytea *string = PG_GETARG_BYTEA_PP(0);
bytea *set = PG_GETARG_BYTEA_PP(1);
bytea *ret;
char *ptr,
*end,
*ptr2,
*ptr2start,
*end2;
int m;
int m,
stringlen,
setlen;
if ((m = VARSIZE(string) - VARHDRSZ) <= 0 ||
(VARSIZE(set) - VARHDRSZ) <= 0)
stringlen = VARSIZE_ANY_EXHDR(string);
setlen = VARSIZE_ANY_EXHDR(set);
if (stringlen <= 0 || setlen <= 0)
PG_RETURN_BYTEA_P(string);
ptr = VARDATA(string);
end = VARDATA(string) + VARSIZE(string) - VARHDRSZ - 1;
end2 = VARDATA(set) + VARSIZE(set) - VARHDRSZ - 1;
m = stringlen;
ptr = VARDATA_ANY(string);
end = ptr + stringlen - 1;
ptr2start = VARDATA_ANY(set);
end2 = ptr2start + setlen - 1;
while (m > 0)
{
ptr2 = VARDATA(set);
ptr2 = ptr2start;
while (ptr2 <= end2)
{
if (*ptr == *ptr2)
@ -1003,7 +1012,7 @@ byteatrim(PG_FUNCTION_ARGS)
while (m > 0)
{
ptr2 = VARDATA(set);
ptr2 = ptr2start;
while (ptr2 <= end2)
{
if (*end == *ptr2)
@ -1041,12 +1050,12 @@ byteatrim(PG_FUNCTION_ARGS)
Datum
ltrim(PG_FUNCTION_ARGS)
{
text *string = PG_GETARG_TEXT_P(0);
text *set = PG_GETARG_TEXT_P(1);
text *string = PG_GETARG_TEXT_PP(0);
text *set = PG_GETARG_TEXT_PP(1);
text *ret;
ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
VARDATA(set), VARSIZE(set) - VARHDRSZ,
ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
true, false);
PG_RETURN_TEXT_P(ret);
@ -1061,10 +1070,10 @@ ltrim(PG_FUNCTION_ARGS)
Datum
ltrim1(PG_FUNCTION_ARGS)
{
text *string = PG_GETARG_TEXT_P(0);
text *string = PG_GETARG_TEXT_PP(0);
text *ret;
ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
" ", 1,
true, false);
@ -1089,12 +1098,12 @@ ltrim1(PG_FUNCTION_ARGS)
Datum
rtrim(PG_FUNCTION_ARGS)
{
text *string = PG_GETARG_TEXT_P(0);
text *set = PG_GETARG_TEXT_P(1);
text *string = PG_GETARG_TEXT_PP(0);
text *set = PG_GETARG_TEXT_PP(1);
text *ret;
ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
VARDATA(set), VARSIZE(set) - VARHDRSZ,
ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
false, true);
PG_RETURN_TEXT_P(ret);
@ -1109,10 +1118,10 @@ rtrim(PG_FUNCTION_ARGS)
Datum
rtrim1(PG_FUNCTION_ARGS)
{
text *string = PG_GETARG_TEXT_P(0);
text *string = PG_GETARG_TEXT_PP(0);
text *ret;
ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
" ", 1,
false, true);
@ -1140,9 +1149,9 @@ rtrim1(PG_FUNCTION_ARGS)
Datum
translate(PG_FUNCTION_ARGS)
{
text *string = PG_GETARG_TEXT_P(0);
text *from = PG_GETARG_TEXT_P(1);
text *to = PG_GETARG_TEXT_P(2);
text *string = PG_GETARG_TEXT_PP(0);
text *from = PG_GETARG_TEXT_PP(1);
text *to = PG_GETARG_TEXT_PP(2);
text *result;
char *from_ptr,
*to_ptr;
@ -1160,20 +1169,23 @@ translate(PG_FUNCTION_ARGS)
int source_len;
int from_index;
if ((m = VARSIZE(string) - VARHDRSZ) <= 0)
m = VARSIZE_ANY_EXHDR(string);
if (m <= 0)
PG_RETURN_TEXT_P(string);
fromlen = VARSIZE(from) - VARHDRSZ;
from_ptr = VARDATA(from);
tolen = VARSIZE(to) - VARHDRSZ;
to_ptr = VARDATA(to);
fromlen = VARSIZE_ANY_EXHDR(from);
from_ptr = VARDATA_ANY(from);
tolen = VARSIZE_ANY_EXHDR(to);
to_ptr = VARDATA_ANY(to);
str_len = VARSIZE_ANY_EXHDR(string);
source = VARDATA_ANY(string);
str_len = VARSIZE(string);
estimate_len = (tolen * 1.0 / fromlen + 0.5) * str_len;
estimate_len = estimate_len > str_len ? estimate_len : str_len;
result = (text *) palloc(estimate_len);
source = VARDATA(string);
result = (text *) palloc(estimate_len + VARHDRSZ);
target = VARDATA(result);
retlen = 0;
@ -1259,14 +1271,14 @@ translate(PG_FUNCTION_ARGS)
Datum
ascii(PG_FUNCTION_ARGS)
{
text *string = PG_GETARG_TEXT_P(0);
text *string = PG_GETARG_TEXT_PP(0);
int encoding = GetDatabaseEncoding();
unsigned char *data;
if (VARSIZE(string) <= VARHDRSZ)
if (VARSIZE_ANY_EXHDR(string) <= 0)
PG_RETURN_INT32(0);
data = (unsigned char *) VARDATA(string);
data = (unsigned char *) VARDATA_ANY(string);
if (encoding == PG_UTF8 && *data > 127)
{
@ -1434,19 +1446,20 @@ chr(PG_FUNCTION_ARGS)
Datum
repeat(PG_FUNCTION_ARGS)
{
text *string = PG_GETARG_TEXT_P(0);
text *string = PG_GETARG_TEXT_PP(0);
int32 count = PG_GETARG_INT32(1);
text *result;
int slen,
tlen;
int i;
char *cp;
char *cp,
*sp;
if (count < 0)
count = 0;
slen = (VARSIZE(string) - VARHDRSZ);
tlen = (VARHDRSZ + (count * slen));
slen = VARSIZE_ANY_EXHDR(string);
tlen = VARHDRSZ + (count * slen);
/* Check for integer overflow */
if (slen != 0 && count != 0)
@ -1464,9 +1477,10 @@ repeat(PG_FUNCTION_ARGS)
SET_VARSIZE(result, tlen);
cp = VARDATA(result);
sp = VARDATA_ANY(string);
for (i = 0; i < count; i++)
{
memcpy(cp, VARDATA(string), slen);
memcpy(cp, sp, slen);
cp += slen;
}

View file

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.73 2007/08/11 19:16:41 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.74 2007/09/21 22:52:52 tgl Exp $
*
* Alistair Crooks added the code for the regex caching
* agc - cached the regular expressions used - there's a good chance
@ -35,8 +35,8 @@
#include "utils/builtins.h"
#include "utils/guc.h"
#define PG_GETARG_TEXT_P_IF_EXISTS(_n) \
(PG_NARGS() > (_n) ? PG_GETARG_TEXT_P(_n) : NULL)
#define PG_GETARG_TEXT_PP_IF_EXISTS(_n) \
(PG_NARGS() > (_n) ? PG_GETARG_TEXT_PP(_n) : NULL)
/* GUC-settable flavor parameter */
@ -97,7 +97,8 @@ typedef struct regexp_matches_ctx
/* this structure describes one cached regular expression */
typedef struct cached_re_str
{
text *cre_pat; /* original RE (untoasted TEXT form) */
char *cre_pat; /* original RE (not null terminated!) */
int cre_pat_len; /* length of original RE, in bytes */
int cre_flags; /* compile flags: extended,icase etc */
regex_t cre_re; /* the compiled regular expression */
} cached_re_str;
@ -122,7 +123,7 @@ static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
*
* Returns regex_t *
*
* text_re --- the pattern, expressed as an *untoasted* TEXT object
* text_re --- the pattern, expressed as a TEXT object
* cflags --- compile options for the pattern
*
* Pattern is given in the database encoding. We internally convert to
@ -131,7 +132,8 @@ static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
static regex_t *
RE_compile_and_cache(text *text_re, int cflags)
{
int text_re_len = VARSIZE(text_re);
int text_re_len = VARSIZE_ANY_EXHDR(text_re);
char *text_re_val = VARDATA_ANY(text_re);
pg_wchar *pattern;
int pattern_len;
int i;
@ -146,9 +148,9 @@ RE_compile_and_cache(text *text_re, int cflags)
*/
for (i = 0; i < num_res; i++)
{
if (VARSIZE(re_array[i].cre_pat) == text_re_len &&
memcmp(re_array[i].cre_pat, text_re, text_re_len) == 0 &&
re_array[i].cre_flags == cflags)
if (re_array[i].cre_pat_len == text_re_len &&
re_array[i].cre_flags == cflags &&
memcmp(re_array[i].cre_pat, text_re_val, text_re_len) == 0)
{
/*
* Found a match; move it to front if not there already.
@ -170,10 +172,10 @@ RE_compile_and_cache(text *text_re, int cflags)
*/
/* Convert pattern string to wide characters */
pattern = (pg_wchar *) palloc((text_re_len - VARHDRSZ + 1) * sizeof(pg_wchar));
pattern_len = pg_mb2wchar_with_len(VARDATA(text_re),
pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
pattern_len = pg_mb2wchar_with_len(text_re_val,
pattern,
text_re_len - VARHDRSZ);
text_re_len);
regcomp_result = pg_regcomp(&re_temp.cre_re,
pattern,
@ -204,7 +206,8 @@ RE_compile_and_cache(text *text_re, int cflags)
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
memcpy(re_temp.cre_pat, text_re, text_re_len);
memcpy(re_temp.cre_pat, text_re_val, text_re_len);
re_temp.cre_pat_len = text_re_len;
re_temp.cre_flags = cflags;
/*
@ -308,7 +311,7 @@ RE_execute(regex_t *re, char *dat, int dat_len,
*
* Returns TRUE on match, FALSE on no match
*
* text_re --- the pattern, expressed as an *untoasted* TEXT object
* text_re --- the pattern, expressed as a TEXT object
* dat --- the data to match against (need not be null-terminated)
* dat_len --- the length of the data string
* cflags --- compile options for the pattern
@ -334,7 +337,7 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len,
* parse_re_flags - parse the options argument of regexp_matches and friends
*
* flags --- output argument, filled with desired options
* opts --- *untoasted* TEXT object, or NULL for defaults
* opts --- TEXT object, or NULL for defaults
*
* This accepts all the options allowed by any of the callers; callers that
* don't want some have to reject them after the fact.
@ -348,8 +351,8 @@ parse_re_flags(pg_re_flags *flags, text *opts)
if (opts)
{
char *opt_p = VARDATA(opts);
int opt_len = VARSIZE(opts) - VARHDRSZ;
char *opt_p = VARDATA_ANY(opts);
int opt_len = VARSIZE_ANY_EXHDR(opts);
int i;
for (i = 0; i < opt_len; i++)
@ -454,7 +457,7 @@ Datum
nameregexeq(PG_FUNCTION_ARGS)
{
Name n = PG_GETARG_NAME(0);
text *p = PG_GETARG_TEXT_P(1);
text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(RE_compile_and_execute(p,
NameStr(*n),
@ -467,7 +470,7 @@ Datum
nameregexne(PG_FUNCTION_ARGS)
{
Name n = PG_GETARG_NAME(0);
text *p = PG_GETARG_TEXT_P(1);
text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(!RE_compile_and_execute(p,
NameStr(*n),
@ -479,12 +482,12 @@ nameregexne(PG_FUNCTION_ARGS)
Datum
textregexeq(PG_FUNCTION_ARGS)
{
text *s = PG_GETARG_TEXT_P(0);
text *p = PG_GETARG_TEXT_P(1);
text *s = PG_GETARG_TEXT_PP(0);
text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(RE_compile_and_execute(p,
VARDATA(s),
VARSIZE(s) - VARHDRSZ,
VARDATA_ANY(s),
VARSIZE_ANY_EXHDR(s),
regex_flavor,
0, NULL));
}
@ -492,12 +495,12 @@ textregexeq(PG_FUNCTION_ARGS)
Datum
textregexne(PG_FUNCTION_ARGS)
{
text *s = PG_GETARG_TEXT_P(0);
text *p = PG_GETARG_TEXT_P(1);
text *s = PG_GETARG_TEXT_PP(0);
text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(!RE_compile_and_execute(p,
VARDATA(s),
VARSIZE(s) - VARHDRSZ,
VARDATA_ANY(s),
VARSIZE_ANY_EXHDR(s),
regex_flavor,
0, NULL));
}
@ -513,7 +516,7 @@ Datum
nameicregexeq(PG_FUNCTION_ARGS)
{
Name n = PG_GETARG_NAME(0);
text *p = PG_GETARG_TEXT_P(1);
text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(RE_compile_and_execute(p,
NameStr(*n),
@ -526,7 +529,7 @@ Datum
nameicregexne(PG_FUNCTION_ARGS)
{
Name n = PG_GETARG_NAME(0);
text *p = PG_GETARG_TEXT_P(1);
text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(!RE_compile_and_execute(p,
NameStr(*n),
@ -538,12 +541,12 @@ nameicregexne(PG_FUNCTION_ARGS)
Datum
texticregexeq(PG_FUNCTION_ARGS)
{
text *s = PG_GETARG_TEXT_P(0);
text *p = PG_GETARG_TEXT_P(1);
text *s = PG_GETARG_TEXT_PP(0);
text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(RE_compile_and_execute(p,
VARDATA(s),
VARSIZE(s) - VARHDRSZ,
VARDATA_ANY(s),
VARSIZE_ANY_EXHDR(s),
regex_flavor | REG_ICASE,
0, NULL));
}
@ -551,12 +554,12 @@ texticregexeq(PG_FUNCTION_ARGS)
Datum
texticregexne(PG_FUNCTION_ARGS)
{
text *s = PG_GETARG_TEXT_P(0);
text *p = PG_GETARG_TEXT_P(1);
text *s = PG_GETARG_TEXT_PP(0);
text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(!RE_compile_and_execute(p,
VARDATA(s),
VARSIZE(s) - VARHDRSZ,
VARDATA_ANY(s),
VARSIZE_ANY_EXHDR(s),
regex_flavor | REG_ICASE,
0, NULL));
}
@ -569,8 +572,8 @@ texticregexne(PG_FUNCTION_ARGS)
Datum
textregexsubstr(PG_FUNCTION_ARGS)
{
text *s = PG_GETARG_TEXT_P(0);
text *p = PG_GETARG_TEXT_P(1);
text *s = PG_GETARG_TEXT_PP(0);
text *p = PG_GETARG_TEXT_PP(1);
bool match;
regmatch_t pmatch[2];
@ -581,8 +584,8 @@ textregexsubstr(PG_FUNCTION_ARGS)
* return what the whole regexp matched.
*/
match = RE_compile_and_execute(p,
VARDATA(s),
VARSIZE(s) - VARHDRSZ,
VARDATA_ANY(s),
VARSIZE_ANY_EXHDR(s),
regex_flavor,
2, pmatch);
@ -620,9 +623,9 @@ textregexsubstr(PG_FUNCTION_ARGS)
Datum
textregexreplace_noopt(PG_FUNCTION_ARGS)
{
text *s = PG_GETARG_TEXT_P(0);
text *p = PG_GETARG_TEXT_P(1);
text *r = PG_GETARG_TEXT_P(2);
text *s = PG_GETARG_TEXT_PP(0);
text *p = PG_GETARG_TEXT_PP(1);
text *r = PG_GETARG_TEXT_PP(2);
regex_t *re;
re = RE_compile_and_cache(p, regex_flavor);
@ -637,10 +640,10 @@ textregexreplace_noopt(PG_FUNCTION_ARGS)
Datum
textregexreplace(PG_FUNCTION_ARGS)
{
text *s = PG_GETARG_TEXT_P(0);
text *p = PG_GETARG_TEXT_P(1);
text *r = PG_GETARG_TEXT_P(2);
text *opt = PG_GETARG_TEXT_P(3);
text *s = PG_GETARG_TEXT_PP(0);
text *p = PG_GETARG_TEXT_PP(1);
text *r = PG_GETARG_TEXT_PP(2);
text *opt = PG_GETARG_TEXT_PP(3);
regex_t *re;
pg_re_flags flags;
@ -673,9 +676,9 @@ similar_escape(PG_FUNCTION_ARGS)
/* This function is not strict, so must test explicitly */
if (PG_ARGISNULL(0))
PG_RETURN_NULL();
pat_text = PG_GETARG_TEXT_P(0);
p = VARDATA(pat_text);
plen = (VARSIZE(pat_text) - VARHDRSZ);
pat_text = PG_GETARG_TEXT_PP(0);
p = VARDATA_ANY(pat_text);
plen = VARSIZE_ANY_EXHDR(pat_text);
if (PG_ARGISNULL(1))
{
/* No ESCAPE clause provided; default to backslash as escape */
@ -684,9 +687,9 @@ similar_escape(PG_FUNCTION_ARGS)
}
else
{
esc_text = PG_GETARG_TEXT_P(1);
e = VARDATA(esc_text);
elen = (VARSIZE(esc_text) - VARHDRSZ);
esc_text = PG_GETARG_TEXT_PP(1);
e = VARDATA_ANY(esc_text);
elen = VARSIZE_ANY_EXHDR(esc_text);
if (elen == 0)
e = NULL; /* no escape character */
else if (elen != 1)
@ -785,8 +788,8 @@ regexp_matches(PG_FUNCTION_ARGS)
if (SRF_IS_FIRSTCALL())
{
text *pattern = PG_GETARG_TEXT_P(1);
text *flags = PG_GETARG_TEXT_P_IF_EXISTS(2);
text *pattern = PG_GETARG_TEXT_PP(1);
text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
MemoryContext oldcontext;
funcctx = SRF_FIRSTCALL_INIT();
@ -863,9 +866,9 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
matchctx->orig_str = orig_str;
/* convert string to pg_wchar form for matching */
orig_len = VARSIZE(orig_str) - VARHDRSZ;
orig_len = VARSIZE_ANY_EXHDR(orig_str);
wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
wide_len = pg_mb2wchar_with_len(VARDATA(orig_str), wide_str, orig_len);
wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
/* determine options */
parse_re_flags(&re_flags, flags);
@ -1043,8 +1046,8 @@ regexp_split_to_table(PG_FUNCTION_ARGS)
if (SRF_IS_FIRSTCALL())
{
text *pattern = PG_GETARG_TEXT_P(1);
text *flags = PG_GETARG_TEXT_P_IF_EXISTS(2);
text *pattern = PG_GETARG_TEXT_PP(1);
text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
MemoryContext oldcontext;
funcctx = SRF_FIRSTCALL_INIT();
@ -1091,9 +1094,9 @@ Datum regexp_split_to_array(PG_FUNCTION_ARGS)
ArrayBuildState *astate = NULL;
regexp_matches_ctx *splitctx;
splitctx = setup_regexp_matches(PG_GETARG_TEXT_P(0),
PG_GETARG_TEXT_P(1),
PG_GETARG_TEXT_P_IF_EXISTS(2),
splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
PG_GETARG_TEXT_PP(1),
PG_GETARG_TEXT_PP_IF_EXISTS(2),
true, false, true);
while (splitctx->next_match <= splitctx->nmatches)

View file

@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/fmgr.h,v 1.52 2007/07/25 12:22:52 mha Exp $
* $PostgreSQL: pgsql/src/include/fmgr.h,v 1.53 2007/09/21 22:52:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -220,6 +220,8 @@ extern struct varlena *pg_detoast_datum_packed(struct varlena * datum);
#define PG_GETARG_RAW_VARLENA_P(n) ((struct varlena *) PG_GETARG_POINTER(n))
/* use this if you want the input datum de-toasted: */
#define PG_GETARG_VARLENA_P(n) PG_DETOAST_DATUM(PG_GETARG_DATUM(n))
/* and this if you can handle 1-byte-header datums: */
#define PG_GETARG_VARLENA_PP(n) PG_DETOAST_DATUM_PACKED(PG_GETARG_DATUM(n))
/* DatumGetFoo macros for varlena types will typically look like this: */
#define DatumGetByteaP(X) ((bytea *) PG_DETOAST_DATUM(X))
#define DatumGetByteaPP(X) ((bytea *) PG_DETOAST_DATUM_PACKED(X))