Teach plpgsql's lexer about dollar-quoted literals.

Andrew Dunstan, some help from Tom Lane.
This commit is contained in:
Tom Lane 2004-02-25 18:10:51 +00:00
parent fa7a3abe87
commit 5ada9ef088
4 changed files with 118 additions and 30 deletions

View file

@ -4,7 +4,7 @@
* procedural language
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.50 2003/12/23 00:01:57 tgl Exp $
* $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.51 2004/02/25 18:10:51 tgl Exp $
*
* This software is copyrighted by Jan Wieck - Hamburg.
*
@ -1235,7 +1235,7 @@ stmt_raise : K_RAISE lno raise_level raise_msg raise_params ';'
raise_msg : T_STRING
{
$$ = strdup(yytext);
$$ = plpgsql_get_string_value();
}
;

View file

@ -3,7 +3,7 @@
* procedural language
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_exec.c,v 1.96 2004/02/24 01:44:33 tgl Exp $
* $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_exec.c,v 1.97 2004/02/25 18:10:51 tgl Exp $
*
* This software is copyrighted by Jan Wieck - Hamburg.
*
@ -1805,7 +1805,7 @@ exec_stmt_raise(PLpgSQL_execstate * estate, PLpgSQL_stmt_raise * stmt)
for (cp = stmt->message; *cp; cp++)
{
/*
* Occurences of a single % are replaced by the next argument's
* Occurrences of a single % are replaced by the next argument's
* external representation. Double %'s are converted to one %.
*/
if ((c[0] = *cp) == '%')
@ -1834,21 +1834,6 @@ exec_stmt_raise(PLpgSQL_execstate * estate, PLpgSQL_stmt_raise * stmt)
continue;
}
/*
* Occurrences of single ' are removed. double ' are reduced to
* single ones. We must do this because the parameter stored by
* the grammar is the raw T_STRING input literal, rather than the
* de-lexed string as you might expect ...
*/
if (*cp == '\'')
{
cp++;
if (*cp == '\'')
plpgsql_dstring_append(&ds, c);
else
cp--;
continue;
}
plpgsql_dstring_append(&ds, c);
}

View file

@ -3,7 +3,7 @@
* procedural language
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.43 2003/11/29 19:52:12 pgsql Exp $
* $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.44 2004/02/25 18:10:51 tgl Exp $
*
* This software is copyrighted by Jan Wieck - Hamburg.
*
@ -694,5 +694,6 @@ extern void plpgsql_push_back_token(int token);
extern int plpgsql_scanner_lineno(void);
extern void plpgsql_scanner_init(const char *str, int functype);
extern void plpgsql_scanner_finish(void);
extern char *plpgsql_get_string_value(void);
#endif /* PLPGSQL_H */

View file

@ -4,7 +4,7 @@
* procedural language
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.31 2004/02/24 22:06:32 tgl Exp $
* $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.32 2004/02/25 18:10:51 tgl Exp $
*
* This software is copyrighted by Jan Wieck - Hamburg.
*
@ -57,6 +57,8 @@ static int lookahead_token;
static bool have_lookahead_token;
static const char *cur_line_start;
static int cur_line_num;
static char *dolqstart; /* current $foo$ quote start string */
static int dolqlen; /* signal to plpgsql_get_string_value */
int plpgsql_SpaceScanned = 0;
%}
@ -70,7 +72,9 @@ int plpgsql_SpaceScanned = 0;
%option case-insensitive
%x IN_STRING IN_COMMENT
%x IN_STRING
%x IN_COMMENT
%x IN_DOLLARQUOTE
digit [0-9]
ident_start [A-Za-z\200-\377_]
@ -84,6 +88,14 @@ param \${digit}+
space [ \t\n\r\f]
/* $foo$ style quotes ("dollar quoting")
* copied straight from the backend SQL parser
*/
dolq_start [A-Za-z\200-\377_]
dolq_cont [A-Za-z\200-\377_0-9]
dolqdelim \$({dolq_start}{dolq_cont}*)?\$
dolqinside [^$]+
%%
/* ----------
* Local variables in scanner to remember where
@ -97,7 +109,7 @@ space [ \t\n\r\f]
* Reset the state when entering the scanner
* ----------
*/
BEGIN INITIAL;
BEGIN(INITIAL);
plpgsql_SpaceScanned = 0;
/* ----------
@ -247,9 +259,9 @@ dump { return O_DUMP; }
--[^\r\n]* ;
\/\* { start_lineno = plpgsql_scanner_lineno();
BEGIN IN_COMMENT;
BEGIN(IN_COMMENT);
}
<IN_COMMENT>\*\/ { BEGIN INITIAL; plpgsql_SpaceScanned = 1; }
<IN_COMMENT>\*\/ { BEGIN(INITIAL); plpgsql_SpaceScanned = 1; }
<IN_COMMENT>\n ;
<IN_COMMENT>. ;
<IN_COMMENT><<EOF>> {
@ -260,7 +272,7 @@ dump { return O_DUMP; }
}
/* ----------
* Collect anything inside of ''s and return one STRING
* Collect anything inside of ''s and return one STRING token
*
* Hacking yytext/yyleng here lets us avoid using yymore(), which is
* a win for performance. It's safe because we know the underlying
@ -270,15 +282,18 @@ dump { return O_DUMP; }
' {
start_lineno = plpgsql_scanner_lineno();
start_charpos = yytext;
BEGIN IN_STRING;
BEGIN(IN_STRING);
}
<IN_STRING>\\. { }
<IN_STRING>\\ { /* can only happen with \ at EOF */ }
<IN_STRING>'' { }
<IN_STRING>' {
yyleng -= (yytext - start_charpos);
/* tell plpgsql_get_string_value it's not a dollar quote */
dolqlen = 0;
/* adjust yytext/yyleng to describe whole string token */
yyleng += (yytext - start_charpos);
yytext = start_charpos;
BEGIN INITIAL;
BEGIN(INITIAL);
return T_STRING;
}
<IN_STRING>[^'\\]+ { }
@ -289,6 +304,43 @@ dump { return O_DUMP; }
errmsg("unterminated string")));
}
{dolqdelim} {
/*
 * Opening delimiter of a dollar-quoted literal.  Remember the line
 * and the buffer position where the token begins, keep a copy of the
 * delimiter text to compare later delimiters against, and switch to
 * the IN_DOLLARQUOTE start condition.
 */
start_lineno = plpgsql_scanner_lineno();
start_charpos = yytext;
dolqstart = pstrdup(yytext);
BEGIN(IN_DOLLARQUOTE);
}
<IN_DOLLARQUOTE>{dolqdelim} {
/* A $...$ sequence inside the literal: is it our closing delimiter? */
if (strcmp(yytext, dolqstart) == 0)
{
/* exact match with the opening delimiter; the saved copy is done with */
pfree(dolqstart);
/* tell plpgsql_get_string_value it is a dollar quote */
dolqlen = yyleng;
/* adjust yytext/yyleng to describe whole string token */
yyleng += (yytext - start_charpos);
yytext = start_charpos;
BEGIN(INITIAL);
return T_STRING;
}
else
{
/*
* When we fail to match $...$ to dolqstart, transfer
* the $... part to the output, but put back the final
* $ for rescanning. Consider $delim$...$junk$delim$
*/
yyless(yyleng-1);
}
}
<IN_DOLLARQUOTE>{dolqinside} { /* run of non-$ characters: no action, keep scanning */ }
<IN_DOLLARQUOTE>. { /* needed for $ inside the quoted text */ }
<IN_DOLLARQUOTE><<EOF>> {
/*
 * Input ended while still inside the literal.  Report the error
 * against the line on which the literal started.
 * NOTE(review): ERRCODE_DATATYPE_MISMATCH looks unusual for an
 * unterminated literal -- confirm this is the intended error code.
 */
plpgsql_error_lineno = start_lineno;
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
errmsg("unterminated dollar-quoted string")));
}
/* ----------
* Any unmatched character is returned as is
* ----------
@ -429,7 +481,6 @@ plpgsql_scanner_init(const char *str, int functype)
BEGIN(INITIAL);
}
/*
* Called after parsing is done to clean up after plpgsql_scanner_init()
*/
@ -439,3 +490,54 @@ plpgsql_scanner_finish(void)
yy_delete_buffer(scanbufhandle);
pfree(scanbuf);
}
/*
 * Called after a T_STRING token is read to get the string literal's value
 * as a malloc'd string.  (We make this a separate call because in many
 * scenarios there's no need to get the decoded value.)
 *
 * The token is taken from the scanner's yytext/yyleng.  When dolqlen > 0
 * the token is a dollar-quoted literal whose opening and closing delimiters
 * are each dolqlen bytes long, and the text between them is copied verbatim.
 * Otherwise the token is a '...' literal: the enclosing quotes are dropped,
 * a doubled quote ('') yields a single quote, and a backslash yields the
 * character that follows it.
 *
 * Returns a NUL-terminated string obtained from malloc(), or NULL if the
 * allocation fails (as strdup() would); nothing is written through a null
 * pointer in that case.
 *
 * Note: we expect the literal to be the most recently lexed token.  This
 * would not work well if we supported multiple-token pushback or if
 * plpgsql_yylex() wanted to read ahead beyond a T_STRING token.
 */
char *
plpgsql_get_string_value(void)
{
	char	   *result;
	const char *cp;
	int			len;

	if (dolqlen > 0)
	{
		/* Token is a $foo$...$foo$ string */
		len = yyleng - 2 * dolqlen;
		Assert(len >= 0);
		result = (char *) malloc(len + 1);
		if (result == NULL)
			return NULL;		/* out of memory */
		memcpy(result, yytext + dolqlen, len);
		result[len] = '\0';
	}
	else
	{
		/* Token is a '...' string */
		result = (char *) malloc(yyleng + 1);	/* more than enough room */
		if (result == NULL)
			return NULL;		/* out of memory */
		len = 0;
		for (cp = yytext; *cp; cp++)
		{
			if (*cp == '\'')
			{
				/* '' inside the literal stands for one quote character */
				if (cp[1] == '\'')
					result[len++] = *cp++;
				/* else it must be string start or end quote */
			}
			else if (*cp == '\\')
			{
				/* drop the backslash, keep the character it escapes */
				if (cp[1] != '\0')	/* just a paranoid check */
					result[len++] = *(++cp);
			}
			else
				result[len++] = *cp;
		}
		result[len] = '\0';
	}

	return result;
}