Revise plpgsql's scanner to process comments and string literals in a way
more nearly matching the core SQL scanner. The user-visible effects are:

* Block comments (slash-star comments) now nest, as per SQL spec.

* In standard_conforming_strings mode, backslash as the last character of a non-E string literal is now correctly taken as an ordinary character; formerly it was misinterpreted as escaping the ending quote. (Since the string also had to pass through the core scanner, this invariably led to syntax errors.)

* Formerly, backslashes in the format string of RAISE were always treated as quoting the next character, regardless of mode. Now, they are ordinary characters with standard_conforming_strings on, while with it off, they introduce the same set of escapes as in the core SQL scanner. Also, escape_string_warning is now effective for RAISE format strings. These changes make RAISE format strings work just like any other string literal.

This is implemented by copying and pasting a lot of logic from the core scanner. It would be a good idea to look into getting rid of plpgsql's scanner entirely in favor of using the core scanner. However, that involves more change than I can justify making during beta --- in particular, the core scanner would have to become re-entrant.

In passing, remove the kluge that made the plpgsql scanner emit T_FUNCTION or T_TRIGGER as a made-up first token. That presumably had some value once upon a time, but now it's just useless complication for both the scanner and the grammar.
This commit is contained in:
parent
7f2f798b30
commit
3a624e9200
8 changed files with 395 additions and 233 deletions
|
@ -1,4 +1,4 @@
|
|||
<!-- $PostgreSQL: pgsql/doc/src/sgml/plpgsql.sgml,v 1.139 2009/04/02 19:20:45 momjian Exp $ -->
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/plpgsql.sgml,v 1.140 2009/04/19 18:52:56 tgl Exp $ -->
|
||||
|
||||
<chapter id="plpgsql">
|
||||
<title><application>PL/pgSQL</application> - <acronym>SQL</acronym> Procedural Language</title>
|
||||
|
@ -220,10 +220,8 @@ END <optional> <replaceable>label</replaceable> </optional>;
|
|||
There are two types of comments in <application>PL/pgSQL</>. A double
|
||||
dash (<literal>--</literal>) starts a comment that extends to the end of
|
||||
the line. A <literal>/*</literal> starts a block comment that extends to
|
||||
the next occurrence of <literal>*/</literal>. Block comments cannot be
|
||||
nested, but double dash comments can be enclosed into a block comment and
|
||||
a double dash can hide the block comment delimiters <literal>/*</literal>
|
||||
and <literal>*/</literal>.
|
||||
the next occurrence of <literal>*/</literal>. Block comments nest,
|
||||
just as in ordinary SQL.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.121 2009/02/18 11:33:04 petere Exp $
|
||||
* $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.122 2009/04/19 18:52:57 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -62,6 +62,8 @@ static PLpgSQL_row *make_scalar_list1(const char *initial_name,
|
|||
int lineno);
|
||||
static void check_sql_expr(const char *stmt);
|
||||
static void plpgsql_sql_error_callback(void *arg);
|
||||
static char *parse_string_token(const char *token);
|
||||
static void plpgsql_string_error_callback(void *arg);
|
||||
static char *check_label(const char *yytxt);
|
||||
static void check_labels(const char *start_label,
|
||||
const char *end_label);
|
||||
|
@ -228,8 +230,6 @@ static List *read_raise_options(void);
|
|||
/*
|
||||
* Other tokens
|
||||
*/
|
||||
%token T_FUNCTION
|
||||
%token T_TRIGGER
|
||||
%token T_STRING
|
||||
%token T_NUMBER
|
||||
%token T_SCALAR /* a VAR, RECFIELD, or TRIGARG */
|
||||
|
@ -244,13 +244,9 @@ static List *read_raise_options(void);
|
|||
|
||||
%%
|
||||
|
||||
pl_function : T_FUNCTION comp_optsect pl_block opt_semi
|
||||
pl_function : comp_optsect pl_block opt_semi
|
||||
{
|
||||
yylval.program = (PLpgSQL_stmt_block *)$3;
|
||||
}
|
||||
| T_TRIGGER comp_optsect pl_block opt_semi
|
||||
{
|
||||
yylval.program = (PLpgSQL_stmt_block *)$3;
|
||||
yylval.program = (PLpgSQL_stmt_block *) $2;
|
||||
}
|
||||
;
|
||||
|
||||
|
@ -1403,7 +1399,7 @@ stmt_raise : K_RAISE lno
|
|||
if (tok == T_STRING)
|
||||
{
|
||||
/* old style message and parameters */
|
||||
new->message = plpgsql_get_string_value();
|
||||
new->message = parse_string_token(yytext);
|
||||
/*
|
||||
* We expect either a semi-colon, which
|
||||
* indicates no parameters, or a comma that
|
||||
|
@ -1435,7 +1431,7 @@ stmt_raise : K_RAISE lno
|
|||
|
||||
if (yylex() != T_STRING)
|
||||
yyerror("syntax error");
|
||||
sqlstatestr = plpgsql_get_string_value();
|
||||
sqlstatestr = parse_string_token(yytext);
|
||||
|
||||
if (strlen(sqlstatestr) != 5)
|
||||
yyerror("invalid SQLSTATE code");
|
||||
|
@ -1778,7 +1774,7 @@ proc_condition : opt_lblname
|
|||
/* next token should be a string literal */
|
||||
if (yylex() != T_STRING)
|
||||
yyerror("syntax error");
|
||||
sqlstatestr = plpgsql_get_string_value();
|
||||
sqlstatestr = parse_string_token(yytext);
|
||||
|
||||
if (strlen(sqlstatestr) != 5)
|
||||
yyerror("invalid SQLSTATE code");
|
||||
|
@ -2738,6 +2734,49 @@ plpgsql_sql_error_callback(void *arg)
|
|||
errposition(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert a string-literal token to the represented string value.
|
||||
*
|
||||
* To do this, we need to invoke the core lexer. To avoid confusion between
|
||||
* the core bison/flex definitions and our own, the actual invocation is in
|
||||
* pl_funcs.c. Here we are only concerned with setting up the right errcontext
|
||||
* state, which is handled the same as in check_sql_expr().
|
||||
*/
|
||||
static char *
|
||||
parse_string_token(const char *token)
|
||||
{
|
||||
char *result;
|
||||
ErrorContextCallback syntax_errcontext;
|
||||
ErrorContextCallback *previous_errcontext;
|
||||
|
||||
/* See comments in check_sql_expr() */
|
||||
Assert(error_context_stack->callback == plpgsql_compile_error_callback);
|
||||
|
||||
previous_errcontext = error_context_stack;
|
||||
syntax_errcontext.callback = plpgsql_string_error_callback;
|
||||
syntax_errcontext.arg = (char *) token;
|
||||
syntax_errcontext.previous = error_context_stack->previous;
|
||||
error_context_stack = &syntax_errcontext;
|
||||
|
||||
result = plpgsql_parse_string_token(token);
|
||||
|
||||
/* Restore former ereport callback */
|
||||
error_context_stack = previous_errcontext;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static void
|
||||
plpgsql_string_error_callback(void *arg)
|
||||
{
|
||||
Assert(plpgsql_error_funcname);
|
||||
|
||||
errcontext("string literal in PL/PgSQL function \"%s\" near line %d",
|
||||
plpgsql_error_funcname, plpgsql_error_lineno);
|
||||
/* representing the string literal as internalquery seems overkill */
|
||||
errposition(0);
|
||||
}
|
||||
|
||||
static char *
|
||||
check_label(const char *yytxt)
|
||||
{
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.134 2009/02/18 11:33:04 petere Exp $
|
||||
* $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.135 2009/04/19 18:52:57 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -261,7 +261,7 @@ do_compile(FunctionCallInfo fcinfo,
|
|||
bool forValidator)
|
||||
{
|
||||
Form_pg_proc procStruct = (Form_pg_proc) GETSTRUCT(procTup);
|
||||
int functype = CALLED_AS_TRIGGER(fcinfo) ? T_TRIGGER : T_FUNCTION;
|
||||
bool is_trigger = CALLED_AS_TRIGGER(fcinfo);
|
||||
Datum prosrcdatum;
|
||||
bool isnull;
|
||||
char *proc_source;
|
||||
|
@ -293,7 +293,7 @@ do_compile(FunctionCallInfo fcinfo,
|
|||
if (isnull)
|
||||
elog(ERROR, "null prosrc");
|
||||
proc_source = TextDatumGetCString(prosrcdatum);
|
||||
plpgsql_scanner_init(proc_source, functype);
|
||||
plpgsql_scanner_init(proc_source);
|
||||
|
||||
plpgsql_error_funcname = pstrdup(NameStr(procStruct->proname));
|
||||
plpgsql_error_lineno = 0;
|
||||
|
@ -359,13 +359,13 @@ do_compile(FunctionCallInfo fcinfo,
|
|||
function->fn_oid = fcinfo->flinfo->fn_oid;
|
||||
function->fn_xmin = HeapTupleHeaderGetXmin(procTup->t_data);
|
||||
function->fn_tid = procTup->t_self;
|
||||
function->fn_functype = functype;
|
||||
function->fn_is_trigger = is_trigger;
|
||||
function->fn_cxt = func_cxt;
|
||||
function->out_param_varno = -1; /* set up for no OUT param */
|
||||
|
||||
switch (functype)
|
||||
switch (is_trigger)
|
||||
{
|
||||
case T_FUNCTION:
|
||||
case false:
|
||||
|
||||
/*
|
||||
* Fetch info about the procedure's parameters. Allocations aren't
|
||||
|
@ -564,7 +564,7 @@ do_compile(FunctionCallInfo fcinfo,
|
|||
ReleaseSysCache(typeTup);
|
||||
break;
|
||||
|
||||
case T_TRIGGER:
|
||||
case true:
|
||||
/* Trigger procedure's return type is unknown yet */
|
||||
function->fn_rettype = InvalidOid;
|
||||
function->fn_retbyval = false;
|
||||
|
@ -645,7 +645,7 @@ do_compile(FunctionCallInfo fcinfo,
|
|||
break;
|
||||
|
||||
default:
|
||||
elog(ERROR, "unrecognized function typecode: %u", functype);
|
||||
elog(ERROR, "unrecognized function typecode: %d", (int) is_trigger);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -790,7 +790,7 @@ plpgsql_parse_word(const char *word)
|
|||
* Recognize tg_argv when compiling triggers
|
||||
* (XXX this sucks, it should be a regular variable in the namestack)
|
||||
*/
|
||||
if (plpgsql_curr_compile->fn_functype == T_TRIGGER)
|
||||
if (plpgsql_curr_compile->fn_is_trigger)
|
||||
{
|
||||
if (strcmp(cp[0], "tg_argv") == 0)
|
||||
{
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.76 2009/02/18 11:33:04 petere Exp $
|
||||
* $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.77 2009/04/19 18:52:57 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -17,6 +17,8 @@
|
|||
|
||||
#include <ctype.h>
|
||||
|
||||
#include "parser/gramparse.h"
|
||||
#include "parser/gram.h"
|
||||
#include "parser/scansup.h"
|
||||
|
||||
|
||||
|
@ -459,6 +461,41 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* plpgsql_parse_string_token - get the value represented by a string literal
|
||||
*
|
||||
* We do not make plpgsql's lexer produce the represented value, because
|
||||
* in many cases we don't need it. Instead this function is invoked when
|
||||
* we do need it. The input is the T_STRING token as identified by the lexer.
|
||||
*
|
||||
* The result is a palloc'd string.
|
||||
*
|
||||
* Note: this is called only from plpgsql's gram.y, but we can't just put it
|
||||
* there because including parser/gram.h there would cause confusion.
|
||||
*/
|
||||
char *
|
||||
plpgsql_parse_string_token(const char *token)
|
||||
{
|
||||
int ctoken;
|
||||
|
||||
/*
|
||||
* We use the core lexer to do the dirty work. Aside from getting the
|
||||
* right results for escape sequences and so on, this helps us produce
|
||||
* appropriate warnings for escape_string_warning etc.
|
||||
*/
|
||||
scanner_init(token);
|
||||
|
||||
ctoken = base_yylex();
|
||||
|
||||
if (ctoken != SCONST)
|
||||
elog(ERROR, "unexpected result from base lexer: %d", ctoken);
|
||||
|
||||
scanner_finish();
|
||||
|
||||
return base_yylval.str;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Statement type as a string, for use in error messages etc.
|
||||
*/
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.110 2009/04/09 02:57:53 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.111 2009/04/19 18:52:57 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -650,7 +650,7 @@ typedef struct PLpgSQL_function
|
|||
Oid fn_oid;
|
||||
TransactionId fn_xmin;
|
||||
ItemPointerData fn_tid;
|
||||
int fn_functype;
|
||||
bool fn_is_trigger;
|
||||
PLpgSQL_func_hashkey *fn_hashkey; /* back-link to hashtable key */
|
||||
MemoryContext fn_cxt;
|
||||
|
||||
|
@ -880,6 +880,7 @@ extern void plpgsql_ns_rename(char *oldname, char *newname);
|
|||
* ----------
|
||||
*/
|
||||
extern void plpgsql_convert_ident(const char *s, char **output, int numidents);
|
||||
extern char *plpgsql_parse_string_token(const char *token);
|
||||
extern const char *plpgsql_stmt_typename(PLpgSQL_stmt *stmt);
|
||||
extern void plpgsql_dumptree(PLpgSQL_function *func);
|
||||
|
||||
|
@ -894,8 +895,7 @@ extern int plpgsql_yylex(void);
|
|||
extern void plpgsql_push_back_token(int token);
|
||||
extern void plpgsql_yyerror(const char *message);
|
||||
extern int plpgsql_scanner_lineno(void);
|
||||
extern void plpgsql_scanner_init(const char *str, int functype);
|
||||
extern void plpgsql_scanner_init(const char *str);
|
||||
extern void plpgsql_scanner_finish(void);
|
||||
extern char *plpgsql_get_string_value(void);
|
||||
|
||||
#endif /* PLPGSQL_H */
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.67 2009/02/18 11:33:04 petere Exp $
|
||||
* $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.68 2009/04/19 18:52:57 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
@ -19,27 +19,31 @@
|
|||
#include "mb/pg_wchar.h"
|
||||
|
||||
|
||||
/* No reason to constrain amount of data slurped */
|
||||
#define YY_READ_BUF_SIZE 16777216
|
||||
|
||||
/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
|
||||
#undef fprintf
|
||||
#define fprintf(file, fmt, msg) ereport(ERROR, (errmsg_internal("%s", msg)))
|
||||
|
||||
/*
|
||||
* When we parse a token that requires multiple lexer rules to process,
|
||||
* remember the token's starting position this way.
|
||||
*/
|
||||
#define SAVE_TOKEN_START() \
|
||||
( start_lineno = plpgsql_scanner_lineno(), start_charpos = yytext )
|
||||
|
||||
/* Handles to the buffer that the lexer uses internally */
|
||||
static YY_BUFFER_STATE scanbufhandle;
|
||||
static char *scanbuf;
|
||||
|
||||
static const char *scanstr; /* original input string */
|
||||
|
||||
static int scanner_functype;
|
||||
static bool scanner_typereported;
|
||||
static int pushback_token;
|
||||
static bool have_pushback_token;
|
||||
static const char *cur_line_start;
|
||||
static int cur_line_num;
|
||||
static int xcdepth = 0; /* depth of nesting in slash-star comments */
|
||||
static char *dolqstart; /* current $foo$ quote start string */
|
||||
static int dolqlen; /* signal to plpgsql_get_string_value */
|
||||
|
||||
extern bool standard_conforming_strings;
|
||||
|
||||
bool plpgsql_SpaceScanned = false;
|
||||
%}
|
||||
|
@ -54,31 +58,73 @@ bool plpgsql_SpaceScanned = false;
|
|||
|
||||
%option case-insensitive
|
||||
|
||||
/*
|
||||
* Exclusive states are a subset of the core lexer's:
|
||||
* <xc> extended C-style comments
|
||||
* <xq> standard quoted strings
|
||||
* <xe> extended quoted strings (support backslash escape sequences)
|
||||
* <xdolq> $foo$ quoted strings
|
||||
*/
|
||||
|
||||
%x IN_STRING
|
||||
%x IN_COMMENT
|
||||
%x IN_DOLLARQUOTE
|
||||
%x xc
|
||||
%x xe
|
||||
%x xq
|
||||
%x xdolq
|
||||
|
||||
/*
|
||||
* Definitions --- these generally must match the core lexer, but in some
|
||||
* cases we can simplify, since we only care about identifying the token
|
||||
* boundaries and not about deriving the represented value. Also, we
|
||||
* aren't trying to lex multicharacter operators so their interactions
|
||||
* with comments go away.
|
||||
*/
|
||||
|
||||
space [ \t\n\r\f]
|
||||
horiz_space [ \t\f]
|
||||
newline [\n\r]
|
||||
non_newline [^\n\r]
|
||||
|
||||
comment ("--"{non_newline}*)
|
||||
|
||||
whitespace ({space}+|{comment})
|
||||
special_whitespace ({space}+|{comment}{newline})
|
||||
horiz_whitespace ({horiz_space}|{comment})
|
||||
whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
|
||||
|
||||
quote '
|
||||
quotestop {quote}{whitespace}*
|
||||
quotecontinue {quote}{whitespace_with_newline}{quote}
|
||||
quotefail {quote}{whitespace}*"-"
|
||||
|
||||
xestart [eE]{quote}
|
||||
xeinside [^\\']+
|
||||
xeescape [\\].
|
||||
|
||||
xqstart {quote}
|
||||
xqdouble {quote}{quote}
|
||||
xqinside [^']+
|
||||
|
||||
dolq_start [A-Za-z\200-\377_]
|
||||
dolq_cont [A-Za-z\200-\377_0-9]
|
||||
dolqdelim \$({dolq_start}{dolq_cont}*)?\$
|
||||
dolqfailed \${dolq_start}{dolq_cont}*
|
||||
dolqinside [^$]+
|
||||
|
||||
xcstart \/\*
|
||||
xcstop \*+\/
|
||||
xcinside [^*/]+
|
||||
|
||||
digit [0-9]
|
||||
ident_start [A-Za-z\200-\377_]
|
||||
ident_cont [A-Za-z\200-\377_0-9\$]
|
||||
|
||||
/* This is a simpler treatment of quoted identifiers than the core uses */
|
||||
quoted_ident (\"[^\"]*\")+
|
||||
|
||||
identifier ({ident_start}{ident_cont}*|{quoted_ident})
|
||||
|
||||
param \${digit}+
|
||||
|
||||
space [ \t\n\r\f]
|
||||
|
||||
/* $foo$ style quotes ("dollar quoting")
|
||||
* copied straight from the backend SQL parser
|
||||
*/
|
||||
dolq_start [A-Za-z\200-\377_]
|
||||
dolq_cont [A-Za-z\200-\377_0-9]
|
||||
dolqdelim \$({dolq_start}{dolq_cont}*)?\$
|
||||
dolqinside [^$]+
|
||||
|
||||
%%
|
||||
/* ----------
|
||||
* Local variables in scanner to remember where
|
||||
|
@ -95,17 +141,6 @@ dolqinside [^$]+
|
|||
BEGIN(INITIAL);
|
||||
plpgsql_SpaceScanned = false;
|
||||
|
||||
/* ----------
|
||||
* On the first call to a new source report the
|
||||
* function's type (T_FUNCTION or T_TRIGGER)
|
||||
* ----------
|
||||
*/
|
||||
if (!scanner_typereported)
|
||||
{
|
||||
scanner_typereported = true;
|
||||
return scanner_functype;
|
||||
}
|
||||
|
||||
/* ----------
|
||||
* The keyword rules
|
||||
* ----------
|
||||
|
@ -225,119 +260,134 @@ dump { return O_DUMP; }
|
|||
|
||||
{digit}+ { return T_NUMBER; }
|
||||
|
||||
\". {
|
||||
plpgsql_error_lineno = plpgsql_scanner_lineno();
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
||||
errmsg("unterminated quoted identifier")));
|
||||
}
|
||||
\". { yyerror("unterminated quoted identifier"); }
|
||||
|
||||
/* ----------
|
||||
* Ignore whitespaces but remember this happened
|
||||
* Ignore whitespace (including comments) but remember this happened
|
||||
* ----------
|
||||
*/
|
||||
{space}+ { plpgsql_SpaceScanned = true; }
|
||||
{whitespace} { plpgsql_SpaceScanned = true; }
|
||||
|
||||
/* ----------
|
||||
* Eat up comments
|
||||
* Comment and literal handling is mostly copied from the core lexer
|
||||
* ----------
|
||||
*/
|
||||
--[^\r\n]* ;
|
||||
{xcstart} {
|
||||
/* Set location in case of syntax error in comment */
|
||||
SAVE_TOKEN_START();
|
||||
xcdepth = 0;
|
||||
BEGIN(xc);
|
||||
plpgsql_SpaceScanned = true;
|
||||
}
|
||||
|
||||
\/\* { start_lineno = plpgsql_scanner_lineno();
|
||||
BEGIN(IN_COMMENT);
|
||||
}
|
||||
<IN_COMMENT>\*\/ { BEGIN(INITIAL); plpgsql_SpaceScanned = true; }
|
||||
<IN_COMMENT>\n ;
|
||||
<IN_COMMENT>. ;
|
||||
<IN_COMMENT><<EOF>> {
|
||||
plpgsql_error_lineno = start_lineno;
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
||||
errmsg("unterminated /* comment")));
|
||||
}
|
||||
<xc>{xcstart} {
|
||||
xcdepth++;
|
||||
}
|
||||
|
||||
/* ----------
|
||||
* Collect anything inside of ''s and return one STRING token
|
||||
*
|
||||
* Hacking yytext/yyleng here lets us avoid using yymore(), which is
|
||||
* a win for performance. It's safe because we know the underlying
|
||||
* input buffer is not changing.
|
||||
* ----------
|
||||
*/
|
||||
' {
|
||||
start_lineno = plpgsql_scanner_lineno();
|
||||
start_charpos = yytext;
|
||||
BEGIN(IN_STRING);
|
||||
}
|
||||
[eE]' {
|
||||
/* for now, treat the same as a regular literal */
|
||||
start_lineno = plpgsql_scanner_lineno();
|
||||
start_charpos = yytext;
|
||||
BEGIN(IN_STRING);
|
||||
}
|
||||
<IN_STRING>\\. { }
|
||||
<IN_STRING>\\ { /* can only happen with \ at EOF */ }
|
||||
<IN_STRING>'' { }
|
||||
<IN_STRING>' {
|
||||
/* tell plpgsql_get_string_value it's not a dollar quote */
|
||||
dolqlen = 0;
|
||||
/* adjust yytext/yyleng to describe whole string token */
|
||||
yyleng += (yytext - start_charpos);
|
||||
yytext = start_charpos;
|
||||
BEGIN(INITIAL);
|
||||
return T_STRING;
|
||||
}
|
||||
<IN_STRING>[^'\\]+ { }
|
||||
<IN_STRING><<EOF>> {
|
||||
plpgsql_error_lineno = start_lineno;
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
||||
errmsg("unterminated quoted string")));
|
||||
}
|
||||
<xc>{xcstop} {
|
||||
if (xcdepth <= 0)
|
||||
BEGIN(INITIAL);
|
||||
else
|
||||
xcdepth--;
|
||||
}
|
||||
|
||||
{dolqdelim} {
|
||||
start_lineno = plpgsql_scanner_lineno();
|
||||
start_charpos = yytext;
|
||||
dolqstart = pstrdup(yytext);
|
||||
BEGIN(IN_DOLLARQUOTE);
|
||||
}
|
||||
<IN_DOLLARQUOTE>{dolqdelim} {
|
||||
if (strcmp(yytext, dolqstart) == 0)
|
||||
{
|
||||
pfree(dolqstart);
|
||||
/* tell plpgsql_get_string_value it is a dollar quote */
|
||||
dolqlen = yyleng;
|
||||
<xc>{xcinside} {
|
||||
/* ignore */
|
||||
}
|
||||
|
||||
<xc>\/+ {
|
||||
/* ignore */
|
||||
}
|
||||
|
||||
<xc>\*+ {
|
||||
/* ignore */
|
||||
}
|
||||
|
||||
<xc><<EOF>> { yyerror("unterminated /* comment"); }
|
||||
|
||||
{xqstart} {
|
||||
SAVE_TOKEN_START();
|
||||
if (standard_conforming_strings)
|
||||
BEGIN(xq);
|
||||
else
|
||||
BEGIN(xe);
|
||||
}
|
||||
{xestart} {
|
||||
SAVE_TOKEN_START();
|
||||
BEGIN(xe);
|
||||
}
|
||||
<xq,xe>{quotestop} |
|
||||
<xq,xe>{quotefail} {
|
||||
yyless(1);
|
||||
BEGIN(INITIAL);
|
||||
/* adjust yytext/yyleng to describe whole string token */
|
||||
yyleng += (yytext - start_charpos);
|
||||
yytext = start_charpos;
|
||||
BEGIN(INITIAL);
|
||||
return T_STRING;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* When we fail to match $...$ to dolqstart, transfer
|
||||
* the $... part to the output, but put back the final
|
||||
* $ for rescanning. Consider $delim$...$junk$delim$
|
||||
*/
|
||||
yyless(yyleng-1);
|
||||
}
|
||||
}
|
||||
<IN_DOLLARQUOTE>{dolqinside} { }
|
||||
<IN_DOLLARQUOTE>. { /* needed for $ inside the quoted text */ }
|
||||
<IN_DOLLARQUOTE><<EOF>> {
|
||||
plpgsql_error_lineno = start_lineno;
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATATYPE_MISMATCH),
|
||||
errmsg("unterminated dollar-quoted string")));
|
||||
}
|
||||
}
|
||||
<xq,xe>{xqdouble} {
|
||||
}
|
||||
<xq>{xqinside} {
|
||||
}
|
||||
<xe>{xeinside} {
|
||||
}
|
||||
<xe>{xeescape} {
|
||||
}
|
||||
<xq,xe>{quotecontinue} {
|
||||
/* ignore */
|
||||
}
|
||||
<xe>. {
|
||||
/* This is only needed for \ just before EOF */
|
||||
}
|
||||
<xq,xe><<EOF>> { yyerror("unterminated quoted string"); }
|
||||
|
||||
{dolqdelim} {
|
||||
SAVE_TOKEN_START();
|
||||
dolqstart = pstrdup(yytext);
|
||||
BEGIN(xdolq);
|
||||
}
|
||||
{dolqfailed} {
|
||||
/* throw back all but the initial "$" */
|
||||
yyless(1);
|
||||
/* and treat it as {other} */
|
||||
return yytext[0];
|
||||
}
|
||||
<xdolq>{dolqdelim} {
|
||||
if (strcmp(yytext, dolqstart) == 0)
|
||||
{
|
||||
pfree(dolqstart);
|
||||
BEGIN(INITIAL);
|
||||
/* adjust yytext/yyleng to describe whole string */
|
||||
yyleng += (yytext - start_charpos);
|
||||
yytext = start_charpos;
|
||||
return T_STRING;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* When we fail to match $...$ to dolqstart, transfer
|
||||
* the $... part to the output, but put back the final
|
||||
* $ for rescanning. Consider $delim$...$junk$delim$
|
||||
*/
|
||||
yyless(yyleng-1);
|
||||
}
|
||||
}
|
||||
<xdolq>{dolqinside} {
|
||||
}
|
||||
<xdolq>{dolqfailed} {
|
||||
}
|
||||
<xdolq>. {
|
||||
/* This is only needed for $ inside the quoted text */
|
||||
}
|
||||
<xdolq><<EOF>> { yyerror("unterminated dollar-quoted string"); }
|
||||
|
||||
/* ----------
|
||||
* Any unmatched character is returned as is
|
||||
* ----------
|
||||
*/
|
||||
. { return yytext[0]; }
|
||||
. {
|
||||
return yytext[0];
|
||||
}
|
||||
|
||||
%%
|
||||
|
||||
|
@ -437,7 +487,7 @@ plpgsql_scanner_lineno(void)
|
|||
* to cite in error messages.
|
||||
*/
|
||||
void
|
||||
plpgsql_scanner_init(const char *str, int functype)
|
||||
plpgsql_scanner_init(const char *str)
|
||||
{
|
||||
Size slen;
|
||||
|
||||
|
@ -460,9 +510,6 @@ plpgsql_scanner_init(const char *str, int functype)
|
|||
/* Other setup */
|
||||
scanstr = str;
|
||||
|
||||
scanner_functype = functype;
|
||||
scanner_typereported = false;
|
||||
|
||||
have_pushback_token = false;
|
||||
|
||||
cur_line_start = scanbuf;
|
||||
|
@ -493,77 +540,3 @@ plpgsql_scanner_finish(void)
|
|||
yy_delete_buffer(scanbufhandle);
|
||||
pfree(scanbuf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called after a T_STRING token is read to get the string literal's value
|
||||
* as a palloc'd string. (We make this a separate call because in many
|
||||
* scenarios there's no need to get the decoded value.)
|
||||
*
|
||||
* Note: we expect the literal to be the most recently lexed token. This
|
||||
* would not work well if we supported multiple-token pushback or if
|
||||
* plpgsql_yylex() wanted to read ahead beyond a T_STRING token.
|
||||
*/
|
||||
char *
|
||||
plpgsql_get_string_value(void)
|
||||
{
|
||||
char *result;
|
||||
const char *cp;
|
||||
int len;
|
||||
|
||||
if (dolqlen > 0)
|
||||
{
|
||||
/* Token is a $foo$...$foo$ string */
|
||||
len = yyleng - 2 * dolqlen;
|
||||
Assert(len >= 0);
|
||||
result = (char *) palloc(len + 1);
|
||||
memcpy(result, yytext + dolqlen, len);
|
||||
result[len] = '\0';
|
||||
}
|
||||
else if (*yytext == 'E' || *yytext == 'e')
|
||||
{
|
||||
/* Token is an E'...' string */
|
||||
result = (char *) palloc(yyleng + 1); /* more than enough room */
|
||||
len = 0;
|
||||
for (cp = yytext + 2; *cp; cp++)
|
||||
{
|
||||
if (*cp == '\'')
|
||||
{
|
||||
if (cp[1] == '\'')
|
||||
result[len++] = *cp++;
|
||||
/* else it must be string end quote */
|
||||
}
|
||||
else if (*cp == '\\')
|
||||
{
|
||||
if (cp[1] != '\0') /* just a paranoid check */
|
||||
result[len++] = *(++cp);
|
||||
}
|
||||
else
|
||||
result[len++] = *cp;
|
||||
}
|
||||
result[len] = '\0';
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Token is a '...' string */
|
||||
result = (char *) palloc(yyleng + 1); /* more than enough room */
|
||||
len = 0;
|
||||
for (cp = yytext + 1; *cp; cp++)
|
||||
{
|
||||
if (*cp == '\'')
|
||||
{
|
||||
if (cp[1] == '\'')
|
||||
result[len++] = *cp++;
|
||||
/* else it must be string end quote */
|
||||
}
|
||||
else if (*cp == '\\')
|
||||
{
|
||||
if (cp[1] != '\0') /* just a paranoid check */
|
||||
result[len++] = *(++cp);
|
||||
}
|
||||
else
|
||||
result[len++] = *cp;
|
||||
}
|
||||
result[len] = '\0';
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -3737,3 +3737,74 @@ SELECT * FROM leaker_1(true);
|
|||
|
||||
DROP FUNCTION leaker_1(bool);
|
||||
DROP FUNCTION leaker_2(bool);
|
||||
-- Test handling of string literals.
|
||||
set standard_conforming_strings = off;
|
||||
create or replace function strtest() returns text as $$
|
||||
begin
|
||||
raise notice 'foo\\bar\041baz';
|
||||
return 'foo\\bar\041baz';
|
||||
end
|
||||
$$ language plpgsql;
|
||||
WARNING: nonstandard use of \\ in a string literal
|
||||
HINT: Use the escape string syntax for backslashes, e.g., E'\\'.
|
||||
CONTEXT: string literal in PL/PgSQL function "strtest" near line 2
|
||||
WARNING: nonstandard use of \\ in a string literal
|
||||
LINE 1: SELECT 'foo\\bar\041baz'
|
||||
^
|
||||
HINT: Use the escape string syntax for backslashes, e.g., E'\\'.
|
||||
QUERY: SELECT 'foo\\bar\041baz'
|
||||
CONTEXT: SQL statement in PL/PgSQL function "strtest" near line 3
|
||||
select strtest();
|
||||
NOTICE: foo\bar!baz
|
||||
WARNING: nonstandard use of \\ in a string literal
|
||||
LINE 1: SELECT 'foo\\bar\041baz'
|
||||
^
|
||||
HINT: Use the escape string syntax for backslashes, e.g., E'\\'.
|
||||
QUERY: SELECT 'foo\\bar\041baz'
|
||||
CONTEXT: PL/pgSQL function "strtest" line 3 at RETURN
|
||||
strtest
|
||||
-------------
|
||||
foo\bar!baz
|
||||
(1 row)
|
||||
|
||||
create or replace function strtest() returns text as $$
|
||||
begin
|
||||
raise notice E'foo\\bar\041baz';
|
||||
return E'foo\\bar\041baz';
|
||||
end
|
||||
$$ language plpgsql;
|
||||
select strtest();
|
||||
NOTICE: foo\bar!baz
|
||||
strtest
|
||||
-------------
|
||||
foo\bar!baz
|
||||
(1 row)
|
||||
|
||||
set standard_conforming_strings = on;
|
||||
create or replace function strtest() returns text as $$
|
||||
begin
|
||||
raise notice 'foo\\bar\041baz\';
|
||||
return 'foo\\bar\041baz\';
|
||||
end
|
||||
$$ language plpgsql;
|
||||
select strtest();
|
||||
NOTICE: foo\\bar\041baz\
|
||||
strtest
|
||||
------------------
|
||||
foo\\bar\041baz\
|
||||
(1 row)
|
||||
|
||||
create or replace function strtest() returns text as $$
|
||||
begin
|
||||
raise notice E'foo\\bar\041baz';
|
||||
return E'foo\\bar\041baz';
|
||||
end
|
||||
$$ language plpgsql;
|
||||
select strtest();
|
||||
NOTICE: foo\bar!baz
|
||||
strtest
|
||||
-------------
|
||||
foo\bar!baz
|
||||
(1 row)
|
||||
|
||||
drop function strtest();
|
||||
|
|
|
@ -3005,3 +3005,47 @@ SELECT * FROM leaker_1(true);
|
|||
|
||||
DROP FUNCTION leaker_1(bool);
|
||||
DROP FUNCTION leaker_2(bool);
|
||||
|
||||
-- Test handling of string literals.
|
||||
|
||||
set standard_conforming_strings = off;
|
||||
|
||||
create or replace function strtest() returns text as $$
|
||||
begin
|
||||
raise notice 'foo\\bar\041baz';
|
||||
return 'foo\\bar\041baz';
|
||||
end
|
||||
$$ language plpgsql;
|
||||
|
||||
select strtest();
|
||||
|
||||
create or replace function strtest() returns text as $$
|
||||
begin
|
||||
raise notice E'foo\\bar\041baz';
|
||||
return E'foo\\bar\041baz';
|
||||
end
|
||||
$$ language plpgsql;
|
||||
|
||||
select strtest();
|
||||
|
||||
set standard_conforming_strings = on;
|
||||
|
||||
create or replace function strtest() returns text as $$
|
||||
begin
|
||||
raise notice 'foo\\bar\041baz\';
|
||||
return 'foo\\bar\041baz\';
|
||||
end
|
||||
$$ language plpgsql;
|
||||
|
||||
select strtest();
|
||||
|
||||
create or replace function strtest() returns text as $$
|
||||
begin
|
||||
raise notice E'foo\\bar\041baz';
|
||||
return E'foo\\bar\041baz';
|
||||
end
|
||||
$$ language plpgsql;
|
||||
|
||||
select strtest();
|
||||
|
||||
drop function strtest();
|
||||
|
|
Loading…
Reference in a new issue