Revise plpgsql's scanner to process comments and string literals in a way more nearly matching the core SQL scanner.

The user-visible effects are:

* Block comments (slash-star comments) now nest, as per SQL spec.

* In standard_conforming_strings mode, backslash as the last character of a non-E string literal is now correctly taken as an ordinary character; formerly it was misinterpreted as escaping the ending quote. (Since the string also had to pass through the core scanner, this invariably led to syntax errors.)

* Formerly, backslashes in the format string of RAISE were always treated as quoting the next character, regardless of mode. Now, they are ordinary characters with standard_conforming_strings on, while with it off, they introduce the same set of escapes as in the core SQL scanner. Also, escape_string_warning is now effective for RAISE format strings. These changes make RAISE format strings work just like any other string literal.

This is implemented by copying and pasting a lot of logic from the core scanner. It would be a good idea to look into getting rid of plpgsql's scanner entirely in favor of using the core scanner. However, that involves more change than I can justify making during beta --- in particular, the core scanner would have to become re-entrant.

In passing, remove the kluge that made the plpgsql scanner emit T_FUNCTION or T_TRIGGER as a made-up first token. That presumably had some value once upon a time, but now it's just useless complication for both the scanner and the grammar.
2009-04-19 18:52:58 +00:00 · 2009-04-19 18:52:58 +00:00 · 3a624e9200
commit 3a624e9200
parent 7f2f798b30
8 changed files with 395 additions and 233 deletions
--- a/doc/src/sgml/plpgsql.sgml
+++ b/doc/src/sgml/plpgsql.sgml
@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/plpgsql.sgml,v 1.139 2009/04/02 19:20:45 momjian Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/plpgsql.sgml,v 1.140 2009/04/19 18:52:56 tgl Exp $ -->

 <chapter id="plpgsql">
  <title><application>PL/pgSQL</application> - <acronym>SQL</acronym> Procedural Language</title>
@ -220,10 +220,8 @@ END <optional> <replaceable>label</replaceable> </optional>;
     There are two types of comments in <application>PL/pgSQL</>. A double
     dash (<literal>--</literal>) starts a comment that extends to the end of
     the line. A <literal>/*</literal> starts a block comment that extends to
-     the next occurrence of <literal>*/</literal>.  Block comments cannot be
-     nested, but double dash comments can be enclosed into a block comment and
-     a double dash can hide the block comment delimiters <literal>/*</literal>
-     and <literal>*/</literal>.
+     the next occurrence of <literal>*/</literal>.  Block comments nest,
+     just as in ordinary SQL.
    </para>

    <para>
--- a/src/pl/plpgsql/src/gram.y
+++ b/src/pl/plpgsql/src/gram.y
@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.121 2009/02/18 11:33:04 petere Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.122 2009/04/19 18:52:57 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -62,6 +62,8 @@ static PLpgSQL_row		*make_scalar_list1(const char *initial_name,
 										   int lineno);
 static	void			 check_sql_expr(const char *stmt);
 static	void			 plpgsql_sql_error_callback(void *arg);
+static	char			*parse_string_token(const char *token);
+static	void			 plpgsql_string_error_callback(void *arg);
 static	char			*check_label(const char *yytxt);
 static	void			 check_labels(const char *start_label,
 									  const char *end_label);
@ -228,8 +230,6 @@ static List				*read_raise_options(void);
 		/*
 		 * Other tokens
 		 */
-%token	T_FUNCTION
-%token	T_TRIGGER
 %token	T_STRING
 %token	T_NUMBER
 %token	T_SCALAR				/* a VAR, RECFIELD, or TRIGARG */
@ -244,13 +244,9 @@ static List				*read_raise_options(void);

 %%

-pl_function		: T_FUNCTION comp_optsect pl_block opt_semi
+pl_function		: comp_optsect pl_block opt_semi
 					{
-						yylval.program = (PLpgSQL_stmt_block *)$3;
-					}
-				| T_TRIGGER comp_optsect pl_block opt_semi
-					{
-						yylval.program = (PLpgSQL_stmt_block *)$3;
+						yylval.program = (PLpgSQL_stmt_block *) $2;
 					}
 				;

@ -1403,7 +1399,7 @@ stmt_raise		: K_RAISE lno
 							if (tok == T_STRING)
 							{
 								/* old style message and parameters */
-								new->message = plpgsql_get_string_value();
+								new->message = parse_string_token(yytext);
 								/*
 								 * We expect either a semi-colon, which
 								 * indicates no parameters, or a comma that
@ -1435,7 +1431,7 @@ stmt_raise		: K_RAISE lno

 									if (yylex() != T_STRING)
 										yyerror("syntax error");
-									sqlstatestr = plpgsql_get_string_value();
+									sqlstatestr = parse_string_token(yytext);

 									if (strlen(sqlstatestr) != 5)
 										yyerror("invalid SQLSTATE code");
@ -1778,7 +1774,7 @@ proc_condition	: opt_lblname
 							/* next token should be a string literal */
 							if (yylex() != T_STRING)
 								yyerror("syntax error");
-							sqlstatestr = plpgsql_get_string_value();
+							sqlstatestr = parse_string_token(yytext);

 							if (strlen(sqlstatestr) != 5)
 								yyerror("invalid SQLSTATE code");
@ -2738,6 +2734,49 @@ plpgsql_sql_error_callback(void *arg)
 	errposition(0);
 }

+/*
+ * Convert a string-literal token to the represented string value.
+ *
+ * To do this, we need to invoke the core lexer.  To avoid confusion between
+ * the core bison/flex definitions and our own, the actual invocation is in
+ * pl_funcs.c.  Here we are only concerned with setting up the right errcontext
+ * state, which is handled the same as in check_sql_expr().
+ */
+static char *
+parse_string_token(const char *token)
+{
+	char	   *result;
+	ErrorContextCallback  syntax_errcontext;
+	ErrorContextCallback *previous_errcontext;
+
+	/* See comments in check_sql_expr() */
+	Assert(error_context_stack->callback == plpgsql_compile_error_callback);
+
+	previous_errcontext = error_context_stack;
+	syntax_errcontext.callback = plpgsql_string_error_callback;
+	syntax_errcontext.arg = (char *) token;
+	syntax_errcontext.previous = error_context_stack->previous;
+	error_context_stack = &syntax_errcontext;
+
+	result = plpgsql_parse_string_token(token);
+
+	/* Restore former ereport callback */
+	error_context_stack = previous_errcontext;
+
+	return result;
+}
+
+static void
+plpgsql_string_error_callback(void *arg)
+{
+	Assert(plpgsql_error_funcname);
+
+	errcontext("string literal in PL/PgSQL function \"%s\" near line %d",
+			   plpgsql_error_funcname, plpgsql_error_lineno);
+	/* representing the string literal as internalquery seems overkill */
+	errposition(0);
+}
+
 static char *
 check_label(const char *yytxt)
 {
--- a/src/pl/plpgsql/src/pl_comp.c
+++ b/src/pl/plpgsql/src/pl_comp.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.134 2009/02/18 11:33:04 petere Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.135 2009/04/19 18:52:57 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -261,7 +261,7 @@ do_compile(FunctionCallInfo fcinfo,
 		   bool forValidator)
 {
 	Form_pg_proc procStruct = (Form_pg_proc) GETSTRUCT(procTup);
-	int			functype = CALLED_AS_TRIGGER(fcinfo) ? T_TRIGGER : T_FUNCTION;
+	bool		is_trigger = CALLED_AS_TRIGGER(fcinfo);
 	Datum		prosrcdatum;
 	bool		isnull;
 	char	   *proc_source;
@ -293,7 +293,7 @@ do_compile(FunctionCallInfo fcinfo,
 	if (isnull)
 		elog(ERROR, "null prosrc");
 	proc_source = TextDatumGetCString(prosrcdatum);
-	plpgsql_scanner_init(proc_source, functype);
+	plpgsql_scanner_init(proc_source);

 	plpgsql_error_funcname = pstrdup(NameStr(procStruct->proname));
 	plpgsql_error_lineno = 0;
@ -359,13 +359,13 @@ do_compile(FunctionCallInfo fcinfo,
 	function->fn_oid = fcinfo->flinfo->fn_oid;
 	function->fn_xmin = HeapTupleHeaderGetXmin(procTup->t_data);
 	function->fn_tid = procTup->t_self;
-	function->fn_functype = functype;
+	function->fn_is_trigger = is_trigger;
 	function->fn_cxt = func_cxt;
 	function->out_param_varno = -1;		/* set up for no OUT param */

-	switch (functype)
+	switch (is_trigger)
 	{
-		case T_FUNCTION:
+		case false:

 			/*
 			 * Fetch info about the procedure's parameters. Allocations aren't
@ -564,7 +564,7 @@ do_compile(FunctionCallInfo fcinfo,
 			ReleaseSysCache(typeTup);
 			break;

-		case T_TRIGGER:
+		case true:
 			/* Trigger procedure's return type is unknown yet */
 			function->fn_rettype = InvalidOid;
 			function->fn_retbyval = false;
@ -645,7 +645,7 @@ do_compile(FunctionCallInfo fcinfo,
 			break;

 		default:
-			elog(ERROR, "unrecognized function typecode: %u", functype);
+			elog(ERROR, "unrecognized function typecode: %d", (int) is_trigger);
 			break;
 	}

@ -790,7 +790,7 @@ plpgsql_parse_word(const char *word)
 	 * Recognize tg_argv when compiling triggers
 	 * (XXX this sucks, it should be a regular variable in the namestack)
 	 */
-	if (plpgsql_curr_compile->fn_functype == T_TRIGGER)
+	if (plpgsql_curr_compile->fn_is_trigger)
 	{
 		if (strcmp(cp[0], "tg_argv") == 0)
 		{
--- a/src/pl/plpgsql/src/pl_funcs.c
+++ b/src/pl/plpgsql/src/pl_funcs.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.76 2009/02/18 11:33:04 petere Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.77 2009/04/19 18:52:57 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -17,6 +17,8 @@

 #include <ctype.h>

+#include "parser/gramparse.h"
+#include "parser/gram.h"
 #include "parser/scansup.h"


@ -459,6 +461,41 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
 }


+/*
+ * plpgsql_parse_string_token - get the value represented by a string literal
+ *
+ * We do not make plpgsql's lexer produce the represented value, because
+ * in many cases we don't need it.  Instead this function is invoked when
+ * we do need it.  The input is the T_STRING token as identified by the lexer.
+ *
+ * The result is a palloc'd string.
+ *
+ * Note: this is called only from plpgsql's gram.y, but we can't just put it
+ * there because including parser/gram.h there would cause confusion.
+ */
+char *
+plpgsql_parse_string_token(const char *token)
+{
+	int		ctoken;
+
+	/*
+	 * We use the core lexer to do the dirty work.  Aside from getting the
+	 * right results for escape sequences and so on, this helps us produce
+	 * appropriate warnings for escape_string_warning etc.
+	 */
+	scanner_init(token);
+
+	ctoken = base_yylex();
+
+	if (ctoken != SCONST)
+		elog(ERROR, "unexpected result from base lexer: %d", ctoken);
+
+	scanner_finish();
+
+	return base_yylval.str;
+}
+
+
 /*
 * Statement type as a string, for use in error messages etc.
 */
--- a/src/pl/plpgsql/src/plpgsql.h
+++ b/src/pl/plpgsql/src/plpgsql.h
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.110 2009/04/09 02:57:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.111 2009/04/19 18:52:57 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -650,7 +650,7 @@ typedef struct PLpgSQL_function
 	Oid			fn_oid;
 	TransactionId fn_xmin;
 	ItemPointerData fn_tid;
-	int			fn_functype;
+	bool		fn_is_trigger;
 	PLpgSQL_func_hashkey *fn_hashkey;	/* back-link to hashtable key */
 	MemoryContext fn_cxt;

@ -880,6 +880,7 @@ extern void plpgsql_ns_rename(char *oldname, char *newname);
 * ----------
 */
 extern void plpgsql_convert_ident(const char *s, char **output, int numidents);
+extern char *plpgsql_parse_string_token(const char *token);
 extern const char *plpgsql_stmt_typename(PLpgSQL_stmt *stmt);
 extern void plpgsql_dumptree(PLpgSQL_function *func);

@ -894,8 +895,7 @@ extern int	plpgsql_yylex(void);
 extern void plpgsql_push_back_token(int token);
 extern void plpgsql_yyerror(const char *message);
 extern int	plpgsql_scanner_lineno(void);
-extern void plpgsql_scanner_init(const char *str, int functype);
+extern void plpgsql_scanner_init(const char *str);
 extern void plpgsql_scanner_finish(void);
-extern char *plpgsql_get_string_value(void);

 #endif   /* PLPGSQL_H */
--- a/src/pl/plpgsql/src/scan.l
+++ b/src/pl/plpgsql/src/scan.l
@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.67 2009/02/18 11:33:04 petere Exp $
+ *	  $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.68 2009/04/19 18:52:57 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -19,27 +19,31 @@
 #include "mb/pg_wchar.h"


-/* No reason to constrain amount of data slurped */
-#define YY_READ_BUF_SIZE 16777216
-
 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
 #undef fprintf
 #define fprintf(file, fmt, msg)  ereport(ERROR, (errmsg_internal("%s", msg)))

+/*
+ * When we parse a token that requires multiple lexer rules to process,
+ * remember the token's starting position this way.
+ */
+#define SAVE_TOKEN_START()  \
+	( start_lineno = plpgsql_scanner_lineno(), start_charpos = yytext )
+
 /* Handles to the buffer that the lexer uses internally */
 static YY_BUFFER_STATE scanbufhandle;
 static char *scanbuf;

 static const char *scanstr;		/* original input string */

-static int	scanner_functype;
-static bool	scanner_typereported;
 static int	pushback_token;
 static bool have_pushback_token;
 static const char *cur_line_start;
 static int	cur_line_num;
+static int		xcdepth = 0;	/* depth of nesting in slash-star comments */
 static char    *dolqstart;      /* current $foo$ quote start string */
-static int	dolqlen;			/* signal to plpgsql_get_string_value */
+
+extern bool		standard_conforming_strings;

 bool plpgsql_SpaceScanned = false;
 %}
@ -54,31 +58,73 @@ bool plpgsql_SpaceScanned = false;

 %option case-insensitive

+/*
+ * Exclusive states are a subset of the core lexer's:
+ *  <xc> extended C-style comments
+ *  <xq> standard quoted strings
+ *  <xe> extended quoted strings (support backslash escape sequences)
+ *  <xdolq> $foo$ quoted strings
+ */

-%x	IN_STRING
-%x	IN_COMMENT
-%x	IN_DOLLARQUOTE
+%x xc
+%x xe
+%x xq
+%x xdolq
+
+/*
+ * Definitions --- these generally must match the core lexer, but in some
+ * cases we can simplify, since we only care about identifying the token
+ * boundaries and not about deriving the represented value.  Also, we
+ * aren't trying to lex multicharacter operators so their interactions
+ * with comments go away.
+ */
+
+space			[ \t\n\r\f]
+horiz_space		[ \t\f]
+newline			[\n\r]
+non_newline		[^\n\r]
+
+comment			("--"{non_newline}*)
+
+whitespace		({space}+|{comment})
+special_whitespace		({space}+|{comment}{newline})
+horiz_whitespace		({horiz_space}|{comment})
+whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)
+
+quote			'
+quotestop		{quote}{whitespace}*
+quotecontinue	{quote}{whitespace_with_newline}{quote}
+quotefail		{quote}{whitespace}*"-"
+
+xestart			[eE]{quote}
+xeinside		[^\\']+
+xeescape		[\\].
+
+xqstart			{quote}
+xqdouble		{quote}{quote}
+xqinside		[^']+
+
+dolq_start		[A-Za-z\200-\377_]
+dolq_cont		[A-Za-z\200-\377_0-9]
+dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
+dolqfailed		\${dolq_start}{dolq_cont}*
+dolqinside		[^$]+
+
+xcstart			\/\*
+xcstop			\*+\/
+xcinside		[^*/]+

 digit			[0-9]
 ident_start		[A-Za-z\200-\377_]
 ident_cont		[A-Za-z\200-\377_0-9\$]

+/* This is a simpler treatment of quoted identifiers than the core uses */
 quoted_ident	(\"[^\"]*\")+

 identifier		({ident_start}{ident_cont}*|{quoted_ident})

 param			\${digit}+

-space			[ \t\n\r\f]
-
-/* $foo$ style quotes ("dollar quoting")
- * copied straight from the backend SQL parser
- */
-dolq_start		[A-Za-z\200-\377_]
-dolq_cont		[A-Za-z\200-\377_0-9]
-dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
-dolqinside		[^$]+
-
 %%
    /* ----------
     * Local variables in scanner to remember where
@ -95,17 +141,6 @@ dolqinside		[^$]+
    BEGIN(INITIAL);
    plpgsql_SpaceScanned = false;

-    /* ----------
-     * On the first call to a new source report the
-     * function's type (T_FUNCTION or T_TRIGGER)
-     * ----------
-     */
-	if (!scanner_typereported)
-	{
-		scanner_typereported = true;
-		return scanner_functype;
-	}
-
    /* ----------
     * The keyword rules
     * ----------
@ -225,119 +260,134 @@ dump			{ return O_DUMP;			}

 {digit}+		{ return T_NUMBER;			}

-\".				{
-				plpgsql_error_lineno = plpgsql_scanner_lineno();
-				ereport(ERROR,
-						(errcode(ERRCODE_DATATYPE_MISMATCH),
-						 errmsg("unterminated quoted identifier")));
-			}
+\".				{ yyerror("unterminated quoted identifier"); }

    /* ----------
-     * Ignore whitespaces but remember this happened
+     * Ignore whitespace (including comments) but remember this happened
     * ----------
     */
-{space}+		{ plpgsql_SpaceScanned = true;		}
+{whitespace}	{ plpgsql_SpaceScanned = true; }

    /* ----------
-     * Eat up comments
+     * Comment and literal handling is mostly copied from the core lexer
     * ----------
     */
--[^\r\n]*		;
+{xcstart}		{
+					/* Set location in case of syntax error in comment */
+					SAVE_TOKEN_START();
+					xcdepth = 0;
+					BEGIN(xc);
+					plpgsql_SpaceScanned = true;
+				}

-\/\*			{ start_lineno = plpgsql_scanner_lineno();
-			  BEGIN(IN_COMMENT);
-			}
-<IN_COMMENT>\*\/	{ BEGIN(INITIAL); plpgsql_SpaceScanned = true; }
-<IN_COMMENT>\n		;
-<IN_COMMENT>.		;
-<IN_COMMENT><<EOF>>	{
-				plpgsql_error_lineno = start_lineno;
-				ereport(ERROR,
-						(errcode(ERRCODE_DATATYPE_MISMATCH),
-						 errmsg("unterminated /* comment")));
-			}
+<xc>{xcstart}	{
+					xcdepth++;
+				}

-    /* ----------
-     * Collect anything inside of ''s and return one STRING token
-	 *
-	 * Hacking yytext/yyleng here lets us avoid using yymore(), which is
-	 * a win for performance.  It's safe because we know the underlying
-	 * input buffer is not changing.
-     * ----------
-     */
-'			{
-			  start_lineno = plpgsql_scanner_lineno();
-			  start_charpos = yytext;
-			  BEGIN(IN_STRING);
-			}
-[eE]'		{
-			  /* for now, treat the same as a regular literal */
-			  start_lineno = plpgsql_scanner_lineno();
-			  start_charpos = yytext;
-			  BEGIN(IN_STRING);
-			}
-<IN_STRING>\\.		{ }
-<IN_STRING>\\		{ /* can only happen with \ at EOF */ }
-<IN_STRING>''		{ }
-<IN_STRING>'		{
-			  /* tell plpgsql_get_string_value it's not a dollar quote */
-			  dolqlen = 0;
-			  /* adjust yytext/yyleng to describe whole string token */
-			  yyleng += (yytext - start_charpos);
-			  yytext = start_charpos;
-			  BEGIN(INITIAL);
-			  return T_STRING;
-			}
-<IN_STRING>[^'\\]+	{ }
-<IN_STRING><<EOF>>	{
-				plpgsql_error_lineno = start_lineno;
-				ereport(ERROR,
-						(errcode(ERRCODE_DATATYPE_MISMATCH),
-						 errmsg("unterminated quoted string")));
-			}
+<xc>{xcstop}	{
+					if (xcdepth <= 0)
+						BEGIN(INITIAL);
+					else
+						xcdepth--;
+				}

-{dolqdelim}		{
-			  start_lineno = plpgsql_scanner_lineno();
-			  start_charpos = yytext;
-			  dolqstart = pstrdup(yytext);
-			  BEGIN(IN_DOLLARQUOTE);
-			}
-<IN_DOLLARQUOTE>{dolqdelim} {
-			  if (strcmp(yytext, dolqstart) == 0)
-			  {
-					pfree(dolqstart);
-					/* tell plpgsql_get_string_value it is a dollar quote */
-					dolqlen = yyleng;
+<xc>{xcinside}	{
+					/* ignore */
+				}
+
+<xc>\/+			{
+					/* ignore */
+				}
+
+<xc>\*+			{
+					/* ignore */
+				}
+
+<xc><<EOF>>		{ yyerror("unterminated /* comment"); }
+
+{xqstart}		{
+					SAVE_TOKEN_START();
+					if (standard_conforming_strings)
+						BEGIN(xq);
+					else
+						BEGIN(xe);
+				}
+{xestart}		{
+					SAVE_TOKEN_START();
+					BEGIN(xe);
+				}
+<xq,xe>{quotestop}	|
+<xq,xe>{quotefail} {
+					yyless(1);
+					BEGIN(INITIAL);
 					/* adjust yytext/yyleng to describe whole string token */
 					yyleng += (yytext - start_charpos);
 					yytext = start_charpos;
-					BEGIN(INITIAL);
 					return T_STRING;
-			  }
-			  else
-			  {
-					/*
-					 * When we fail to match $...$ to dolqstart, transfer
-					 * the $... part to the output, but put back the final
-					 * $ for rescanning.  Consider $delim$...$junk$delim$
-					 */
-					yyless(yyleng-1);
-			  }
-			}
-<IN_DOLLARQUOTE>{dolqinside} { }
-<IN_DOLLARQUOTE>.	{ /* needed for $ inside the quoted text */ }
-<IN_DOLLARQUOTE><<EOF>>	{
-				plpgsql_error_lineno = start_lineno;
-				ereport(ERROR,
-						(errcode(ERRCODE_DATATYPE_MISMATCH),
-						 errmsg("unterminated dollar-quoted string")));
-			}
+				}
+<xq,xe>{xqdouble} {
+				}
+<xq>{xqinside}  {
+				}
+<xe>{xeinside}  {
+				}
+<xe>{xeescape}  {
+				}
+<xq,xe>{quotecontinue} {
+					/* ignore */
+				}
+<xe>.			{
+					/* This is only needed for \ just before EOF */
+				}
+<xq,xe><<EOF>>		{ yyerror("unterminated quoted string"); }
+
+{dolqdelim}		{
+					SAVE_TOKEN_START();
+					dolqstart = pstrdup(yytext);
+					BEGIN(xdolq);
+				}
+{dolqfailed}	{
+					/* throw back all but the initial "$" */
+					yyless(1);
+					/* and treat it as {other} */
+					return yytext[0];
+				}
+<xdolq>{dolqdelim} {
+					if (strcmp(yytext, dolqstart) == 0)
+					{
+						pfree(dolqstart);
+						BEGIN(INITIAL);
+						/* adjust yytext/yyleng to describe whole string */
+						yyleng += (yytext - start_charpos);
+						yytext = start_charpos;
+						return T_STRING;
+					}
+					else
+					{
+						/*
+						 * When we fail to match $...$ to dolqstart, transfer
+						 * the $... part to the output, but put back the final
+						 * $ for rescanning.  Consider $delim$...$junk$delim$
+						 */
+						yyless(yyleng-1);
+					}
+				}
+<xdolq>{dolqinside} {
+				}
+<xdolq>{dolqfailed} {
+				}
+<xdolq>.		{
+					/* This is only needed for $ inside the quoted text */
+				}
+<xdolq><<EOF>>	{ yyerror("unterminated dollar-quoted string"); }

    /* ----------
     * Any unmatched character is returned as is
     * ----------
     */
-.			{ return yytext[0];			}
+.				{
+					return yytext[0];
+				}

 %%

@ -437,7 +487,7 @@ plpgsql_scanner_lineno(void)
 * to cite in error messages.
 */
 void
-plpgsql_scanner_init(const char *str, int functype)
+plpgsql_scanner_init(const char *str)
 {
 	Size	slen;

@ -460,9 +510,6 @@ plpgsql_scanner_init(const char *str, int functype)
 	/* Other setup */
 	scanstr = str;

-    scanner_functype = functype;
-    scanner_typereported = false;
-
 	have_pushback_token = false;

 	cur_line_start = scanbuf;
@ -493,77 +540,3 @@ plpgsql_scanner_finish(void)
 	yy_delete_buffer(scanbufhandle);
 	pfree(scanbuf);
 }
-
-/*
- * Called after a T_STRING token is read to get the string literal's value
- * as a palloc'd string.  (We make this a separate call because in many
- * scenarios there's no need to get the decoded value.)
- *
- * Note: we expect the literal to be the most recently lexed token.  This
- * would not work well if we supported multiple-token pushback or if
- * plpgsql_yylex() wanted to read ahead beyond a T_STRING token.
- */
-char *
-plpgsql_get_string_value(void)
-{
-	char	   *result;
-	const char *cp;
-	int			len;
-
-	if (dolqlen > 0)
-	{
-		/* Token is a $foo$...$foo$ string */
-		len = yyleng - 2 * dolqlen;
-		Assert(len >= 0);
-		result = (char *) palloc(len + 1);
-		memcpy(result, yytext + dolqlen, len);
-		result[len] = '\0';
-	}
-	else if (*yytext == 'E' || *yytext == 'e')
-	{
-		/* Token is an E'...' string */
-		result = (char *) palloc(yyleng + 1);	/* more than enough room */
-		len = 0;
-		for (cp = yytext + 2; *cp; cp++)
-		{
-			if (*cp == '\'')
-			{
-				if (cp[1] == '\'')
-					result[len++] = *cp++;
-				/* else it must be string end quote */
-			}
-			else if (*cp == '\\')
-			{
-				if (cp[1] != '\0')	/* just a paranoid check */
-					result[len++] = *(++cp);
-			}
-			else
-				result[len++] = *cp;
-		}
-		result[len] = '\0';
-	}
-	else
-	{
-		/* Token is a '...' string */
-		result = (char *) palloc(yyleng + 1);	/* more than enough room */
-		len = 0;
-		for (cp = yytext + 1; *cp; cp++)
-		{
-			if (*cp == '\'')
-			{
-				if (cp[1] == '\'')
-					result[len++] = *cp++;
-				/* else it must be string end quote */
-			}
-			else if (*cp == '\\')
-			{
-				if (cp[1] != '\0')	/* just a paranoid check */
-					result[len++] = *(++cp);
-			}
-			else
-				result[len++] = *cp;
-		}
-		result[len] = '\0';
-	}
-	return result;
-}
--- a/src/test/regress/expected/plpgsql.out
+++ b/src/test/regress/expected/plpgsql.out
@ -3737,3 +3737,74 @@ SELECT * FROM leaker_1(true);

 DROP FUNCTION leaker_1(bool);
 DROP FUNCTION leaker_2(bool);
+-- Test handling of string literals.
+set standard_conforming_strings = off;
+create or replace function strtest() returns text as $$
+begin
+  raise notice 'foo\\bar\041baz';
+  return 'foo\\bar\041baz';
+end
+$$ language plpgsql;
+WARNING:  nonstandard use of \\ in a string literal
+HINT:  Use the escape string syntax for backslashes, e.g., E'\\'.
+CONTEXT:  string literal in PL/PgSQL function "strtest" near line 2
+WARNING:  nonstandard use of \\ in a string literal
+LINE 1: SELECT  'foo\\bar\041baz'
+                ^
+HINT:  Use the escape string syntax for backslashes, e.g., E'\\'.
+QUERY:  SELECT  'foo\\bar\041baz'
+CONTEXT:  SQL statement in PL/PgSQL function "strtest" near line 3
+select strtest();
+NOTICE:  foo\bar!baz
+WARNING:  nonstandard use of \\ in a string literal
+LINE 1: SELECT  'foo\\bar\041baz'
+                ^
+HINT:  Use the escape string syntax for backslashes, e.g., E'\\'.
+QUERY:  SELECT  'foo\\bar\041baz'
+CONTEXT:  PL/pgSQL function "strtest" line 3 at RETURN
+   strtest   
+-------------
+ foo\bar!baz
+(1 row)
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice E'foo\\bar\041baz';
+  return E'foo\\bar\041baz';
+end
+$$ language plpgsql;
+select strtest();
+NOTICE:  foo\bar!baz
+   strtest   
+-------------
+ foo\bar!baz
+(1 row)
+
+set standard_conforming_strings = on;
+create or replace function strtest() returns text as $$
+begin
+  raise notice 'foo\\bar\041baz\';
+  return 'foo\\bar\041baz\';
+end
+$$ language plpgsql;
+select strtest();
+NOTICE:  foo\\bar\041baz\
+     strtest      
+------------------
+ foo\\bar\041baz\
+(1 row)
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice E'foo\\bar\041baz';
+  return E'foo\\bar\041baz';
+end
+$$ language plpgsql;
+select strtest();
+NOTICE:  foo\bar!baz
+   strtest   
+-------------
+ foo\bar!baz
+(1 row)
+
+drop function strtest();
--- a/src/test/regress/sql/plpgsql.sql
+++ b/src/test/regress/sql/plpgsql.sql
@ -3005,3 +3005,47 @@ SELECT * FROM leaker_1(true);

 DROP FUNCTION leaker_1(bool);
 DROP FUNCTION leaker_2(bool);
+
+-- Test handling of string literals.
+
+set standard_conforming_strings = off;
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice 'foo\\bar\041baz';
+  return 'foo\\bar\041baz';
+end
+$$ language plpgsql;
+
+select strtest();
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice E'foo\\bar\041baz';
+  return E'foo\\bar\041baz';
+end
+$$ language plpgsql;
+
+select strtest();
+
+set standard_conforming_strings = on;
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice 'foo\\bar\041baz\';
+  return 'foo\\bar\041baz\';
+end
+$$ language plpgsql;
+
+select strtest();
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice E'foo\\bar\041baz';
+  return E'foo\\bar\041baz';
+end
+$$ language plpgsql;
+
+select strtest();
+
+drop function strtest();