Tweak the core scanner so that it can be used by plpgsql too.

Changes: Pass in the keyword lookup array instead of having it be hardwired. (This incidentally allows elimination of some duplicate code in ecpg.) Re-order the token declarations in gram.y so that non-keyword tokens have numbers that won't change when keywords are added or removed. Add ".." and ":=" to the set of tokens recognized by scan.l. (Since these combinations are nowhere legal in core SQL, this does not change anything except the precise wording of the error you get when you write one of them.)
author: Tom Lane <tgl@sss.pgh.pa.us> 2009-07-14 20:24:10 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2009-07-14 20:24:10 +0000
commit: 1aa58d3a8389fcf8899745049f128f6b8fec7bc9 (patch)
tree: 2e36e9cf65a517ce558fbfe694821da5bb28c131 /src/backend
parent: 0d4899e448df2b02434d6d423156408cde012707 (diff)
download: postgresql-1aa58d3a8389fcf8899745049f128f6b8fec7bc9.tar.gz
postgresql-1aa58d3a8389fcf8899745049f128f6b8fec7bc9.zip
7 files changed, 78 insertions, 36 deletions
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index f4b795db451..dfe9b19cf1c 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.668 2009/07/13 02:02:20 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.669 2009/07/14 20:24:10 tgl Exp $
  *
  * HISTORY
  *	  AUTHOR			DATE			MAJOR EVENT
@@ -421,10 +421,23 @@ static TypeName *TableFuncTypeName(List *columns);
 
 
 /*
- * If you make any token changes, update the keyword table in
- * src/include/parser/kwlist.h and add new keywords to the appropriate one of
- * the reserved-or-not-so-reserved keyword lists, below; search
- * this file for "Name classification hierarchy".
+ * Non-keyword token types.  These are hard-wired into the "flex" lexer.
+ * They must be listed first so that their numeric codes do not depend on
+ * the set of keywords.  PL/pgsql depends on this so that it can share the
+ * same lexer.  If you add/change tokens here, fix PL/pgsql to match!
+ *
+ * DOT_DOT and COLON_EQUALS are unused in the core SQL grammar, and so will
+ * always provoke parse errors.  They are needed by PL/pgsql.
+ */
+%token <str>	IDENT FCONST SCONST BCONST XCONST Op
+%token <ival>	ICONST PARAM
+%token			TYPECAST DOT_DOT COLON_EQUALS
+
+/*
+ * If you want to make any keyword changes, update the keyword table in
+ * src/include/parser/kwlist.h and add new keywords to the appropriate one
+ * of the reserved-or-not-so-reserved keyword lists, below; search
+ * this file for "Keyword category lists".
  */
 
 /* ordinary key words in alphabetical order */
@@ -515,17 +528,15 @@ static TypeName *TableFuncTypeName(List *columns);
 
 	ZONE
 
-/* The grammar thinks these are keywords, but they are not in the kwlist.h
+/*
+ * The grammar thinks these are keywords, but they are not in the kwlist.h
  * list and so can never be entered directly.  The filter in parser.c
  * creates these tokens when required.
  */
 %token			NULLS_FIRST NULLS_LAST WITH_TIME
 
-/* Special token types, not actually keywords - see the "lex" file */
-%token <str>	IDENT FCONST SCONST BCONST XCONST Op
-%token <ival>	ICONST PARAM
 
-/* precedence: lowest to highest */
+/* Precedence: lowest to highest */
 %nonassoc	SET				/* see relation_expr_opt_alias */
 %left		UNION EXCEPT
 %left		INTERSECT
diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c
index 05e7fb9ee5a..4fce4528468 100644
--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -9,14 +9,13 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.213 2009/07/12 17:12:33 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.214 2009/07/14 20:24:10 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
 #include "parser/gramparse.h"
-#include "parser/keywords.h"
 
 #define PG_KEYWORD(a,b,c) {a,b,c},
 
@@ -25,5 +24,4 @@ const ScanKeyword ScanKeywords[] = {
 #include "parser/kwlist.h"
 };
 
-/* End of ScanKeywords, for use in kwlookup.c and elsewhere */
-const ScanKeyword *LastScanKeyword = endof(ScanKeywords);
+const int	NumScanKeywords = lengthof(ScanKeywords);
diff --git a/src/backend/parser/kwlookup.c b/src/backend/parser/kwlookup.c
index 7321a57c156..58c8cdd78f7 100644
--- a/src/backend/parser/kwlookup.c
+++ b/src/backend/parser/kwlookup.c
@@ -6,15 +6,12 @@
  * NB - this file is also used by ECPG and several frontend programs in
  * src/bin/ including pg_dump and psql
  *
- * Note that this file expects that the ScanKeywords array is defined
- * and that LastScanKeyword points to its element one past the last.
- *
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/kwlookup.c,v 2.2 2009/03/08 16:53:30 alvherre Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/kwlookup.c,v 2.3 2009/07/14 20:24:10 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -39,7 +36,9 @@
  * receive a different case-normalization mapping.
  */
 const ScanKeyword *
-ScanKeywordLookup(const char *text)
+ScanKeywordLookup(const char *text,
+				  const ScanKeyword *keywords,
+				  int num_keywords)
 {
 	int			len,
 				i;
@@ -69,8 +68,8 @@ ScanKeywordLookup(const char *text)
 	/*
 	 * Now do a binary search using plain strcmp() comparison.
 	 */
-	low = &ScanKeywords[0];
-	high = LastScanKeyword - 1;
+	low = keywords;
+	high = keywords + (num_keywords - 1);
 	while (low <= high)
 	{
 		const ScanKeyword *middle;
diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c
index cb8ff8a3394..93632c88114 100644
--- a/src/backend/parser/parser.c
+++ b/src/backend/parser/parser.c
@@ -14,7 +14,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.80 2009/07/13 02:02:20 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.81 2009/07/14 20:24:10 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -39,7 +39,7 @@ raw_parser(const char *str)
 	int			yyresult;
 
 	/* initialize the flex scanner */
-	yyscanner = scanner_init(str, &yyextra);
+	yyscanner = scanner_init(str, &yyextra, ScanKeywords, NumScanKeywords);
 
 	/* filtered_base_yylex() only needs this much initialization */
 	yyextra.have_lookahead = false;
@@ -79,7 +79,7 @@ pg_parse_string_token(const char *token)
 	YYSTYPE		yylval;
 	YYLTYPE		yylloc;
 
-	yyscanner = scanner_init(token, &yyextra);
+	yyscanner = scanner_init(token, &yyextra, ScanKeywords, NumScanKeywords);
 
 	ctoken = base_yylex(&yylval, &yylloc, yyscanner);
 
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index a73934913d8..a5ed54792b6 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -24,7 +24,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.156 2009/07/13 03:11:12 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.157 2009/07/14 20:24:10 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -304,6 +304,10 @@ identifier		{ident_start}{ident_cont}*
 
 typecast		"::"
 
+/* these two token types are used by PL/pgsql, though not in core SQL */
+dot_dot			\.\.
+colon_equals	":="
+
 /*
  * "self" is the set of chars that should be returned as single-character
  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
@@ -450,11 +454,21 @@ other			.
 
 					SET_YYLLOC();
 					yyless(1);				/* eat only 'n' this time */
-					/* nchar had better be a keyword! */
-					keyword = ScanKeywordLookup("nchar");
-					Assert(keyword != NULL);
-					yylval->keyword = keyword->name;
-					return keyword->value;
+
+					keyword = ScanKeywordLookup("nchar",
+												yyextra->keywords,
+												yyextra->num_keywords);
+					if (keyword != NULL)
+					{
+						yylval->keyword = keyword->name;
+						return keyword->value;
+					}
+					else
+					{
+						/* If NCHAR isn't a keyword, just return "n" */
+						yylval->str = pstrdup("n");
+						return IDENT;
+					}
 				}
 
 {xqstart}		{
@@ -680,6 +694,16 @@ other			.
 					return TYPECAST;
 				}
 
+{dot_dot}		{
+					SET_YYLLOC();
+					return DOT_DOT;
+				}
+
+{colon_equals}	{
+					SET_YYLLOC();
+					return COLON_EQUALS;
+				}
+
 {self}			{
 					SET_YYLLOC();
 					return yytext[0];
@@ -830,7 +854,9 @@ other			.
 					SET_YYLLOC();
 
 					/* Is it a keyword? */
-					keyword = ScanKeywordLookup(yytext);
+					keyword = ScanKeywordLookup(yytext,
+												yyextra->keywords,
+												yyextra->num_keywords);
 					if (keyword != NULL)
 					{
 						yylval->keyword = keyword->name;
@@ -939,7 +965,10 @@ scanner_yyerror(const char *message, base_yyscan_t yyscanner)
  * Called before any actual parsing is done
  */
 base_yyscan_t
-scanner_init(const char *str, base_yy_extra_type *yyext)
+scanner_init(const char *str,
+			 base_yy_extra_type *yyext,
+			 const ScanKeyword *keywords,
+			 int num_keywords)
 {
 	Size		slen = strlen(str);
 	yyscan_t	scanner;
@@ -949,6 +978,9 @@ scanner_init(const char *str, base_yy_extra_type *yyext)
 
 	base_yyset_extra(yyext, scanner);
 
+	yyext->keywords = keywords;
+	yyext->num_keywords = num_keywords;
+
 	/*
 	 * Make a scan buffer with special termination needed by flex.
 	 */
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c
index f78fc7363d4..e2da654c534 100644
--- a/src/backend/utils/adt/misc.c
+++ b/src/backend/utils/adt/misc.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/misc.c,v 1.71 2009/06/11 14:49:03 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/misc.c,v 1.72 2009/07/14 20:24:10 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -334,7 +334,7 @@ pg_get_keywords(PG_FUNCTION_ARGS)
 
 	funcctx = SRF_PERCALL_SETUP();
 
-	if (&ScanKeywords[funcctx->call_cntr] < LastScanKeyword)
+	if (funcctx->call_cntr < NumScanKeywords)
 	{
 		char	   *values[3];
 		HeapTuple	tuple;
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 6fdef41cc04..d30db3a2ba2 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.301 2009/07/12 17:12:34 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.302 2009/07/14 20:24:10 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -6219,7 +6219,9 @@ quote_identifier(const char *ident)
 		 * Note: ScanKeywordLookup() does case-insensitive comparison, but
 		 * that's fine, since we already know we have all-lower-case.
 		 */
-		const ScanKeyword *keyword = ScanKeywordLookup(ident);
+		const ScanKeyword *keyword = ScanKeywordLookup(ident,
+													   ScanKeywords,
+													   NumScanKeywords);
 
 		if (keyword != NULL && keyword->category != UNRESERVED_KEYWORD)
 			safe = false;
author	Tom Lane <tgl@sss.pgh.pa.us>	2009-07-14 20:24:10 +0000
committer	Tom Lane <tgl@sss.pgh.pa.us>	2009-07-14 20:24:10 +0000
commit	1aa58d3a8389fcf8899745049f128f6b8fec7bc9 (patch)
tree	2e36e9cf65a517ce558fbfe694821da5bb28c131 /src/backend
parent	0d4899e448df2b02434d6d423156408cde012707 (diff)
download	postgresql-1aa58d3a8389fcf8899745049f128f6b8fec7bc9.tar.gz postgresql-1aa58d3a8389fcf8899745049f128f6b8fec7bc9.zip