From 8b17298f0b6bb2a64b55fab0339c8fd6ec2d74fb Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Mon, 25 Mar 2019 15:42:51 +0300
Subject: Cosmetic changes for jsonpath_gram.y and jsonpath_scan.l

This commit includes formatting improvements, renamings and comments.  Also,
it makes jsonpath_scan.l more uniform with our other lexers.  Firstly,
state names are renamed to shorter alternatives.  Secondly, the <INITIAL>
prefix is removed from the rules.  The corresponding rules are moved to the
tail, so they still work only in the initial state.

Author: Alexander Korotkov
Reviewed-by: John Naylor
---
 src/backend/utils/adt/jsonpath_scan.l | 332 ++++++++++++++++++----------------
 1 file changed, 173 insertions(+), 159 deletions(-)

(limited to 'src/backend/utils/adt/jsonpath_scan.l')
diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l
index 02cb54ee7f9..e93307f4073 100644
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -4,6 +4,9 @@
  * jsonpath_scan.l
  *	Lexical parser for jsonpath datatype
  *
+ * Splits jsonpath string into tokens represented as JsonPathString structs.
+ * Decodes unicode and hex escaped strings.
+ *
  * Copyright (c) 2019, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
@@ -19,9 +22,6 @@
 
 static JsonPathString scanstring;
 
-/* No reason to constrain amount of data slurped */
-/* #define YY_READ_BUF_SIZE 16777216 */
-
 /* Handles to the buffer that the lexer uses internally */
 static YY_BUFFER_STATE scanbufhandle;
 static char *scanbuf;
@@ -29,9 +29,7 @@ static int	scanbuflen;
 
 static void addstring(bool init, char *s, int l);
 static void addchar(bool init, char s);
-static int checkSpecialVal(void); /* examine scanstring for the special
-								   * value */
-
+static enum yytokentype checkKeyword(void);
 static void parseUnicode(char *s, int l);
 static void parseHexChars(char *s, int l);
 
@@ -60,11 +58,22 @@ fprintf_to_ereport(const char *fmt, const char *msg)
 %option noyyrealloc
 %option noyyfree
 
-%x xQUOTED
-%x xNONQUOTED
-%x xVARQUOTED
-%x xSINGLEQUOTED
-%x xCOMMENT
+/*
+ * We use exclusive states for quoted, single-quoted and non-quoted strings,
+ * quoted variable names and C-style comments.
+ * Exclusive states:
+ *  <xq> - quoted strings
+ *  <xnq> - non-quoted strings
+ *  <xvq> - quoted variable names
+ *  <xsq> - single-quoted strings
+ *  <xc> - C-style comment
+ */
+
+%x xq
+%x xnq
+%x xvq
+%x xsq
+%x xc
 
 special		 [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
 any			[^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f]
@@ -73,189 +82,188 @@ hex_dig		[0-9A-Fa-f]
 unicode		\\u({hex_dig}{4}|\{{hex_dig}{1,6}\})
 hex_char	\\x{hex_dig}{2}
 
-
 %%
 
-<INITIAL>\&\&					{ return AND_P; }
-
-<INITIAL>\|\|					{ return OR_P; }
-
-<INITIAL>\!						{ return NOT_P; }
-
-<INITIAL>\*\*					{ return ANY_P; }
-
-<INITIAL>\<						{ return LESS_P; }
-
-<INITIAL>\<\=					{ return LESSEQUAL_P; }
-
-<INITIAL>\=\=					{ return EQUAL_P; }
-
-<INITIAL>\<\>					{ return NOTEQUAL_P; }
+<xnq>{any}+						{
+									addstring(false, yytext, yyleng);
+								}
 
-<INITIAL>\!\=					{ return NOTEQUAL_P; }
+<xnq>{blank}+					{
+									yylval->str = scanstring;
+									BEGIN INITIAL;
+									return checkKeyword();
+								}
 
-<INITIAL>\>\=					{ return GREATEREQUAL_P; }
 
-<INITIAL>\>						{ return GREATER_P; }
+<xnq>\/\*						{
+									yylval->str = scanstring;
+									BEGIN xc;
+								}
 
-<INITIAL>\${any}+				{
-									addstring(true, yytext + 1, yyleng - 1);
-									addchar(false, '\0');
+<xnq>({special}|\"|\')			{
 									yylval->str = scanstring;
-									return VARIABLE_P;
+									yyless(0);
+									BEGIN INITIAL;
+									return checkKeyword();
 								}
 
-<INITIAL>\$\"					{
-									addchar(true, '\0');
-									BEGIN xVARQUOTED;
+<xnq><<EOF>>					{
+									yylval->str = scanstring;
+									BEGIN INITIAL;
+									return checkKeyword();
 								}
 
-<INITIAL>{special}				{ return *yytext; }
+<xnq,xq,xvq,xsq>\\[\"\'\\]		{ addchar(false, yytext[1]); }
 
-<INITIAL>{blank}+				{ /* ignore */ }
+<xnq,xq,xvq,xsq>\\b				{ addchar(false, '\b'); }
 
-<INITIAL>\/\*					{
-									addchar(true, '\0');
-									BEGIN xCOMMENT;
-								}
+<xnq,xq,xvq,xsq>\\f				{ addchar(false, '\f'); }
 
-<INITIAL>[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+  /* float */  {
-									addstring(true, yytext, yyleng);
-									addchar(false, '\0');
-									yylval->str = scanstring;
-									return NUMERIC_P;
-								}
+<xnq,xq,xvq,xsq>\\n				{ addchar(false, '\n'); }
 
-<INITIAL>\.[0-9]+[eE][+-]?[0-9]+  /* float */  {
-									addstring(true, yytext, yyleng);
-									addchar(false, '\0');
-									yylval->str = scanstring;
-									return NUMERIC_P;
-								}
+<xnq,xq,xvq,xsq>\\r				{ addchar(false, '\r'); }
 
-<INITIAL>([0-9]+)?\.[0-9]+		{
-									addstring(true, yytext, yyleng);
-									addchar(false, '\0');
-									yylval->str = scanstring;
-									return NUMERIC_P;
-								}
+<xnq,xq,xvq,xsq>\\t				{ addchar(false, '\t'); }
 
-<INITIAL>[0-9]+					{
-									addstring(true, yytext, yyleng);
-									addchar(false, '\0');
-									yylval->str = scanstring;
-									return INT_P;
-								}
+<xnq,xq,xvq,xsq>\\v				{ addchar(false, '\v'); }
 
-<INITIAL>{any}+					{
-									addstring(true, yytext, yyleng);
-									BEGIN xNONQUOTED;
-								}
+<xnq,xq,xvq,xsq>{unicode}+		{ parseUnicode(yytext, yyleng); }
 
-<INITIAL>\"						{
-									addchar(true, '\0');
-									BEGIN xQUOTED;
-								}
+<xnq,xq,xvq,xsq>{hex_char}+		{ parseHexChars(yytext, yyleng); }
 
-<INITIAL>\'						{
-									addchar(true, '\0');
-									BEGIN xSINGLEQUOTED;
-								}
+<xnq,xq,xvq,xsq>\\x				{ yyerror(NULL, "Hex character sequence is invalid"); }
 
-<INITIAL>\\						{
-									yyless(0);
-									addchar(true, '\0');
-									BEGIN xNONQUOTED;
-								}
+<xnq,xq,xvq,xsq>\\u				{ yyerror(NULL, "Unicode sequence is invalid"); }
 
-<xNONQUOTED>{any}+				{
-									addstring(false, yytext, yyleng);
-								}
+<xnq,xq,xvq,xsq>\\.				{ yyerror(NULL, "Escape sequence is invalid"); }
 
-<xNONQUOTED>{blank}+			{
-									yylval->str = scanstring;
-									BEGIN INITIAL;
-									return checkSpecialVal();
-								}
+<xnq,xq,xvq,xsq>\\				{ yyerror(NULL, "Unexpected end after backslash"); }
 
+<xq,xvq,xsq><<EOF>>				{ yyerror(NULL, "Unexpected end of quoted string"); }
 
-<xNONQUOTED>\/\*				{
+<xq>\"							{
 									yylval->str = scanstring;
-									BEGIN xCOMMENT;
+									BEGIN INITIAL;
+									return STRING_P;
 								}
 
-<xNONQUOTED>({special}|\"|\')	{
+<xvq>\"							{
 									yylval->str = scanstring;
-									yyless(0);
 									BEGIN INITIAL;
-									return checkSpecialVal();
+									return VARIABLE_P;
 								}
 
-<xNONQUOTED><<EOF>>				{
+<xsq>\'							{
 									yylval->str = scanstring;
 									BEGIN INITIAL;
-									return checkSpecialVal();
+									return STRING_P;
 								}
 
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\[\"\'\\]	{ addchar(false, yytext[1]); }
+<xq,xvq>[^\\\"]+				{ addstring(false, yytext, yyleng); }
+
+<xsq>[^\\\']+					{ addstring(false, yytext, yyleng); }
+
+<xc>\*\/						{ BEGIN INITIAL; }
 
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\b	{ addchar(false, '\b'); }
+<xc>[^\*]+						{ }
 
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\f	{ addchar(false, '\f'); }
+<xc>\*							{ }
 
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\n	{ addchar(false, '\n'); }
+<xc><<EOF>>						{ yyerror(NULL, "Unexpected end of comment"); }
 
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\r	{ addchar(false, '\r'); }
+\&\&							{ return AND_P; }
 
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\t	{ addchar(false, '\t'); }
+\|\|							{ return OR_P; }
 
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\v	{ addchar(false, '\v'); }
+\!								{ return NOT_P; }
 
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{unicode}+		{ parseUnicode(yytext, yyleng); }
+\*\*							{ return ANY_P; }
 
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{hex_char}+	{ parseHexChars(yytext, yyleng); }
+\<								{ return LESS_P; }
 
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\x	{ yyerror(NULL, "Hex character sequence is invalid"); }
+\<\=							{ return LESSEQUAL_P; }
 
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\u	{ yyerror(NULL, "Unicode sequence is invalid"); }
+\=\=							{ return EQUAL_P; }
 
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\.	{ yyerror(NULL, "Escape sequence is invalid"); }
+\<\>							{ return NOTEQUAL_P; }
 
-<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\		{ yyerror(NULL, "Unexpected end after backslash"); }
+\!\=							{ return NOTEQUAL_P; }
 
-<xQUOTED,xVARQUOTED,xSINGLEQUOTED><<EOF>>			{ yyerror(NULL, "Unexpected end of quoted string"); }
+\>\=							{ return GREATEREQUAL_P; }
 
-<xQUOTED>\"						{
+\>								{ return GREATER_P; }
+
+\${any}+						{
+									addstring(true, yytext + 1, yyleng - 1);
+									addchar(false, '\0');
 									yylval->str = scanstring;
-									BEGIN INITIAL;
-									return STRING_P;
+									return VARIABLE_P;
+								}
+
+\$\"							{
+									addchar(true, '\0');
+									BEGIN xvq;
 								}
 
-<xVARQUOTED>\"					{
+{special}						{ return *yytext; }
+
+{blank}+						{ /* ignore */ }
+
+\/\*							{
+									addchar(true, '\0');
+									BEGIN xc;
+								}
+
+[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ { /* float */
+									addstring(true, yytext, yyleng);
+									addchar(false, '\0');
 									yylval->str = scanstring;
-									BEGIN INITIAL;
-									return VARIABLE_P;
+									return NUMERIC_P;
 								}
 
-<xSINGLEQUOTED>\'				{
+\.[0-9]+[eE][+-]?[0-9]+			{ /* float */
+									addstring(true, yytext, yyleng);
+									addchar(false, '\0');
 									yylval->str = scanstring;
-									BEGIN INITIAL;
-									return STRING_P;
+									return NUMERIC_P;
 								}
 
-<xQUOTED,xVARQUOTED>[^\\\"]+	{ addstring(false, yytext, yyleng); }
+([0-9]+)?\.[0-9]+				{
+									addstring(true, yytext, yyleng);
+									addchar(false, '\0');
+									yylval->str = scanstring;
+									return NUMERIC_P;
+								}
 
-<xSINGLEQUOTED>[^\\\']+			{ addstring(false, yytext, yyleng); }
+[0-9]+							{
+									addstring(true, yytext, yyleng);
+									addchar(false, '\0');
+									yylval->str = scanstring;
+									return INT_P;
+								}
 
-<INITIAL><<EOF>>				{ yyterminate(); }
+{any}+							{
+									addstring(true, yytext, yyleng);
+									BEGIN xnq;
+								}
 
-<xCOMMENT>\*\/					{ BEGIN INITIAL; }
+\"								{
+									addchar(true, '\0');
+									BEGIN xq;
+								}
 
-<xCOMMENT>[^\*]+				{ }
+\'								{
+									addchar(true, '\0');
+									BEGIN xsq;
+								}
 
-<xCOMMENT>\*					{ }
+\\								{
+									yyless(0);
+									addchar(true, '\0');
+									BEGIN xnq;
+								}
 
-<xCOMMENT><<EOF>>				{ yyerror(NULL, "Unexpected end of comment"); }
+<<EOF>>							{ yyterminate(); }
 
 %%
 
@@ -292,7 +300,6 @@ typedef struct JsonPathKeyword
  * Array of key words should be sorted by length and then
  * alphabetical order
  */
-
 static const JsonPathKeyword keywords[] = {
 	{ 2, false,	IS_P,		"is"},
 	{ 2, false,	TO_P,		"to"},
@@ -317,8 +324,9 @@ static const JsonPathKeyword keywords[] = {
 	{ 10,false, LIKE_REGEX_P, "like_regex"},
 };
 
-static int
-checkSpecialVal()
+/* Check if current scanstring value is a keyword */
+static enum yytokentype
+checkKeyword()
 {
 	int						res = IDENT_P;
 	int						diff;
@@ -329,7 +337,7 @@ checkSpecialVal()
 	if (scanstring.len > keywords[lengthof(keywords) - 1].len)
 		return res;
 
-	while(StopLow < StopHigh)
+	while (StopLow < StopHigh)
 	{
 		StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
 
@@ -397,49 +405,50 @@ jsonpath_scanner_finish(void)
 	pfree(scanbuf);
 }
 
+/*
+ * Resize scanstring so that it can append string of given length.
+ * Reinitialize if required.
+ */
 static void
-addstring(bool init, char *s, int l)
+resizeString(bool init, int appendLen)
 {
 	if (init)
 	{
-		scanstring.total = 32;
-		scanstring.val = palloc(scanstring.total);
+		scanstring.total = Max(32, appendLen);
+		scanstring.val = (char *) palloc(scanstring.total);
 		scanstring.len = 0;
 	}
-
-	if (s && l)
+	else
 	{
-		while(scanstring.len + l + 1 >= scanstring.total)
+		if (scanstring.len + appendLen >= scanstring.total)
 		{
-			scanstring.total *= 2;
+			while (scanstring.len + appendLen >= scanstring.total)
+				scanstring.total *= 2;
 			scanstring.val = repalloc(scanstring.val, scanstring.total);
 		}
-
-		memcpy(scanstring.val + scanstring.len, s, l);
-		scanstring.len += l;
 	}
 }
 
+/* Add set of bytes at "s" of length "l" to scanstring */
 static void
-addchar(bool init, char s)
+addstring(bool init, char *s, int l)
 {
-	if (init)
-	{
-		scanstring.total = 32;
-		scanstring.val = palloc(scanstring.total);
-		scanstring.len = 0;
-	}
-	else if(scanstring.len + 1 >= scanstring.total)
-	{
-		scanstring.total *= 2;
-		scanstring.val = repalloc(scanstring.val, scanstring.total);
-	}
+	resizeString(init, l + 1);
+	memcpy(scanstring.val + scanstring.len, s, l);
+	scanstring.len += l;
+}
 
-	scanstring.val[ scanstring.len ] = s;
-	if (s != '\0')
+/* Add single byte "c" to scanstring */
+static void
+addchar(bool init, char c)
+{
+	resizeString(init, 1);
+	scanstring.val[scanstring.len] = c;
+	if (c != '\0')
 		scanstring.len++;
 }
 
+/* Interface to jsonpath parser */
 JsonPathParseResult *
 parsejsonpath(const char *str, int len)
 {
@@ -447,7 +456,7 @@ parsejsonpath(const char *str, int len)
 
 	jsonpath_scanner_init(str, len);
 
-	if (jsonpath_yyparse((void*)&parseresult) != 0)
+	if (jsonpath_yyparse((void *) &parseresult) != 0)
 		jsonpath_yyerror(NULL, "bugus input");
 
 	jsonpath_scanner_finish();
@@ -455,6 +464,7 @@ parsejsonpath(const char *str, int len)
 	return parseresult;
 }
 
+/* Turn hex character into integer */
 static int
 hexval(char c)
 {
@@ -468,6 +478,7 @@ hexval(char c)
 	return 0; /* not reached */
 }
 
+/* Add given unicode character to scanstring */
 static void
 addUnicodeChar(int ch)
 {
@@ -515,6 +526,7 @@ addUnicodeChar(int ch)
 	}
 }
 
+/* Add unicode character and process its hi surrogate */
 static void
 addUnicode(int ch, int *hi_surrogate)
 {
@@ -592,6 +604,7 @@ parseUnicode(char *s, int l)
 	}
 }
 
+/* Parse sequence of hex-encoded characters */
 static void
 parseHexChars(char *s, int l)
 {
@@ -601,7 +614,8 @@ parseHexChars(char *s, int l)
 
 	for (i = 0; i < l / 4; i++)
 	{
-		int			ch = (hexval(s[i * 4 + 2]) << 4) | hexval(s[i * 4 + 3]);
+		int			ch = (hexval(s[i * 4 + 2]) << 4) |
+						  hexval(s[i * 4 + 3]);
 
 		addUnicodeChar(ch);
 	}
-- 
cgit v1.2.3