From 8b17298f0b6bb2a64b55fab0339c8fd6ec2d74fb Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 25 Mar 2019 15:42:51 +0300 Subject: Cosmetic changes for jsonpath_gram.y and jsonpath_scan.l This commit includes formatting improvements, renamings and comments. Also, it makes jsonpath_scan.l more uniform with our other lexers. Firstly, state names are renamed to shorter alternatives. Secondly, the INITIAL state prefix is removed from the rules. The corresponding rules are moved to the tail, so they still apply only in the initial state. Author: Alexander Korotkov Reviewed-by: John Naylor --- src/backend/utils/adt/jsonpath_scan.l | 332 ++++++++++++++++++---------------- 1 file changed, 173 insertions(+), 159 deletions(-) (limited to 'src/backend/utils/adt/jsonpath_scan.l') diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l index 02cb54ee7f9..e93307f4073 100644 --- a/src/backend/utils/adt/jsonpath_scan.l +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -4,6 +4,9 @@ * jsonpath_scan.l * Lexical parser for jsonpath datatype * + * Splits jsonpath string into tokens represented as JsonPathString structs. + * Decodes unicode and hex escaped strings. 
+ * * Copyright (c) 2019, PostgreSQL Global Development Group * * IDENTIFICATION @@ -19,9 +22,6 @@ static JsonPathString scanstring; -/* No reason to constrain amount of data slurped */ -/* #define YY_READ_BUF_SIZE 16777216 */ - /* Handles to the buffer that the lexer uses internally */ static YY_BUFFER_STATE scanbufhandle; static char *scanbuf; @@ -29,9 +29,7 @@ static int scanbuflen; static void addstring(bool init, char *s, int l); static void addchar(bool init, char s); -static int checkSpecialVal(void); /* examine scanstring for the special - * value */ - +static enum yytokentype checkKeyword(void); static void parseUnicode(char *s, int l); static void parseHexChars(char *s, int l); @@ -60,11 +58,22 @@ fprintf_to_ereport(const char *fmt, const char *msg) %option noyyrealloc %option noyyfree -%x xQUOTED -%x xNONQUOTED -%x xVARQUOTED -%x xSINGLEQUOTED -%x xCOMMENT +/* + * We use exclusive states for quoted, single-quoted and non-quoted strings, + * quoted variable names and C-style comments. + * Exclusive states: + * - quoted strings + * - non-quoted strings + * - quoted variable names + * - single-quoted strings + * - C-style comment + */ + +%x xq +%x xnq +%x xvq +%x xsq +%x xc special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/] any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f] @@ -73,189 +82,188 @@ hex_dig [0-9A-Fa-f] unicode \\u({hex_dig}{4}|\{{hex_dig}{1,6}\}) hex_char \\x{hex_dig}{2} - %% -\&\& { return AND_P; } - -\|\| { return OR_P; } - -\! 
{ return NOT_P; } - -\*\* { return ANY_P; } - -\< { return LESS_P; } - -\<\= { return LESSEQUAL_P; } - -\=\= { return EQUAL_P; } - -\<\> { return NOTEQUAL_P; } +{any}+ { + addstring(false, yytext, yyleng); + } -\!\= { return NOTEQUAL_P; } +{blank}+ { + yylval->str = scanstring; + BEGIN INITIAL; + return checkKeyword(); + } -\>\= { return GREATEREQUAL_P; } -\> { return GREATER_P; } +\/\* { + yylval->str = scanstring; + BEGIN xc; + } -\${any}+ { - addstring(true, yytext + 1, yyleng - 1); - addchar(false, '\0'); +({special}|\"|\') { yylval->str = scanstring; - return VARIABLE_P; + yyless(0); + BEGIN INITIAL; + return checkKeyword(); } -\$\" { - addchar(true, '\0'); - BEGIN xVARQUOTED; +<> { + yylval->str = scanstring; + BEGIN INITIAL; + return checkKeyword(); } -{special} { return *yytext; } +\\[\"\'\\] { addchar(false, yytext[1]); } -{blank}+ { /* ignore */ } +\\b { addchar(false, '\b'); } -\/\* { - addchar(true, '\0'); - BEGIN xCOMMENT; - } +\\f { addchar(false, '\f'); } -[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ /* float */ { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; - return NUMERIC_P; - } +\\n { addchar(false, '\n'); } -\.[0-9]+[eE][+-]?[0-9]+ /* float */ { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; - return NUMERIC_P; - } +\\r { addchar(false, '\r'); } -([0-9]+)?\.[0-9]+ { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; - return NUMERIC_P; - } +\\t { addchar(false, '\t'); } -[0-9]+ { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; - return INT_P; - } +\\v { addchar(false, '\v'); } -{any}+ { - addstring(true, yytext, yyleng); - BEGIN xNONQUOTED; - } +{unicode}+ { parseUnicode(yytext, yyleng); } -\" { - addchar(true, '\0'); - BEGIN xQUOTED; - } +{hex_char}+ { parseHexChars(yytext, yyleng); } -\' { - addchar(true, '\0'); - BEGIN xSINGLEQUOTED; - } +\\x { yyerror(NULL, "Hex character sequence is invalid"); } 
-\\ { - yyless(0); - addchar(true, '\0'); - BEGIN xNONQUOTED; - } +\\u { yyerror(NULL, "Unicode sequence is invalid"); } -{any}+ { - addstring(false, yytext, yyleng); - } +\\. { yyerror(NULL, "Escape sequence is invalid"); } -{blank}+ { - yylval->str = scanstring; - BEGIN INITIAL; - return checkSpecialVal(); - } +\\ { yyerror(NULL, "Unexpected end after backslash"); } +<> { yyerror(NULL, "Unexpected end of quoted string"); } -\/\* { +\" { yylval->str = scanstring; - BEGIN xCOMMENT; + BEGIN INITIAL; + return STRING_P; } -({special}|\"|\') { +\" { yylval->str = scanstring; - yyless(0); BEGIN INITIAL; - return checkSpecialVal(); + return VARIABLE_P; } -<> { +\' { yylval->str = scanstring; BEGIN INITIAL; - return checkSpecialVal(); + return STRING_P; } -\\[\"\'\\] { addchar(false, yytext[1]); } +[^\\\"]+ { addstring(false, yytext, yyleng); } + +[^\\\']+ { addstring(false, yytext, yyleng); } + +\*\/ { BEGIN INITIAL; } -\\b { addchar(false, '\b'); } +[^\*]+ { } -\\f { addchar(false, '\f'); } +\* { } -\\n { addchar(false, '\n'); } +<> { yyerror(NULL, "Unexpected end of comment"); } -\\r { addchar(false, '\r'); } +\&\& { return AND_P; } -\\t { addchar(false, '\t'); } +\|\| { return OR_P; } -\\v { addchar(false, '\v'); } +\! { return NOT_P; } -{unicode}+ { parseUnicode(yytext, yyleng); } +\*\* { return ANY_P; } -{hex_char}+ { parseHexChars(yytext, yyleng); } +\< { return LESS_P; } -\\x { yyerror(NULL, "Hex character sequence is invalid"); } +\<\= { return LESSEQUAL_P; } -\\u { yyerror(NULL, "Unicode sequence is invalid"); } +\=\= { return EQUAL_P; } -\\. 
{ yyerror(NULL, "Escape sequence is invalid"); } +\<\> { return NOTEQUAL_P; } -\\ { yyerror(NULL, "Unexpected end after backslash"); } +\!\= { return NOTEQUAL_P; } -<> { yyerror(NULL, "Unexpected end of quoted string"); } +\>\= { return GREATEREQUAL_P; } -\" { +\> { return GREATER_P; } + +\${any}+ { + addstring(true, yytext + 1, yyleng - 1); + addchar(false, '\0'); yylval->str = scanstring; - BEGIN INITIAL; - return STRING_P; + return VARIABLE_P; + } + +\$\" { + addchar(true, '\0'); + BEGIN xvq; } -\" { +{special} { return *yytext; } + +{blank}+ { /* ignore */ } + +\/\* { + addchar(true, '\0'); + BEGIN xc; + } + +[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ { /* float */ + addstring(true, yytext, yyleng); + addchar(false, '\0'); yylval->str = scanstring; - BEGIN INITIAL; - return VARIABLE_P; + return NUMERIC_P; } -\' { +\.[0-9]+[eE][+-]?[0-9]+ { /* float */ + addstring(true, yytext, yyleng); + addchar(false, '\0'); yylval->str = scanstring; - BEGIN INITIAL; - return STRING_P; + return NUMERIC_P; } -[^\\\"]+ { addstring(false, yytext, yyleng); } +([0-9]+)?\.[0-9]+ { + addstring(true, yytext, yyleng); + addchar(false, '\0'); + yylval->str = scanstring; + return NUMERIC_P; + } -[^\\\']+ { addstring(false, yytext, yyleng); } +[0-9]+ { + addstring(true, yytext, yyleng); + addchar(false, '\0'); + yylval->str = scanstring; + return INT_P; + } -<> { yyterminate(); } +{any}+ { + addstring(true, yytext, yyleng); + BEGIN xnq; + } -\*\/ { BEGIN INITIAL; } +\" { + addchar(true, '\0'); + BEGIN xq; + } -[^\*]+ { } +\' { + addchar(true, '\0'); + BEGIN xsq; + } -\* { } +\\ { + yyless(0); + addchar(true, '\0'); + BEGIN xnq; + } -<> { yyerror(NULL, "Unexpected end of comment"); } +<> { yyterminate(); } %% @@ -292,7 +300,6 @@ typedef struct JsonPathKeyword * Array of key words should be sorted by length and then * alphabetical order */ - static const JsonPathKeyword keywords[] = { { 2, false, IS_P, "is"}, { 2, false, TO_P, "to"}, @@ -317,8 +324,9 @@ static const JsonPathKeyword keywords[] = { { 
10,false, LIKE_REGEX_P, "like_regex"}, }; -static int -checkSpecialVal() +/* Check if current scanstring value is a keyword */ +static enum yytokentype +checkKeyword() { int res = IDENT_P; int diff; @@ -329,7 +337,7 @@ checkSpecialVal() if (scanstring.len > keywords[lengthof(keywords) - 1].len) return res; - while(StopLow < StopHigh) + while (StopLow < StopHigh) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); @@ -397,49 +405,50 @@ jsonpath_scanner_finish(void) pfree(scanbuf); } +/* + * Resize scanstring so that it can append string of given length. + * Reinitialize if required. + */ static void -addstring(bool init, char *s, int l) +resizeString(bool init, int appendLen) { if (init) { - scanstring.total = 32; - scanstring.val = palloc(scanstring.total); + scanstring.total = Max(32, appendLen); + scanstring.val = (char *) palloc(scanstring.total); scanstring.len = 0; } - - if (s && l) + else { - while(scanstring.len + l + 1 >= scanstring.total) + if (scanstring.len + appendLen >= scanstring.total) { - scanstring.total *= 2; + while (scanstring.len + appendLen >= scanstring.total) + scanstring.total *= 2; scanstring.val = repalloc(scanstring.val, scanstring.total); } - - memcpy(scanstring.val + scanstring.len, s, l); - scanstring.len += l; } } +/* Add set of bytes at "s" of length "l" to scanstring */ static void -addchar(bool init, char s) +addstring(bool init, char *s, int l) { - if (init) - { - scanstring.total = 32; - scanstring.val = palloc(scanstring.total); - scanstring.len = 0; - } - else if(scanstring.len + 1 >= scanstring.total) - { - scanstring.total *= 2; - scanstring.val = repalloc(scanstring.val, scanstring.total); - } + resizeString(init, l + 1); + memcpy(scanstring.val + scanstring.len, s, l); + scanstring.len += l; +} - scanstring.val[ scanstring.len ] = s; - if (s != '\0') +/* Add single byte "c" to scanstring */ +static void +addchar(bool init, char c) +{ + resizeString(init, 1); + scanstring.val[scanstring.len] = c; + if (c != '\0') 
scanstring.len++; } +/* Interface to jsonpath parser */ JsonPathParseResult * parsejsonpath(const char *str, int len) { @@ -447,7 +456,7 @@ parsejsonpath(const char *str, int len) jsonpath_scanner_init(str, len); - if (jsonpath_yyparse((void*)&parseresult) != 0) + if (jsonpath_yyparse((void *) &parseresult) != 0) jsonpath_yyerror(NULL, "bugus input"); jsonpath_scanner_finish(); @@ -455,6 +464,7 @@ parsejsonpath(const char *str, int len) return parseresult; } +/* Turn hex character into integer */ static int hexval(char c) { @@ -468,6 +478,7 @@ hexval(char c) return 0; /* not reached */ } +/* Add given unicode character to scanstring */ static void addUnicodeChar(int ch) { @@ -515,6 +526,7 @@ addUnicodeChar(int ch) } } +/* Add unicode character and process its hi surrogate */ static void addUnicode(int ch, int *hi_surrogate) { @@ -592,6 +604,7 @@ parseUnicode(char *s, int l) } } +/* Parse sequence of hex-encoded characters */ static void parseHexChars(char *s, int l) { @@ -601,7 +614,8 @@ parseHexChars(char *s, int l) for (i = 0; i < l / 4; i++) { - int ch = (hexval(s[i * 4 + 2]) << 4) | hexval(s[i * 4 + 3]); + int ch = (hexval(s[i * 4 + 2]) << 4) | + hexval(s[i * 4 + 3]); addUnicodeChar(ch); } -- cgit v1.2.3