diff options
author | Alexander Korotkov <akorotkov@postgresql.org> | 2019-03-25 15:42:51 +0300 |
---|---|---|
committer | Alexander Korotkov <akorotkov@postgresql.org> | 2019-03-25 15:42:51 +0300 |
commit | 8b17298f0b6bb2a64b55fab0339c8fd6ec2d74fb (patch) | |
tree | d7a9010c6756ab00a3e2fa72063d5d61a10dde94 /src/backend/utils/adt/jsonpath_scan.l | |
parent | d303122eab616ccbcfb0bab0fc674bf625d17a7b (diff) | |
download | postgresql-8b17298f0b6bb2a64b55fab0339c8fd6ec2d74fb.tar.gz postgresql-8b17298f0b6bb2a64b55fab0339c8fd6ec2d74fb.zip |
Cosmetic changes for jsonpath_gram.y and jsonpath_scan.l
This commit includes formatting improvements, renamings and comments. Also,
it makes jsonpath_scan.l more uniform with our other lexers. Firstly,
state names are renamed to shorter alternatives. Secondly, the <INITIAL>
prefix is removed from the rules. The corresponding rules are moved to the tail, so
they still work only in the initial state.
Author: Alexander Korotkov
Reviewed-by: John Naylor
Diffstat (limited to 'src/backend/utils/adt/jsonpath_scan.l')
-rw-r--r-- | src/backend/utils/adt/jsonpath_scan.l | 332 |
1 files changed, 173 insertions, 159 deletions
diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l index 02cb54ee7f9..e93307f4073 100644 --- a/src/backend/utils/adt/jsonpath_scan.l +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -4,6 +4,9 @@ * jsonpath_scan.l * Lexical parser for jsonpath datatype * + * Splits jsonpath string into tokens represented as JsonPathString structs. + * Decodes unicode and hex escaped strings. + * * Copyright (c) 2019, PostgreSQL Global Development Group * * IDENTIFICATION @@ -19,9 +22,6 @@ static JsonPathString scanstring; -/* No reason to constrain amount of data slurped */ -/* #define YY_READ_BUF_SIZE 16777216 */ - /* Handles to the buffer that the lexer uses internally */ static YY_BUFFER_STATE scanbufhandle; static char *scanbuf; @@ -29,9 +29,7 @@ static int scanbuflen; static void addstring(bool init, char *s, int l); static void addchar(bool init, char s); -static int checkSpecialVal(void); /* examine scanstring for the special - * value */ - +static enum yytokentype checkKeyword(void); static void parseUnicode(char *s, int l); static void parseHexChars(char *s, int l); @@ -60,11 +58,22 @@ fprintf_to_ereport(const char *fmt, const char *msg) %option noyyrealloc %option noyyfree -%x xQUOTED -%x xNONQUOTED -%x xVARQUOTED -%x xSINGLEQUOTED -%x xCOMMENT +/* + * We use exclusive states for quoted, signle-quoted and non-quoted strings, + * quoted variable names and C-tyle comments. + * Exclusive states: + * <xq> - quoted strings + * <xnq> - non-quoted strings + * <xvq> - quoted variable names + * <xsq> - single-quoted strings + * <xc> - C-style comment + */ + +%x xq +%x xnq +%x xvq +%x xsq +%x xc special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/] any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f] @@ -73,189 +82,188 @@ hex_dig [0-9A-Fa-f] unicode \\u({hex_dig}{4}|\{{hex_dig}{1,6}\}) hex_char \\x{hex_dig}{2} - %% -<INITIAL>\&\& { return AND_P; } - -<INITIAL>\|\| { return OR_P; } - -<INITIAL>\! 
{ return NOT_P; } - -<INITIAL>\*\* { return ANY_P; } - -<INITIAL>\< { return LESS_P; } - -<INITIAL>\<\= { return LESSEQUAL_P; } - -<INITIAL>\=\= { return EQUAL_P; } - -<INITIAL>\<\> { return NOTEQUAL_P; } +<xnq>{any}+ { + addstring(false, yytext, yyleng); + } -<INITIAL>\!\= { return NOTEQUAL_P; } +<xnq>{blank}+ { + yylval->str = scanstring; + BEGIN INITIAL; + return checkKeyword(); + } -<INITIAL>\>\= { return GREATEREQUAL_P; } -<INITIAL>\> { return GREATER_P; } +<xnq>\/\* { + yylval->str = scanstring; + BEGIN xc; + } -<INITIAL>\${any}+ { - addstring(true, yytext + 1, yyleng - 1); - addchar(false, '\0'); +<xnq>({special}|\"|\') { yylval->str = scanstring; - return VARIABLE_P; + yyless(0); + BEGIN INITIAL; + return checkKeyword(); } -<INITIAL>\$\" { - addchar(true, '\0'); - BEGIN xVARQUOTED; +<xnq><<EOF>> { + yylval->str = scanstring; + BEGIN INITIAL; + return checkKeyword(); } -<INITIAL>{special} { return *yytext; } +<xnq,xq,xvq,xsq>\\[\"\'\\] { addchar(false, yytext[1]); } -<INITIAL>{blank}+ { /* ignore */ } +<xnq,xq,xvq,xsq>\\b { addchar(false, '\b'); } -<INITIAL>\/\* { - addchar(true, '\0'); - BEGIN xCOMMENT; - } +<xnq,xq,xvq,xsq>\\f { addchar(false, '\f'); } -<INITIAL>[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ /* float */ { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; - return NUMERIC_P; - } +<xnq,xq,xvq,xsq>\\n { addchar(false, '\n'); } -<INITIAL>\.[0-9]+[eE][+-]?[0-9]+ /* float */ { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; - return NUMERIC_P; - } +<xnq,xq,xvq,xsq>\\r { addchar(false, '\r'); } -<INITIAL>([0-9]+)?\.[0-9]+ { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; - return NUMERIC_P; - } +<xnq,xq,xvq,xsq>\\t { addchar(false, '\t'); } -<INITIAL>[0-9]+ { - addstring(true, yytext, yyleng); - addchar(false, '\0'); - yylval->str = scanstring; - return INT_P; - } +<xnq,xq,xvq,xsq>\\v { addchar(false, '\v'); } -<INITIAL>{any}+ { - addstring(true, 
yytext, yyleng); - BEGIN xNONQUOTED; - } +<xnq,xq,xvq,xsq>{unicode}+ { parseUnicode(yytext, yyleng); } -<INITIAL>\" { - addchar(true, '\0'); - BEGIN xQUOTED; - } +<xnq,xq,xvq,xsq>{hex_char}+ { parseHexChars(yytext, yyleng); } -<INITIAL>\' { - addchar(true, '\0'); - BEGIN xSINGLEQUOTED; - } +<xnq,xq,xvq,xsq>\\x { yyerror(NULL, "Hex character sequence is invalid"); } -<INITIAL>\\ { - yyless(0); - addchar(true, '\0'); - BEGIN xNONQUOTED; - } +<xnq,xq,xvq,xsq>\\u { yyerror(NULL, "Unicode sequence is invalid"); } -<xNONQUOTED>{any}+ { - addstring(false, yytext, yyleng); - } +<xnq,xq,xvq,xsq>\\. { yyerror(NULL, "Escape sequence is invalid"); } -<xNONQUOTED>{blank}+ { - yylval->str = scanstring; - BEGIN INITIAL; - return checkSpecialVal(); - } +<xnq,xq,xvq,xsq>\\ { yyerror(NULL, "Unexpected end after backslash"); } +<xq,xvq,xsq><<EOF>> { yyerror(NULL, "Unexpected end of quoted string"); } -<xNONQUOTED>\/\* { +<xq>\" { yylval->str = scanstring; - BEGIN xCOMMENT; + BEGIN INITIAL; + return STRING_P; } -<xNONQUOTED>({special}|\"|\') { +<xvq>\" { yylval->str = scanstring; - yyless(0); BEGIN INITIAL; - return checkSpecialVal(); + return VARIABLE_P; } -<xNONQUOTED><<EOF>> { +<xsq>\' { yylval->str = scanstring; BEGIN INITIAL; - return checkSpecialVal(); + return STRING_P; } -<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\[\"\'\\] { addchar(false, yytext[1]); } +<xq,xvq>[^\\\"]+ { addstring(false, yytext, yyleng); } + +<xsq>[^\\\']+ { addstring(false, yytext, yyleng); } + +<xc>\*\/ { BEGIN INITIAL; } -<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\b { addchar(false, '\b'); } +<xc>[^\*]+ { } -<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\f { addchar(false, '\f'); } +<xc>\* { } -<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\n { addchar(false, '\n'); } +<xc><<EOF>> { yyerror(NULL, "Unexpected end of comment"); } -<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\r { addchar(false, '\r'); } +\&\& { return AND_P; } -<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\t { addchar(false, 
'\t'); } +\|\| { return OR_P; } -<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\v { addchar(false, '\v'); } +\! { return NOT_P; } -<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{unicode}+ { parseUnicode(yytext, yyleng); } +\*\* { return ANY_P; } -<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{hex_char}+ { parseHexChars(yytext, yyleng); } +\< { return LESS_P; } -<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\x { yyerror(NULL, "Hex character sequence is invalid"); } +\<\= { return LESSEQUAL_P; } -<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\u { yyerror(NULL, "Unicode sequence is invalid"); } +\=\= { return EQUAL_P; } -<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\. { yyerror(NULL, "Escape sequence is invalid"); } +\<\> { return NOTEQUAL_P; } -<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\ { yyerror(NULL, "Unexpected end after backslash"); } +\!\= { return NOTEQUAL_P; } -<xQUOTED,xVARQUOTED,xSINGLEQUOTED><<EOF>> { yyerror(NULL, "Unexpected end of quoted string"); } +\>\= { return GREATEREQUAL_P; } -<xQUOTED>\" { +\> { return GREATER_P; } + +\${any}+ { + addstring(true, yytext + 1, yyleng - 1); + addchar(false, '\0'); yylval->str = scanstring; - BEGIN INITIAL; - return STRING_P; + return VARIABLE_P; + } + +\$\" { + addchar(true, '\0'); + BEGIN xvq; } -<xVARQUOTED>\" { +{special} { return *yytext; } + +{blank}+ { /* ignore */ } + +\/\* { + addchar(true, '\0'); + BEGIN xc; + } + +[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ { /* float */ + addstring(true, yytext, yyleng); + addchar(false, '\0'); yylval->str = scanstring; - BEGIN INITIAL; - return VARIABLE_P; + return NUMERIC_P; } -<xSINGLEQUOTED>\' { +\.[0-9]+[eE][+-]?[0-9]+ { /* float */ + addstring(true, yytext, yyleng); + addchar(false, '\0'); yylval->str = scanstring; - BEGIN INITIAL; - return STRING_P; + return NUMERIC_P; } -<xQUOTED,xVARQUOTED>[^\\\"]+ { addstring(false, yytext, yyleng); } +([0-9]+)?\.[0-9]+ { + addstring(true, yytext, yyleng); + addchar(false, '\0'); + yylval->str = scanstring; + return NUMERIC_P; + } 
-<xSINGLEQUOTED>[^\\\']+ { addstring(false, yytext, yyleng); } +[0-9]+ { + addstring(true, yytext, yyleng); + addchar(false, '\0'); + yylval->str = scanstring; + return INT_P; + } -<INITIAL><<EOF>> { yyterminate(); } +{any}+ { + addstring(true, yytext, yyleng); + BEGIN xnq; + } -<xCOMMENT>\*\/ { BEGIN INITIAL; } +\" { + addchar(true, '\0'); + BEGIN xq; + } -<xCOMMENT>[^\*]+ { } +\' { + addchar(true, '\0'); + BEGIN xsq; + } -<xCOMMENT>\* { } +\\ { + yyless(0); + addchar(true, '\0'); + BEGIN xnq; + } -<xCOMMENT><<EOF>> { yyerror(NULL, "Unexpected end of comment"); } +<<EOF>> { yyterminate(); } %% @@ -292,7 +300,6 @@ typedef struct JsonPathKeyword * Array of key words should be sorted by length and then * alphabetical order */ - static const JsonPathKeyword keywords[] = { { 2, false, IS_P, "is"}, { 2, false, TO_P, "to"}, @@ -317,8 +324,9 @@ static const JsonPathKeyword keywords[] = { { 10,false, LIKE_REGEX_P, "like_regex"}, }; -static int -checkSpecialVal() +/* Check if current scanstring value is a keyword */ +static enum yytokentype +checkKeyword() { int res = IDENT_P; int diff; @@ -329,7 +337,7 @@ checkSpecialVal() if (scanstring.len > keywords[lengthof(keywords) - 1].len) return res; - while(StopLow < StopHigh) + while (StopLow < StopHigh) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); @@ -397,49 +405,50 @@ jsonpath_scanner_finish(void) pfree(scanbuf); } +/* + * Resize scanstring so that it can append string of given length. + * Reinitialize if required. 
+ */ static void -addstring(bool init, char *s, int l) +resizeString(bool init, int appendLen) { if (init) { - scanstring.total = 32; - scanstring.val = palloc(scanstring.total); + scanstring.total = Max(32, appendLen); + scanstring.val = (char *) palloc(scanstring.total); scanstring.len = 0; } - - if (s && l) + else { - while(scanstring.len + l + 1 >= scanstring.total) + if (scanstring.len + appendLen >= scanstring.total) { - scanstring.total *= 2; + while (scanstring.len + appendLen >= scanstring.total) + scanstring.total *= 2; scanstring.val = repalloc(scanstring.val, scanstring.total); } - - memcpy(scanstring.val + scanstring.len, s, l); - scanstring.len += l; } } +/* Add set of bytes at "s" of length "l" to scanstring */ static void -addchar(bool init, char s) +addstring(bool init, char *s, int l) { - if (init) - { - scanstring.total = 32; - scanstring.val = palloc(scanstring.total); - scanstring.len = 0; - } - else if(scanstring.len + 1 >= scanstring.total) - { - scanstring.total *= 2; - scanstring.val = repalloc(scanstring.val, scanstring.total); - } + resizeString(init, l + 1); + memcpy(scanstring.val + scanstring.len, s, l); + scanstring.len += l; +} - scanstring.val[ scanstring.len ] = s; - if (s != '\0') +/* Add single byte "c" to scanstring */ +static void +addchar(bool init, char c) +{ + resizeString(init, 1); + scanstring.val[scanstring.len] = c; + if (c != '\0') scanstring.len++; } +/* Interface to jsonpath parser */ JsonPathParseResult * parsejsonpath(const char *str, int len) { @@ -447,7 +456,7 @@ parsejsonpath(const char *str, int len) jsonpath_scanner_init(str, len); - if (jsonpath_yyparse((void*)&parseresult) != 0) + if (jsonpath_yyparse((void *) &parseresult) != 0) jsonpath_yyerror(NULL, "bugus input"); jsonpath_scanner_finish(); @@ -455,6 +464,7 @@ parsejsonpath(const char *str, int len) return parseresult; } +/* Turn hex character into integer */ static int hexval(char c) { @@ -468,6 +478,7 @@ hexval(char c) return 0; /* not reached */ } +/* 
Add given unicode character to scanstring */ static void addUnicodeChar(int ch) { @@ -515,6 +526,7 @@ addUnicodeChar(int ch) } } +/* Add unicode character and process its hi surrogate */ static void addUnicode(int ch, int *hi_surrogate) { @@ -592,6 +604,7 @@ parseUnicode(char *s, int l) } } +/* Parse sequence of hex-encoded characters */ static void parseHexChars(char *s, int l) { @@ -601,7 +614,8 @@ parseHexChars(char *s, int l) for (i = 0; i < l / 4; i++) { - int ch = (hexval(s[i * 4 + 2]) << 4) | hexval(s[i * 4 + 3]); + int ch = (hexval(s[i * 4 + 2]) << 4) | + hexval(s[i * 4 + 3]); addUnicodeChar(ch); } |