diff options
Diffstat (limited to 'src/include/parser')
-rw-r--r-- | src/include/parser/gramparse.h | 80 |
-rw-r--r-- | src/include/parser/scanner.h | 120 |
2 files changed, 131 insertions, 69 deletions
diff --git a/src/include/parser/gramparse.h b/src/include/parser/gramparse.h index 09c99091361..41774028b5b 100644 --- a/src/include/parser/gramparse.h +++ b/src/include/parser/gramparse.h @@ -11,7 +11,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.49 2009/11/05 23:24:26 tgl Exp $ + * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.50 2009/11/09 18:38:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,20 +20,11 @@ #define GRAMPARSE_H #include "nodes/parsenodes.h" -#include "parser/keywords.h" +#include "parser/scanner.h" /* - * We track token locations in terms of byte offsets from the start of the - * source string, not the column number/line number representation that - * bison uses by default. Also, to minimize overhead we track only one - * location (usually the first token location) for each construct, not - * the beginning and ending locations as bison does by default. It's - * therefore sufficient to make YYLTYPE an int. - */ -#define YYLTYPE int - -/* - * After defining YYLTYPE, it's safe to include gram.h. + * NB: include gram.h only AFTER including scanner.h, because scanner.h + * is what #defines YYLTYPE. */ #include "parser/gram.h" @@ -44,63 +35,25 @@ typedef struct base_yy_extra_type { /* - * The string the lexer is physically scanning. We keep this mainly so - * that we can cheaply compute the offset of the current token (yytext). + * Fields used by the core scanner. */ - char *scanbuf; - Size scanbuflen; + core_yy_extra_type core_yy_extra; /* - * The keyword list to use. - */ - const ScanKeyword *keywords; - int num_keywords; - - /* - * literalbuf is used to accumulate literal values when multiple rules - * are needed to parse a single literal. Call startlit() to reset buffer - * to empty, addlit() to add text. 
NOTE: the string in literalbuf is - * NOT necessarily null-terminated, but there always IS room to add a - * trailing null at offset literallen. We store a null only when we - * need it. - */ - char *literalbuf; /* palloc'd expandable buffer */ - int literallen; /* actual current string length */ - int literalalloc; /* current allocated buffer size */ - - int xcdepth; /* depth of nesting in slash-star comments */ - char *dolqstart; /* current $foo$ quote start string */ - - /* first part of UTF16 surrogate pair for Unicode escapes */ - int32 utf16_first_part; - - /* state variables for literal-lexing warnings */ - bool warn_on_first_escape; - bool saw_non_ascii; - - /* - * State variables for filtered_base_yylex(). + * State variables for base_yylex(). */ bool have_lookahead; /* is lookahead info valid? */ int lookahead_token; /* one-token lookahead */ - YYSTYPE lookahead_yylval; /* yylval for lookahead token */ + core_YYSTYPE lookahead_yylval; /* yylval for lookahead token */ YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */ /* - * State variables that belong to the grammar, not the lexer. It's - * simpler to keep these here than to invent a separate structure. - * These fields are unused/undefined if the lexer is invoked on its own. + * State variables that belong to the grammar. */ - List *parsetree; /* final parse result is delivered here */ } base_yy_extra_type; /* - * The type of yyscanner is opaque outside scan.l. - */ -typedef void *base_yyscan_t; - -/* * In principle we should use yyget_extra() to fetch the yyextra field * from a yyscanner struct. 
However, flex always puts that field first, * and this is sufficiently performance-critical to make it seem worth @@ -110,22 +63,11 @@ typedef void *base_yyscan_t; /* from parser.c */ -extern int filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, - base_yyscan_t yyscanner); - -/* from scan.l */ -extern base_yyscan_t scanner_init(const char *str, - base_yy_extra_type *yyext, - const ScanKeyword *keywords, - int num_keywords); -extern void scanner_finish(base_yyscan_t yyscanner); extern int base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, - base_yyscan_t yyscanner); -extern int scanner_errposition(int location, base_yyscan_t yyscanner); -extern void scanner_yyerror(const char *message, base_yyscan_t yyscanner); + core_yyscan_t yyscanner); /* from gram.y */ extern void parser_init(base_yy_extra_type *yyext); -extern int base_yyparse(base_yyscan_t yyscanner); +extern int base_yyparse(core_yyscan_t yyscanner); #endif /* GRAMPARSE_H */ diff --git a/src/include/parser/scanner.h b/src/include/parser/scanner.h new file mode 100644 index 00000000000..ccab1db862c --- /dev/null +++ b/src/include/parser/scanner.h @@ -0,0 +1,120 @@ +/*------------------------------------------------------------------------- + * + * scanner.h + * API for the core scanner (flex machine) + * + * The core scanner is also used by PL/pgsql, so we provide a public API + * for it. However, the rest of the backend is only expected to use the + * higher-level API provided by parser.h. + * + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/include/parser/scanner.h,v 1.1 2009/11/09 18:38:48 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#ifndef SCANNER_H +#define SCANNER_H + +#include "parser/keywords.h" + +/* + * The scanner returns extra data about scanned tokens in this union type. 
+ * Note that this is a subset of the fields used in YYSTYPE of the bison + * parsers built atop the scanner. + */ +typedef union core_YYSTYPE +{ + int ival; /* for integer literals */ + char *str; /* for identifiers and non-integer literals */ + const char *keyword; /* canonical spelling of keywords */ +} core_YYSTYPE; + +/* + * We track token locations in terms of byte offsets from the start of the + * source string, not the column number/line number representation that + * bison uses by default. Also, to minimize overhead we track only one + * location (usually the first token location) for each construct, not + * the beginning and ending locations as bison does by default. It's + * therefore sufficient to make YYLTYPE an int. + */ +#define YYLTYPE int + +/* + * Another important component of the scanner's API is the token code numbers. + * However, those are not defined in this file, because bison insists on + * defining them for itself. The token codes used by the core scanner are + * the ASCII characters plus these: + * %token <str> IDENT FCONST SCONST BCONST XCONST Op + * %token <ival> ICONST PARAM + * %token TYPECAST DOT_DOT COLON_EQUALS + * The above token definitions *must* be the first ones declared in any + * bison parser built atop this scanner, so that they will have consistent + * numbers assigned to them (specifically, IDENT = 258 and so on). + */ + +/* + * The YY_EXTRA data that a flex scanner allows us to pass around. + * Private state needed by the core scanner goes here. Note that the actual + * yy_extra struct may be larger and have this as its first component, thus + * allowing the calling parser to keep some fields of its own in YY_EXTRA. + */ +typedef struct core_yy_extra_type +{ + /* + * The string the scanner is physically scanning. We keep this mainly so + * that we can cheaply compute the offset of the current token (yytext). + */ + char *scanbuf; + Size scanbuflen; + + /* + * The keyword list to use. 
+ */ + const ScanKeyword *keywords; + int num_keywords; + + /* + * literalbuf is used to accumulate literal values when multiple rules + * are needed to parse a single literal. Call startlit() to reset buffer + * to empty, addlit() to add text. NOTE: the string in literalbuf is + * NOT necessarily null-terminated, but there always IS room to add a + * trailing null at offset literallen. We store a null only when we + * need it. + */ + char *literalbuf; /* palloc'd expandable buffer */ + int literallen; /* actual current string length */ + int literalalloc; /* current allocated buffer size */ + + int xcdepth; /* depth of nesting in slash-star comments */ + char *dolqstart; /* current $foo$ quote start string */ + + /* first part of UTF16 surrogate pair for Unicode escapes */ + int32 utf16_first_part; + + /* state variables for literal-lexing warnings */ + bool warn_on_first_escape; + bool saw_non_ascii; +} core_yy_extra_type; + +/* + * The type of yyscanner is opaque outside scan.l. + */ +typedef void *core_yyscan_t; + + +/* Entry points in parser/scan.l */ +extern core_yyscan_t scanner_init(const char *str, + core_yy_extra_type *yyext, + const ScanKeyword *keywords, + int num_keywords); +extern void scanner_finish(core_yyscan_t yyscanner); +extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, + core_yyscan_t yyscanner); +extern int scanner_errposition(int location, core_yyscan_t yyscanner); +extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner); + +#endif /* SCANNER_H */ |