2 files changed, 131 insertions, 69 deletions
diff --git a/src/include/parser/gramparse.h b/src/include/parser/gramparse.h
index 09c99091361..41774028b5b 100644
--- a/src/include/parser/gramparse.h
+++ b/src/include/parser/gramparse.h
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.49 2009/11/05 23:24:26 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.50 2009/11/09 18:38:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,20 +20,11 @@
 #define GRAMPARSE_H
 
 #include "nodes/parsenodes.h"
-#include "parser/keywords.h"
+#include "parser/scanner.h"
 
 /*
- * We track token locations in terms of byte offsets from the start of the
- * source string, not the column number/line number representation that
- * bison uses by default.  Also, to minimize overhead we track only one
- * location (usually the first token location) for each construct, not
- * the beginning and ending locations as bison does by default.  It's
- * therefore sufficient to make YYLTYPE an int.
- */
-#define YYLTYPE  int
-
-/*
- * After defining YYLTYPE, it's safe to include gram.h.
+ * NB: include gram.h only AFTER including scanner.h, because scanner.h
+ * is what #defines YYLTYPE.
  */
 #include "parser/gram.h"
 
@@ -44,63 +35,25 @@
 typedef struct base_yy_extra_type
 {
 	/*
-	 * The string the lexer is physically scanning.  We keep this mainly so
-	 * that we can cheaply compute the offset of the current token (yytext).
+	 * Fields used by the core scanner.  Kept first, as scanner.h describes.
 	 */
-	char	   *scanbuf;
-	Size		scanbuflen;
+	core_yy_extra_type core_yy_extra;
 
 	/*
-	 * The keyword list to use.
-	 */
-	const ScanKeyword *keywords;
-	int			num_keywords;
-
-	/*
-	 * literalbuf is used to accumulate literal values when multiple rules
-	 * are needed to parse a single literal.  Call startlit() to reset buffer
-	 * to empty, addlit() to add text.  NOTE: the string in literalbuf is
-	 * NOT necessarily null-terminated, but there always IS room to add a
-	 * trailing null at offset literallen.  We store a null only when we
-	 * need it.
-	 */
-	char	   *literalbuf;		/* palloc'd expandable buffer */
-	int			literallen;		/* actual current string length */
-	int			literalalloc;	/* current allocated buffer size */
-
-	int			xcdepth;		/* depth of nesting in slash-star comments */
-	char	   *dolqstart;		/* current $foo$ quote start string */
-
-	/* first part of UTF16 surrogate pair for Unicode escapes */
-	int32		utf16_first_part;
-
-	/* state variables for literal-lexing warnings */
-	bool		warn_on_first_escape;
-	bool		saw_non_ascii;
-
-	/*
-	 * State variables for filtered_base_yylex().
+	 * State variables for base_yylex().
 	 */
 	bool		have_lookahead;		/* is lookahead info valid? */
 	int			lookahead_token;	/* one-token lookahead */
-	YYSTYPE		lookahead_yylval;	/* yylval for lookahead token */
+	core_YYSTYPE lookahead_yylval;	/* yylval for lookahead token */
 	YYLTYPE		lookahead_yylloc;	/* yylloc for lookahead token */
 
 	/*
-	 * State variables that belong to the grammar, not the lexer.  It's
-	 * simpler to keep these here than to invent a separate structure.
-	 * These fields are unused/undefined if the lexer is invoked on its own.
+	 * State variables that belong to the grammar.
 	 */
-
 	List	   *parsetree;		/* final parse result is delivered here */
 } base_yy_extra_type;
 
 /*
- * The type of yyscanner is opaque outside scan.l.
- */
-typedef void *base_yyscan_t;
-
-/*
  * In principle we should use yyget_extra() to fetch the yyextra field
  * from a yyscanner struct.  However, flex always puts that field first,
  * and this is sufficiently performance-critical to make it seem worth
@@ -110,22 +63,11 @@ typedef void *base_yyscan_t;
 
 
 /* from parser.c */
-extern int	filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
-								base_yyscan_t yyscanner);
-
-/* from scan.l */
-extern base_yyscan_t scanner_init(const char *str,
-								  base_yy_extra_type *yyext,
-								  const ScanKeyword *keywords,
-								  int num_keywords);
-extern void scanner_finish(base_yyscan_t yyscanner);
 extern int	base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
-					   base_yyscan_t yyscanner);
-extern int	scanner_errposition(int location, base_yyscan_t yyscanner);
-extern void scanner_yyerror(const char *message, base_yyscan_t yyscanner);
+					   core_yyscan_t yyscanner);
 
 /* from gram.y */
 extern void parser_init(base_yy_extra_type *yyext);
-extern int	base_yyparse(base_yyscan_t yyscanner);
+extern int	base_yyparse(core_yyscan_t yyscanner);
 
 #endif   /* GRAMPARSE_H */
diff --git a/src/include/parser/scanner.h b/src/include/parser/scanner.h
new file mode 100644
index 00000000000..ccab1db862c
--- /dev/null
+++ b/src/include/parser/scanner.h
@@ -0,0 +1,120 @@
+/*-------------------------------------------------------------------------
+ *
+ * scanner.h
+ *		API for the core scanner (flex machine)
+ *
+ * The core scanner is also used by PL/pgSQL, so we provide a public API
+ * for it.  However, the rest of the backend is only expected to use the
+ * higher-level API provided by parser.h.
+ *
+ *
+ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL: pgsql/src/include/parser/scanner.h,v 1.1 2009/11/09 18:38:48 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef SCANNER_H
+#define SCANNER_H
+
+#include "parser/keywords.h"
+
+/*
+ * The scanner returns extra data about scanned tokens in this union type.
+ * Note that this is a subset of the fields used in YYSTYPE of the bison
+ * parsers built atop the scanner.
+ */
+typedef union core_YYSTYPE
+{
+	int			ival;			/* for ICONST and PARAM tokens */
+	char	   *str;			/* for IDENT, FCONST, SCONST, BCONST, XCONST, Op */
+	const char *keyword;		/* canonical spelling of keywords */
+} core_YYSTYPE;
+
+/*
+ * We track token locations in terms of byte offsets from the start of the
+ * source string, not the column number/line number representation that
+ * bison uses by default.  Also, to minimize overhead we track only one
+ * location (usually the first token location) for each construct, not
+ * the beginning and ending locations as bison does by default.  It's
+ * therefore sufficient to make YYLTYPE an int.
+ */
+#define YYLTYPE  int
+
+/*
+ * Another important component of the scanner's API is the token code numbers.
+ * However, those are not defined in this file, because bison insists on
+ * defining them for itself.  The token codes used by the core scanner are
+ * the ASCII characters plus these:
+ *	%token <str>	IDENT FCONST SCONST BCONST XCONST Op
+ *	%token <ival>	ICONST PARAM
+ *	%token			TYPECAST DOT_DOT COLON_EQUALS
+ * The above token definitions *must* be the first ones declared in any
+ * bison parser built atop this scanner, so that they will have consistent
+ * numbers assigned to them (specifically, IDENT = 258 and so on).
+ */
+
+/*
+ * The YY_EXTRA data that a flex scanner allows us to pass around.  Private
+ * state needed by the core scanner goes here.  The actual yy_extra struct may
+ * be larger and have this as its first component (as base_yy_extra_type in
+ * gramparse.h does), letting the calling parser keep fields of its own.
+ */
+typedef struct core_yy_extra_type
+{
+	/*
+	 * The string the scanner is physically scanning.  We keep this mainly so
+	 * that we can cheaply compute the offset of the current token (yytext).
+	 */
+	char	   *scanbuf;
+	Size		scanbuflen;
+
+	/*
+	 * The keyword list to use.
+	 */
+	const ScanKeyword *keywords;
+	int			num_keywords;
+
+	/*
+	 * literalbuf is used to accumulate literal values when multiple rules
+	 * are needed to parse a single literal.  Call startlit() to reset buffer
+	 * to empty, addlit() to add text.  NOTE: the string in literalbuf is
+	 * NOT necessarily null-terminated, but there always IS room to add a
+	 * trailing null at offset literallen.  We store a null only when we
+	 * need it.
+	 */
+	char	   *literalbuf;		/* palloc'd expandable buffer */
+	int			literallen;		/* actual current string length */
+	int			literalalloc;	/* current allocated buffer size */
+
+	int			xcdepth;		/* depth of nesting in slash-star comments */
+	char	   *dolqstart;		/* current $foo$ quote start string */
+
+	/* first part of UTF16 surrogate pair for Unicode escapes */
+	int32		utf16_first_part;
+
+	/* state variables for literal-lexing warnings */
+	bool		warn_on_first_escape;
+	bool		saw_non_ascii;
+} core_yy_extra_type;
+
+/*
+ * Opaque scanner handle; the real type of yyscanner is known only to scan.l.
+ */
+typedef void *core_yyscan_t;
+
+
+/* Entry points in parser/scan.l; each takes or returns a core_yyscan_t */
+extern core_yyscan_t scanner_init(const char *str,
+								  core_yy_extra_type *yyext,
+								  const ScanKeyword *keywords,
+								  int num_keywords);
+extern void scanner_finish(core_yyscan_t yyscanner);
+extern int	core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp,
+					   core_yyscan_t yyscanner);
+extern int	scanner_errposition(int location, core_yyscan_t yyscanner);
+extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner);
+
+#endif   /* SCANNER_H */