Diffstat (limited to 'src/common/jsonapi.c')
-rw-r--r-- | src/common/jsonapi.c | 954 |
1 file changed, 945 insertions, 9 deletions
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index 98d6e66a217..3d1bd37ac26 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -43,6 +43,169 @@ typedef enum		/* contexts of JSON parser */
 	JSON_PARSE_END,				/* saw the end of a document, expect nothing */
 } JsonParseContext;
 
+/*
+ * Setup for table-driven parser.
+ * These enums need to be separate from the JsonTokenType and from each other
+ * so we can have all of them on the prediction stack, which consists of
+ * tokens, non-terminals, and semantic action markers.
+ */
+
+typedef enum
+{
+	JSON_NT_JSON = 32,
+	JSON_NT_ARRAY_ELEMENTS,
+	JSON_NT_MORE_ARRAY_ELEMENTS,
+	JSON_NT_KEY_PAIRS,
+	JSON_NT_MORE_KEY_PAIRS,
+} JsonNonTerminal;
+
+typedef enum
+{
+	JSON_SEM_OSTART = 64,
+	JSON_SEM_OEND,
+	JSON_SEM_ASTART,
+	JSON_SEM_AEND,
+	JSON_SEM_OFIELD_INIT,
+	JSON_SEM_OFIELD_START,
+	JSON_SEM_OFIELD_END,
+	JSON_SEM_AELEM_START,
+	JSON_SEM_AELEM_END,
+	JSON_SEM_SCALAR_INIT,
+	JSON_SEM_SCALAR_CALL,
+} JsonParserSem;
+
+/*
+ * struct containing the 3 stacks used in non-recursive parsing,
+ * and the token and value for scalars that need to be preserved
+ * across calls.
+ */
+typedef struct JsonParserStack
+{
+	int			stack_size;
+	char	   *prediction;
+	int			pred_index;
+	/* these two are indexed by lex_level */
+	char	  **fnames;
+	bool	   *fnull;
+	JsonTokenType scalar_tok;
+	char	   *scalar_val;
+} JsonParserStack;
+
+/*
+ * struct containing state used when there is a possible partial token at the
+ * end of a json chunk when we are doing incremental parsing.
+ */
+typedef struct JsonIncrementalState
+{
+	bool		is_last_chunk;
+	bool		partial_completed;
+	StringInfoData partial_token;
+} JsonIncrementalState;
+
+/*
+ * constants and macros used in the non-recursive parser
+ */
+#define JSON_NUM_TERMINALS 13
+#define JSON_NUM_NONTERMINALS 5
+#define JSON_NT_OFFSET JSON_NT_JSON
+/* for indexing the table */
+#define OFS(NT) (NT) - JSON_NT_OFFSET
+/* classify items we get off the stack */
+#define IS_SEM(x) ((x) & 0x40)
+#define IS_NT(x) ((x) & 0x20)
+
+/*
+ * These productions are stored in reverse order, right to left, so that when
+ * they are pushed on the stack what we expect next is at the top of the
+ * stack.
+ */
+static char JSON_PROD_EPSILON[] = {0};	/* epsilon - an empty production */
+
+/* JSON -> string */
+static char JSON_PROD_SCALAR_STRING[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_STRING, JSON_SEM_SCALAR_INIT, 0};
+
+/* JSON -> number */
+static char JSON_PROD_SCALAR_NUMBER[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NUMBER, JSON_SEM_SCALAR_INIT, 0};
+
+/* JSON -> 'true' */
+static char JSON_PROD_SCALAR_TRUE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_TRUE, JSON_SEM_SCALAR_INIT, 0};
+
+/* JSON -> 'false' */
+static char JSON_PROD_SCALAR_FALSE[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_FALSE, JSON_SEM_SCALAR_INIT, 0};
+
+/* JSON -> 'null' */
+static char JSON_PROD_SCALAR_NULL[] = {JSON_SEM_SCALAR_CALL, JSON_TOKEN_NULL, JSON_SEM_SCALAR_INIT, 0};
+
+/* JSON -> '{' KEY_PAIRS '}' */
+static char JSON_PROD_OBJECT[] = {JSON_SEM_OEND, JSON_TOKEN_OBJECT_END, JSON_NT_KEY_PAIRS, JSON_TOKEN_OBJECT_START, JSON_SEM_OSTART, 0};
+
+/* JSON -> '[' ARRAY_ELEMENTS ']' */
+static char JSON_PROD_ARRAY[] = {JSON_SEM_AEND, JSON_TOKEN_ARRAY_END, JSON_NT_ARRAY_ELEMENTS, JSON_TOKEN_ARRAY_START, JSON_SEM_ASTART, 0};
+
+/* ARRAY_ELEMENTS -> JSON MORE_ARRAY_ELEMENTS */
+static char JSON_PROD_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, 0};
+
+/* MORE_ARRAY_ELEMENTS -> ',' JSON MORE_ARRAY_ELEMENTS */
+static char JSON_PROD_MORE_ARRAY_ELEMENTS[] = {JSON_NT_MORE_ARRAY_ELEMENTS, JSON_SEM_AELEM_END, JSON_NT_JSON, JSON_SEM_AELEM_START, JSON_TOKEN_COMMA, 0};
+
+/* KEY_PAIRS -> string ':' JSON MORE_KEY_PAIRS */
+static char JSON_PROD_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, 0};
+
+/* MORE_KEY_PAIRS -> ',' string ':' JSON MORE_KEY_PAIRS */
+static char JSON_PROD_MORE_KEY_PAIRS[] = {JSON_NT_MORE_KEY_PAIRS, JSON_SEM_OFIELD_END, JSON_NT_JSON, JSON_SEM_OFIELD_START, JSON_TOKEN_COLON, JSON_TOKEN_STRING, JSON_SEM_OFIELD_INIT, JSON_TOKEN_COMMA, 0};
+
+/*
+ * Note: there are also epsilon productions for ARRAY_ELEMENTS,
+ * MORE_ARRAY_ELEMENTS, KEY_PAIRS and MORE_KEY_PAIRS.
+ * They are all the same, as none require any semantic actions.
+ */
+
+/*
+ * Table connecting the productions with their director sets of
+ * terminal symbols.
+ * Any combination not specified here represents an error.
+ */
+
+typedef struct
+{
+	size_t		len;
+	char	   *prod;
+} td_entry;
+
+#define TD_ENTRY(PROD) { sizeof(PROD) - 1, (PROD) }
+
+static td_entry td_parser_table[JSON_NUM_NONTERMINALS][JSON_NUM_TERMINALS] =
+{
+	/* JSON */
+	[OFS(JSON_NT_JSON)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_SCALAR_STRING),
+	[OFS(JSON_NT_JSON)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_SCALAR_NUMBER),
+	[OFS(JSON_NT_JSON)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_SCALAR_TRUE),
+	[OFS(JSON_NT_JSON)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_SCALAR_FALSE),
+	[OFS(JSON_NT_JSON)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_SCALAR_NULL),
+	[OFS(JSON_NT_JSON)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY),
+	[OFS(JSON_NT_JSON)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_OBJECT),
+	/* ARRAY_ELEMENTS */
+	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
+	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_OBJECT_START] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
+	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
+	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NUMBER] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
+	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_TRUE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
+	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_FALSE] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
+	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_NULL] = TD_ENTRY(JSON_PROD_ARRAY_ELEMENTS),
+	[OFS(JSON_NT_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
+	/* MORE_ARRAY_ELEMENTS */
+	[OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_ARRAY_ELEMENTS),
+	[OFS(JSON_NT_MORE_ARRAY_ELEMENTS)][JSON_TOKEN_ARRAY_END] = TD_ENTRY(JSON_PROD_EPSILON),
+	/* KEY_PAIRS */
+	[OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_STRING] = TD_ENTRY(JSON_PROD_KEY_PAIRS),
+	[OFS(JSON_NT_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
+	/* MORE_KEY_PAIRS */
+	[OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_COMMA] = TD_ENTRY(JSON_PROD_MORE_KEY_PAIRS),
+	[OFS(JSON_NT_MORE_KEY_PAIRS)][JSON_TOKEN_OBJECT_END] = TD_ENTRY(JSON_PROD_EPSILON),
+};
+
+/* The GOAL production. Not stored in the table, but will be the initial contents of the prediction stack */
+static char JSON_PROD_GOAL[] = {JSON_TOKEN_END, JSON_NT_JSON, 0};
+
 static inline JsonParseErrorType json_lex_string(JsonLexContext *lex);
 static inline JsonParseErrorType json_lex_number(JsonLexContext *lex, char *s,
 												 bool *num_err, int *total_len);
@@ -60,7 +223,7 @@ JsonSemAction nullSemAction =
 	NULL, NULL, NULL, NULL, NULL
 };
 
-/* Recursive Descent parser support routines */
+/* Parser support routines */
 
 /*
  * lex_peek
@@ -111,6 +274,8 @@ IsValidJsonNumber(const char *str, int len)
 	if (len <= 0)
 		return false;
 
+	dummy_lex.incremental = false;
+
 	/*
 	 * json_lex_number expects a leading '-' to have been eaten already.
 	 *
@@ -175,6 +340,130 @@ makeJsonLexContextCstringLen(JsonLexContext *lex, char *json,
 	return lex;
 }
 
+
+/*
+ * makeJsonLexContextIncremental
+ *
+ * Similar to above, but set up for use in incremental parsing. That means we
+ * need explicit stacks for predictions, field names and null indicators, but
+ * we don't need the input; that will be handed in bit by bit to the
+ * parse routine. We also need an accumulator for partial tokens in case
+ * the boundary between chunks happens to fall in the middle of a token.
+ */
+#define JS_STACK_CHUNK_SIZE 64
+#define JS_MAX_PROD_LEN 10		/* more than we need */
+#define JSON_TD_MAX_STACK 6400	/* hard coded for now - this is a REALLY high
+								 * number */
+
+JsonLexContext *
+makeJsonLexContextIncremental(JsonLexContext *lex, int encoding,
+							  bool need_escapes)
+{
+	if (lex == NULL)
+	{
+		lex = palloc0(sizeof(JsonLexContext));
+		lex->flags |= JSONLEX_FREE_STRUCT;
+	}
+	else
+		memset(lex, 0, sizeof(JsonLexContext));
+
+	lex->line_number = 1;
+	lex->input_encoding = encoding;
+	lex->incremental = true;
+	lex->inc_state = palloc0(sizeof(JsonIncrementalState));
+	initStringInfo(&(lex->inc_state->partial_token));
+	lex->pstack = palloc(sizeof(JsonParserStack));
+	lex->pstack->stack_size = JS_STACK_CHUNK_SIZE;
+	lex->pstack->prediction = palloc(JS_STACK_CHUNK_SIZE * JS_MAX_PROD_LEN);
+	lex->pstack->pred_index = 0;
+	lex->pstack->fnames = palloc(JS_STACK_CHUNK_SIZE * sizeof(char *));
+	lex->pstack->fnull = palloc(JS_STACK_CHUNK_SIZE * sizeof(bool));
+	if (need_escapes)
+	{
+		lex->strval = makeStringInfo();
+		lex->flags |= JSONLEX_FREE_STRVAL;
+	}
+	return lex;
+}
+
+static inline void
+inc_lex_level(JsonLexContext *lex)
+{
+	lex->lex_level += 1;
+
+	if (lex->incremental && lex->lex_level >= lex->pstack->stack_size)
+	{
+		lex->pstack->stack_size += JS_STACK_CHUNK_SIZE;
+		lex->pstack->prediction =
+			repalloc(lex->pstack->prediction,
+					 lex->pstack->stack_size * JS_MAX_PROD_LEN);
+		if (lex->pstack->fnames)
+			lex->pstack->fnames =
+				repalloc(lex->pstack->fnames,
+						 lex->pstack->stack_size * sizeof(char *));
+		if (lex->pstack->fnull)
+			lex->pstack->fnull =
+				repalloc(lex->pstack->fnull, lex->pstack->stack_size * sizeof(bool));
+	}
+}
+
+static inline void
+dec_lex_level(JsonLexContext *lex)
+{
+	lex->lex_level -= 1;
+}
+
+static inline void
+push_prediction(JsonParserStack *pstack, td_entry entry)
+{
+	memcpy(pstack->prediction + pstack->pred_index, entry.prod, entry.len);
+	pstack->pred_index += entry.len;
+}
+
+static inline char
+pop_prediction(JsonParserStack *pstack)
+{
+	Assert(pstack->pred_index > 0);
+	return pstack->prediction[--pstack->pred_index];
+}
+
+static inline char
+next_prediction(JsonParserStack *pstack)
+{
+	Assert(pstack->pred_index > 0);
+	return pstack->prediction[pstack->pred_index - 1];
+}
+
+static inline bool
+have_prediction(JsonParserStack *pstack)
+{
+	return pstack->pred_index > 0;
+}
+
+static inline void
+set_fname(JsonLexContext *lex, char *fname)
+{
+	lex->pstack->fnames[lex->lex_level] = fname;
+}
+
+static inline char *
+get_fname(JsonLexContext *lex)
+{
+	return lex->pstack->fnames[lex->lex_level];
+}
+
+static inline void
+set_fnull(JsonLexContext *lex, bool fnull)
+{
+	lex->pstack->fnull[lex->lex_level] = fnull;
+}
+
+static inline bool
+get_fnull(JsonLexContext *lex)
+{
+	return lex->pstack->fnull[lex->lex_level];
+}
+
 /*
  * Free memory in a JsonLexContext.
  *
@@ -192,7 +481,18 @@ freeJsonLexContext(JsonLexContext *lex)
 		destroyStringInfo(lex->errormsg);
 
 	if (lex->flags & JSONLEX_FREE_STRUCT)
+	{
+		if (lex->incremental)
+		{
+			pfree(lex->inc_state->partial_token.data);
+			pfree(lex->inc_state);
+			pfree(lex->pstack->prediction);
+			pfree(lex->pstack->fnames);
+			pfree(lex->pstack->fnull);
+			pfree(lex->pstack);
+		}
 		pfree(lex);
+	}
 }
 
 /*
@@ -204,13 +504,44 @@
  * makeJsonLexContext(). sem is a structure of function pointers to semantic
  * action routines to be called at appropriate spots during parsing, and a
  * pointer to a state object to be passed to those routines.
+ *
+ * If FORCE_JSON_PSTACK is defined then the routine will call the non-recursive
+ * JSON parser. This is a useful way to validate that it's doing the right
+ * thing at least for non-incremental cases. If this is on we expect to see
+ * regression diffs relating to error messages about stack depth, but no
+ * other differences.
  */
 JsonParseErrorType
 pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
 {
+#ifdef FORCE_JSON_PSTACK
+
+	lex->incremental = true;
+	lex->inc_state = palloc0(sizeof(JsonIncrementalState));
+
+	/*
+	 * We don't need partial token processing; there is only one chunk. But
+	 * we still need to init the partial token string so that
+	 * freeJsonLexContext works.
+	 */
+	initStringInfo(&(lex->inc_state->partial_token));
+	lex->pstack = palloc(sizeof(JsonParserStack));
+	lex->pstack->stack_size = JS_STACK_CHUNK_SIZE;
+	lex->pstack->prediction = palloc(JS_STACK_CHUNK_SIZE * JS_MAX_PROD_LEN);
+	lex->pstack->pred_index = 0;
+	lex->pstack->fnames = palloc(JS_STACK_CHUNK_SIZE * sizeof(char *));
+	lex->pstack->fnull = palloc(JS_STACK_CHUNK_SIZE * sizeof(bool));
+
+	return pg_parse_json_incremental(lex, sem, lex->input, lex->input_length, true);
+
+#else
+
 	JsonTokenType tok;
 	JsonParseErrorType result;
 
+	if (lex->incremental)
+		return JSON_INVALID_LEXER_TYPE;
+
 	/* get the initial token */
 	result = json_lex(lex);
 	if (result != JSON_SUCCESS)
@@ -235,6 +566,7 @@ pg_parse_json(JsonLexContext *lex, JsonSemAction *sem)
 		result = lex_expect(JSON_PARSE_END, lex, JSON_TOKEN_END);
 
 	return result;
+#endif
 }
 
 /*
@@ -291,6 +623,372 @@ json_count_array_elements(JsonLexContext *lex, int *elements)
 }
 
 /*
+ * pg_parse_json_incremental
+ *
+ * Routine for incremental parsing of JSON. This uses the non-recursive top
+ * down method of the Dragon Book Algorithm 4.3. It's somewhat slower than
+ * the Recursive Descent pattern used above, so we only use it for
+ * incremental parsing of JSON.
+ *
+ * The lexing context needs to be set up by a call to
+ * makeJsonLexContextIncremental(). sem is a structure of function pointers
+ * to semantic action routines, which should function exactly as those used
+ * in the recursive descent parser.
+ *
+ * This routine can be called repeatedly with chunks of JSON. On the final
+ * chunk, is_last must be set to true. len is the length of the json chunk,
+ * which does not need to be null terminated.
+ */
+JsonParseErrorType
+pg_parse_json_incremental(JsonLexContext *lex,
+						  JsonSemAction *sem,
+						  char *json,
+						  int len,
+						  bool is_last)
+{
+	JsonTokenType tok;
+	JsonParseErrorType result;
+	JsonParseContext ctx = JSON_PARSE_VALUE;
+	JsonParserStack *pstack = lex->pstack;
+
+
+	if (!lex->incremental)
+		return JSON_INVALID_LEXER_TYPE;
+
+	lex->input = lex->token_terminator = lex->line_start = json;
+	lex->input_length = len;
+	lex->inc_state->is_last_chunk = is_last;
+
+	/* get the initial token */
+	result = json_lex(lex);
+	if (result != JSON_SUCCESS)
+		return result;
+
+	tok = lex_peek(lex);
+
+	/* use prediction stack for incremental parsing */
+
+	if (!have_prediction(pstack))
+	{
+		td_entry	goal = TD_ENTRY(JSON_PROD_GOAL);
+
+		push_prediction(pstack, goal);
+	}
+
+	while (have_prediction(pstack))
+	{
+		char		top = pop_prediction(pstack);
+		td_entry	entry;
+
+		/*
+		 * these first two branches are the guts of the Table Driven method
+		 */
+		if (top == tok)
+		{
+			/*
+			 * tok can only be a terminal symbol, so top must be too. The
+			 * token matches the top of the stack, so get the next token.
+			 */
+			if (tok < JSON_TOKEN_END)
+			{
+				result = json_lex(lex);
+				if (result != JSON_SUCCESS)
+					return result;
+				tok = lex_peek(lex);
+			}
+		}
+		else if (IS_NT(top) && (entry = td_parser_table[OFS(top)][tok]).prod != NULL)
+		{
+			/*
+			 * the token is in the director set for a production of the
+			 * non-terminal at the top of the stack, so push the reversed RHS
+			 * of the production onto the stack.
+			 */
+			push_prediction(pstack, entry);
+		}
+		else if (IS_SEM(top))
+		{
+			/*
+			 * top is a semantic action marker, so take action accordingly.
+			 * It's important to have these markers in the prediction stack
+			 * before any token they might need, so we don't advance the
+			 * token prematurely. Note in a couple of cases we need to do
+			 * something both before and after the token.
+			 */
+			switch (top)
+			{
+				case JSON_SEM_OSTART:
+					{
+						json_struct_action ostart = sem->object_start;
+
+						if (lex->lex_level >= JSON_TD_MAX_STACK)
+							return JSON_NESTING_TOO_DEEP;
+
+						if (ostart != NULL)
+						{
+							result = (*ostart) (sem->semstate);
+							if (result != JSON_SUCCESS)
+								return result;
+						}
+						inc_lex_level(lex);
+					}
+					break;
+				case JSON_SEM_OEND:
+					{
+						json_struct_action oend = sem->object_end;
+
+						dec_lex_level(lex);
+						if (oend != NULL)
+						{
+							result = (*oend) (sem->semstate);
+							if (result != JSON_SUCCESS)
+								return result;
+						}
+					}
+					break;
+				case JSON_SEM_ASTART:
+					{
+						json_struct_action astart = sem->array_start;
+
+						if (lex->lex_level >= JSON_TD_MAX_STACK)
+							return JSON_NESTING_TOO_DEEP;
+
+						if (astart != NULL)
+						{
+							result = (*astart) (sem->semstate);
+							if (result != JSON_SUCCESS)
+								return result;
+						}
+						inc_lex_level(lex);
+					}
+					break;
+				case JSON_SEM_AEND:
+					{
+						json_struct_action aend = sem->array_end;
+
+						dec_lex_level(lex);
+						if (aend != NULL)
+						{
+							result = (*aend) (sem->semstate);
+							if (result != JSON_SUCCESS)
+								return result;
+						}
+					}
+					break;
+				case JSON_SEM_OFIELD_INIT:
+					{
+						/*
+						 * all we do here is save out the field name. We have
+						 * to wait to get past the ':' to see if the next
+						 * value is null so we can call the semantic routine.
+						 */
+						char	   *fname = NULL;
+						json_ofield_action ostart = sem->object_field_start;
+						json_ofield_action oend = sem->object_field_end;
+
+						if ((ostart != NULL || oend != NULL) && lex->strval != NULL)
+						{
+							fname = pstrdup(lex->strval->data);
+						}
+						set_fname(lex, fname);
+					}
+					break;
+				case JSON_SEM_OFIELD_START:
+					{
+						/*
+						 * the current token should be the first token of the
+						 * value
+						 */
+						bool		isnull = tok == JSON_TOKEN_NULL;
+						json_ofield_action ostart = sem->object_field_start;
+
+						set_fnull(lex, isnull);
+
+						if (ostart != NULL)
+						{
+							char	   *fname = get_fname(lex);
+
+							result = (*ostart) (sem->semstate, fname, isnull);
+							if (result != JSON_SUCCESS)
+								return result;
+						}
+					}
+					break;
+				case JSON_SEM_OFIELD_END:
+					{
+						json_ofield_action oend = sem->object_field_end;
+
+						if (oend != NULL)
+						{
+							char	   *fname = get_fname(lex);
+							bool		isnull = get_fnull(lex);
+
+							result = (*oend) (sem->semstate, fname, isnull);
+							if (result != JSON_SUCCESS)
+								return result;
+						}
+					}
+					break;
+				case JSON_SEM_AELEM_START:
+					{
+						json_aelem_action astart = sem->array_element_start;
+						bool		isnull = tok == JSON_TOKEN_NULL;
+
+						set_fnull(lex, isnull);
+
+						if (astart != NULL)
+						{
+							result = (*astart) (sem->semstate, isnull);
+							if (result != JSON_SUCCESS)
+								return result;
+						}
+					}
+					break;
+				case JSON_SEM_AELEM_END:
+					{
+						json_aelem_action aend = sem->array_element_end;
+
+						if (aend != NULL)
+						{
+							bool		isnull = get_fnull(lex);
+
+							result = (*aend) (sem->semstate, isnull);
+							if (result != JSON_SUCCESS)
+								return result;
+						}
+					}
+					break;
+				case JSON_SEM_SCALAR_INIT:
+					{
+						json_scalar_action sfunc = sem->scalar;
+
+						pstack->scalar_val = NULL;
+
+						if (sfunc != NULL)
+						{
+							/*
+							 * extract the de-escaped string value, or the raw
+							 * lexeme
+							 */
+							/*
+							 * XXX copied from RD parser but looks like a
+							 * buglet
+							 */
+							if (tok == JSON_TOKEN_STRING)
+							{
+								if (lex->strval != NULL)
+									pstack->scalar_val = pstrdup(lex->strval->data);
+							}
+							else
+							{
+								int			tlen = (lex->token_terminator - lex->token_start);
+
+								pstack->scalar_val = palloc(tlen + 1);
+								memcpy(pstack->scalar_val, lex->token_start, tlen);
+								pstack->scalar_val[tlen] = '\0';
+							}
+							pstack->scalar_tok = tok;
+						}
+					}
+					break;
+				case JSON_SEM_SCALAR_CALL:
+					{
+						/*
+						 * We'd like to be able to get rid of this business of
+						 * two bits of scalar action, but we can't. It breaks
+						 * certain semantic actions which expect that when
+						 * called the lexer has consumed the item. See for
+						 * example get_scalar() in jsonfuncs.c.
+						 */
+						json_scalar_action sfunc = sem->scalar;
+
+						if (sfunc != NULL)
+						{
+							result = (*sfunc) (sem->semstate, pstack->scalar_val, pstack->scalar_tok);
+							if (result != JSON_SUCCESS)
+								return result;
+						}
+					}
+					break;
+				default:
+					/* should not happen */
+					break;
+			}
+		}
+		else
+		{
+			/*
+			 * The token didn't match the stack top if it's a terminal, nor a
+			 * production for the stack top if it's a non-terminal.
+			 *
+			 * Various cases here are Asserted to be not possible, as the
+			 * token would not appear at the top of the prediction stack
+			 * unless the lookahead matched.
+			 */
+			switch (top)
+			{
+				case JSON_TOKEN_STRING:
+					if (next_prediction(pstack) == JSON_TOKEN_COLON)
+						ctx = JSON_PARSE_STRING;
+					else
+					{
+						Assert(false);
+						ctx = JSON_PARSE_VALUE;
+					}
+					break;
+				case JSON_TOKEN_NUMBER:
+				case JSON_TOKEN_TRUE:
+				case JSON_TOKEN_FALSE:
+				case JSON_TOKEN_NULL:
+				case JSON_TOKEN_ARRAY_START:
+				case JSON_TOKEN_OBJECT_START:
+					Assert(false);
+					ctx = JSON_PARSE_VALUE;
+					break;
+				case JSON_TOKEN_ARRAY_END:
+					Assert(false);
+					ctx = JSON_PARSE_ARRAY_NEXT;
+					break;
+				case JSON_TOKEN_OBJECT_END:
+					Assert(false);
+					ctx = JSON_PARSE_OBJECT_NEXT;
+					break;
+				case JSON_TOKEN_COMMA:
+					Assert(false);
+					if (next_prediction(pstack) == JSON_TOKEN_STRING)
+						ctx = JSON_PARSE_OBJECT_NEXT;
+					else
+						ctx = JSON_PARSE_ARRAY_NEXT;
+					break;
+				case JSON_TOKEN_COLON:
+					ctx = JSON_PARSE_OBJECT_LABEL;
+					break;
+				case JSON_TOKEN_END:
+					ctx = JSON_PARSE_END;
+					break;
+				case JSON_NT_MORE_ARRAY_ELEMENTS:
+					ctx = JSON_PARSE_ARRAY_NEXT;
+					break;
+				case JSON_NT_ARRAY_ELEMENTS:
+					ctx = JSON_PARSE_ARRAY_START;
+					break;
+				case JSON_NT_MORE_KEY_PAIRS:
+					ctx = JSON_PARSE_OBJECT_NEXT;
+					break;
+				case JSON_NT_KEY_PAIRS:
+					ctx = JSON_PARSE_OBJECT_START;
+					break;
+				default:
+					ctx = JSON_PARSE_VALUE;
+			}
+			return report_parse_error(ctx, lex);
+		}
+	}
+
+	return JSON_SUCCESS;
+}
+
+/*
  * Recursive Descent parse routines.  There is one for each structural
  * element in a json document:
  *	  - scalar (string, number, true, false, null)
@@ -587,6 +1285,18 @@ parse_array(JsonLexContext *lex, JsonSemAction *sem)
 
 /*
  * Lex one token from the input stream.
+ *
+ * When doing incremental parsing, we can reach the end of the input string
+ * without having (or knowing we have) a complete token. If it's not the
+ * final chunk of input, the partial token is then saved to the lex
+ * structure's partial_token StringInfo. On subsequent calls input is
+ * appended to this buffer until we have something that we think is a
+ * complete token, which is then lexed using a recursive call to json_lex.
+ * Processing then continues as normal on subsequent calls.
+ *
+ * Note that when doing incremental processing, the lex.prev_token_terminator
+ * should not be relied on. It could point into a previous input chunk or
+ * worse.
 */
 JsonParseErrorType
 json_lex(JsonLexContext *lex)
@@ -595,8 +1305,202 @@ json_lex(JsonLexContext *lex)
 	char	   *const end = lex->input + lex->input_length;
 	JsonParseErrorType result;
 
-	/* Skip leading whitespace. */
+	if (lex->incremental && lex->inc_state->partial_completed)
+	{
+		/*
+		 * We just lexed a completed partial token on the last call, so reset
+		 * everything
+		 */
+		resetStringInfo(&(lex->inc_state->partial_token));
+		lex->token_terminator = lex->input;
+		lex->inc_state->partial_completed = false;
+	}
 	s = lex->token_terminator;
+
+	if (lex->incremental && lex->inc_state->partial_token.len)
+	{
+		/*
+		 * We have a partial token. Extend it, and if completed, lex it by a
+		 * recursive call
+		 */
+		StringInfo	ptok = &(lex->inc_state->partial_token);
+		int			added = 0;
+		bool		tok_done = false;
+		JsonLexContext dummy_lex;
+		JsonParseErrorType partial_result;
+
+		if (ptok->data[0] == '"')
+		{
+			/*
+			 * It's a string. Accumulate characters until we reach an
+			 * unescaped '"'.
+			 */
+			int			escapes = 0;
+
+			for (int i = ptok->len - 1; i > 0; i--)
+			{
+				/* count the trailing backslashes on the partial token */
+				if (ptok->data[i] == '\\')
+					escapes++;
+				else
+					break;
+			}
+
+			for (int i = 0; i < lex->input_length; i++)
+			{
+				char		c = lex->input[i];
+
+				appendStringInfoCharMacro(ptok, c);
+				added++;
+				if (c == '"' && escapes % 2 == 0)
+				{
+					tok_done = true;
+					break;
+				}
+				if (c == '\\')
+					escapes++;
+				else
+					escapes = 0;
+			}
+		}
+		else
+		{
+			/* not a string */
+			char		c = ptok->data[0];
+
+			if (c == '-' || (c >= '0' && c <= '9'))
+			{
+				/* for numbers look for possible numeric continuations */
+
+				bool		numend = false;
+
+				for (int i = 0; i < lex->input_length && !numend; i++)
+				{
+					char		cc = lex->input[i];
+
+					switch (cc)
+					{
+						case '+':
+						case '-':
+						case 'e':
+						case 'E':
+						case '0':
+						case '1':
+						case '2':
+						case '3':
+						case '4':
+						case '5':
+						case '6':
+						case '7':
+						case '8':
+						case '9':
+							{
+								appendStringInfoCharMacro(ptok, cc);
+								added++;
+							}
+							break;
+						default:
+							numend = true;
+					}
+				}
+			}
+
+			/*
+			 * Add any remaining alphanumeric chars. This takes care of the
+			 * {null, false, true} literals as well as any trailing
+			 * alphanumeric junk on non-string tokens.
+			 */
+			for (int i = added; i < lex->input_length; i++)
+			{
+				char		cc = lex->input[i];
+
+				if (JSON_ALPHANUMERIC_CHAR(cc))
+				{
+					appendStringInfoCharMacro(ptok, cc);
+					added++;
+				}
+				else
+				{
+					tok_done = true;
+					break;
+				}
+			}
+			if (added == lex->input_length &&
+				lex->inc_state->is_last_chunk)
+			{
+				tok_done = true;
+			}
+		}
+
+		if (!tok_done)
+		{
+			/* We should have consumed the whole chunk in this case. */
+			Assert(added == lex->input_length);
+
+			if (!lex->inc_state->is_last_chunk)
+				return JSON_INCOMPLETE;
+
+			/* json_errdetail() needs access to the accumulated token. */
+			lex->token_start = ptok->data;
+			lex->token_terminator = ptok->data + ptok->len;
+			return JSON_INVALID_TOKEN;
+		}
+
+		/*
+		 * Everything up to lex->input[added] has been added to the partial
+		 * token, so move the input past it.
+		 */
+		lex->input += added;
+		lex->input_length -= added;
+
+		dummy_lex.input = dummy_lex.token_terminator =
+			dummy_lex.line_start = ptok->data;
+		dummy_lex.line_number = lex->line_number;
+		dummy_lex.input_length = ptok->len;
+		dummy_lex.input_encoding = lex->input_encoding;
+		dummy_lex.incremental = false;
+		dummy_lex.strval = lex->strval;
+
+		partial_result = json_lex(&dummy_lex);
+
+		/*
+		 * We either have a complete token or an error. In either case we
+		 * need to point to the partial token data for the semantic or error
+		 * routines. If it's not an error we'll readjust on the next call to
+		 * json_lex.
+		 */
+		lex->token_type = dummy_lex.token_type;
+		lex->line_number = dummy_lex.line_number;
+
+		/*
+		 * We know the prev_token_terminator must be back in some previous
+		 * piece of input, so we just make it NULL.
+		 */
+		lex->prev_token_terminator = NULL;
+
+		/*
+		 * Normally token_start would be ptok->data, but it could be later,
+		 * see json_lex_string's handling of invalid escapes.
+		 */
+		lex->token_start = dummy_lex.token_start;
+		lex->token_terminator = dummy_lex.token_terminator;
+		if (partial_result == JSON_SUCCESS)
+		{
+			/* make sure we've used all the input */
+			if (lex->token_terminator - lex->token_start != ptok->len)
+			{
+				Assert(false);
+				return JSON_INVALID_TOKEN;
+			}
+
+			lex->inc_state->partial_completed = true;
+		}
+		return partial_result;
+		/* end of partial token processing */
+	}
+
+	/* Skip leading whitespace. */
 	while (s < end && (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))
 	{
 		if (*s++ == '\n')
@@ -708,6 +1612,14 @@ json_lex(JsonLexContext *lex)
 					return JSON_INVALID_TOKEN;
 				}
 
+				if (lex->incremental && !lex->inc_state->is_last_chunk &&
+					p == lex->input + lex->input_length)
+				{
+					appendBinaryStringInfo(
+										   &(lex->inc_state->partial_token), s, end - s);
+					return JSON_INCOMPLETE;
+				}
+
 				/*
 				 * We've got a real alphanumeric token here.  If it
 				 * happens to be true, false, or null, all is well.  If
@@ -732,7 +1644,10 @@ json_lex(JsonLexContext *lex)
 		}
 	}							/* end of switch */
 
-	return JSON_SUCCESS;
+	if (lex->incremental && lex->token_type == JSON_TOKEN_END && !lex->inc_state->is_last_chunk)
+		return JSON_INCOMPLETE;
+	else
+		return JSON_SUCCESS;
 }
 
 /*
@@ -754,8 +1669,14 @@ json_lex_string(JsonLexContext *lex)
 	int			hi_surrogate = -1;
 
 	/* Convenience macros for error exits */
-#define FAIL_AT_CHAR_START(code) \
+#define FAIL_OR_INCOMPLETE_AT_CHAR_START(code) \
 	do { \
+		if (lex->incremental && !lex->inc_state->is_last_chunk) \
+		{ \
+			appendBinaryStringInfo(&lex->inc_state->partial_token, \
+								   lex->token_start, end - lex->token_start); \
+			return JSON_INCOMPLETE; \
+		} \
 		lex->token_terminator = s; \
 		return code; \
 	} while (0)
@@ -776,7 +1697,7 @@ json_lex_string(JsonLexContext *lex)
 		s++;
 		/* Premature end of the string. */
 		if (s >= end)
-			FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
+			FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
 		else if (*s == '"')
 			break;
 		else if (*s == '\\')
@@ -784,7 +1705,7 @@ json_lex_string(JsonLexContext *lex)
 			/* OK, we have an escape character. */
 			s++;
 			if (s >= end)
-				FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
+				FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
 			else if (*s == 'u')
 			{
 				int			i;
@@ -794,7 +1715,7 @@ json_lex_string(JsonLexContext *lex)
 				{
 					s++;
 					if (s >= end)
-						FAIL_AT_CHAR_START(JSON_INVALID_TOKEN);
+						FAIL_OR_INCOMPLETE_AT_CHAR_START(JSON_INVALID_TOKEN);
 					else if (*s >= '0' && *s <= '9')
 						ch = (ch * 16) + (*s - '0');
 					else if (*s >= 'a' && *s <= 'f')
@@ -979,7 +1900,7 @@ json_lex_string(JsonLexContext *lex)
 	lex->token_terminator = s + 1;
 	return JSON_SUCCESS;
 
-#undef FAIL_AT_CHAR_START
+#undef FAIL_OR_INCOMPLETE_AT_CHAR_START
 #undef FAIL_AT_CHAR_END
 }
 
@@ -1088,7 +2009,14 @@ json_lex_number(JsonLexContext *lex, char *s,
 	if (total_len != NULL)
 		*total_len = len;
 
-	if (num_err != NULL)
+	if (lex->incremental && !lex->inc_state->is_last_chunk &&
+		len >= lex->input_length)
+	{
+		appendBinaryStringInfo(&lex->inc_state->partial_token,
+							   lex->token_start, s - lex->token_start);
+		return JSON_INCOMPLETE;
+	}
+	else if (num_err != NULL)
 	{
 		/* let the caller handle any error */
 		*num_err = error;
@@ -1174,9 +2102,17 @@ json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
 	switch (error)
 	{
+		case JSON_INCOMPLETE:
 		case JSON_SUCCESS:
 			/* fall through to the error code after switch */
 			break;
+		case JSON_INVALID_LEXER_TYPE:
+			if (lex->incremental)
+				return (_("Recursive descent parser cannot use incremental lexer"));
+			else
+				return (_("Incremental parser requires incremental lexer"));
+		case JSON_NESTING_TOO_DEEP:
+			return (_("JSON nested too deep, maximum permitted depth is 6400"));
 		case JSON_ESCAPING_INVALID:
 			token_error(lex, "Escape sequence \"\\%.*s\" is invalid.");
 			break;
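
The reversed storage of the productions is what makes the stack discipline work: push_prediction() copies a production into the prediction array and pop_prediction() consumes it from the end, so the leftmost symbol of a right-hand side always comes off first. As an illustration (a hand-worked trace, not part of the patch), the stack activity for the three-token document "[1]" runs roughly as follows, with the top of the stack at the right and symbol names abbreviated:

    END JSON                                       lookahead '['
    END AEND ']' ARR_ELEMS '[' ASTART              pushed JSON_PROD_ARRAY
    END AEND ']' ARR_ELEMS '['                     ASTART fired array_start, inc_lex_level()
    END AEND ']' ARR_ELEMS                         '[' matched, lookahead '1'
    END AEND ']' MORE AELEM_END JSON AELEM_START   pushed JSON_PROD_ARRAY_ELEMENTS
    END AEND ']' MORE AELEM_END SC_CALL '1' SC_INIT  pushed JSON_PROD_SCALAR_NUMBER
    END AEND ']' MORE                              '1' consumed, scalar and element actions fired
    END AEND ']'                                   MORE met ']' and expanded to epsilon
    END AEND                                       ']' matched, lookahead END
    END                                            AEND fired array_end, dec_lex_level()
    (empty)                                        END matched: JSON_SUCCESS

If a chunk runs out mid-document, json_lex() returns JSON_INCOMPLETE instead; the prediction stack is left exactly where it was, and the next call picks up where this one stopped.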
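The calling protocol implied by the new interface: build the context with makeJsonLexContextIncremental(), call pg_parse_json_incremental() once per chunk with is_last = false and expect JSON_INCOMPLETE back, then pass is_last = true for the final chunk and expect JSON_SUCCESS. A minimal caller sketch follows, assuming a backend environment (palloc, the Min() macro, PG_UTF8 from mb/pg_wchar.h); validate_json_in_chunks and its fixed-size chunking are hypothetical, not part of the patch:

    #include "postgres.h"
    #include "common/jsonapi.h"
    #include "mb/pg_wchar.h"

    /* Validate a JSON document by feeding it to the parser in chunks. */
    static JsonParseErrorType
    validate_json_in_chunks(char *doc, int doclen, int chunksize)
    {
        JsonLexContext *lex;
        JsonParseErrorType res = JSON_SUCCESS;

        /* need_escapes = false: we never look at field names or strings */
        lex = makeJsonLexContextIncremental(NULL, PG_UTF8, false);

        for (int off = 0; off < doclen; off += chunksize)
        {
            int     len = Min(chunksize, doclen - off);
            bool    is_last = (off + len >= doclen);

            res = pg_parse_json_incremental(lex, &nullSemAction,
                                            doc + off, len, is_last);

            /* JSON_INCOMPLETE means "feed me more"; anything else is final */
            if (res != JSON_INCOMPLETE)
                break;
        }

        freeJsonLexContext(lex);
        return res;             /* JSON_SUCCESS iff the document is well formed */
    }

A chunk boundary can fall anywhere, including inside a string escape or a number; the partial-token accumulation in json_lex() above is what makes that safe.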
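Because the semantic-action interface is shared with the recursive descent parser, an existing JsonSemAction works unchanged under incremental parsing. As a sketch with hypothetical names (the callback shapes are taken from the call sites above, e.g. (*ostart) (sem->semstate, fname, isnull)), a semantic-action set that counts object keys might look like:

    typedef struct CountState
    {
        int     nfields;        /* number of object keys seen so far */
    } CountState;

    static JsonParseErrorType
    count_object_field(void *state, char *fname, bool isnull)
    {
        ((CountState *) state)->nfields++;
        return JSON_SUCCESS;    /* returning anything else aborts the parse */
    }

    /* wiring it up; actions left NULL are simply skipped by the parser */
    CountState  cs = {0};
    JsonSemAction sem = {0};

    sem.semstate = &cs;
    sem.object_field_start = count_object_field;
    /* now pass &sem to pg_parse_json() or pg_parse_json_incremental() */

One caveat visible in JSON_SEM_OFIELD_INIT above: a field name is only captured (and hence passed to the callback) when lex->strval is set, i.e. when the lex context was created with need_escapes = true.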