src/include/common/jsonapi.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256

/*-------------------------------------------------------------------------
 *
 * jsonapi.h
 *	  Declarations for JSON API support.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/common/jsonapi.h
 *
 *-------------------------------------------------------------------------
 */

#ifndef JSONAPI_H
#define JSONAPI_H

/*
 * Lexical token categories.  JsonLexContext.token_type holds one of these,
 * and scalar semantic actions receive one as their tokentype argument.
 *
 * The numeric values are written out for readability; they are exactly the
 * values the compiler assigns when no initializers are given.
 */
typedef enum JsonTokenType
{
	JSON_TOKEN_INVALID = 0,
	JSON_TOKEN_STRING = 1,
	JSON_TOKEN_NUMBER = 2,
	JSON_TOKEN_OBJECT_START = 3,
	JSON_TOKEN_OBJECT_END = 4,
	JSON_TOKEN_ARRAY_START = 5,
	JSON_TOKEN_ARRAY_END = 6,
	JSON_TOKEN_COMMA = 7,
	JSON_TOKEN_COLON = 8,
	JSON_TOKEN_TRUE = 9,
	JSON_TOKEN_FALSE = 10,
	JSON_TOKEN_NULL = 11,
	JSON_TOKEN_END = 12,
} JsonTokenType;

/*
 * Result codes used throughout the JSON API: the parser and lexer entry
 * points return one, and so does every semantic action callback.
 *
 * The numeric values are written out for readability; they are exactly the
 * values the compiler assigns when no initializers are given.
 */
typedef enum JsonParseErrorType
{
	JSON_SUCCESS = 0,
	JSON_INCOMPLETE = 1,
	JSON_INVALID_LEXER_TYPE = 2,
	JSON_NESTING_TOO_DEEP = 3,
	JSON_ESCAPING_INVALID = 4,
	JSON_ESCAPING_REQUIRED = 5,
	JSON_EXPECTED_ARRAY_FIRST = 6,
	JSON_EXPECTED_ARRAY_NEXT = 7,
	JSON_EXPECTED_COLON = 8,
	JSON_EXPECTED_END = 9,
	JSON_EXPECTED_JSON = 10,
	JSON_EXPECTED_MORE = 11,
	JSON_EXPECTED_OBJECT_FIRST = 12,
	JSON_EXPECTED_OBJECT_NEXT = 13,
	JSON_EXPECTED_STRING = 14,
	JSON_INVALID_TOKEN = 15,
	JSON_OUT_OF_MEMORY = 16,
	JSON_UNICODE_CODE_POINT_ZERO = 17,
	JSON_UNICODE_ESCAPE_FORMAT = 18,
	JSON_UNICODE_HIGH_ESCAPE = 19,
	JSON_UNICODE_UNTRANSLATABLE = 20,
	JSON_UNICODE_HIGH_SURROGATE = 21,
	JSON_UNICODE_LOW_SURROGATE = 22,
	/* a semantic action failed; the error should already be reported */
	JSON_SEM_ACTION_FAILED = 23,
} JsonParseErrorType;

/*
 * Parser state private to jsonapi.c.  Only forward declarations appear here,
 * so code including this header can hold pointers to these structs (see
 * JsonLexContext.pstack and JsonLexContext.inc_state) but cannot see their
 * members.
 */
typedef struct JsonParserStack JsonParserStack;
typedef struct JsonIncrementalState JsonIncrementalState;

/*
 * Select the struct tag that stands behind the strval and errormsg pointers
 * in JsonLexContext.  The header defining that struct is deliberately not
 * included here; callers that need to look inside those buffers can include
 * the appropriate header themselves.
 */
#ifndef JSONAPI_USE_PQEXPBUFFER
#define jsonapi_StrValType StringInfoData
#else
#define jsonapi_StrValType PQExpBufferData
#endif

/*
 * All the fields in this structure should be treated as read-only.
 *
 * If strval is not null, then it should contain the de-escaped value
 * of the lexeme if it's a string. Otherwise most of these field names
 * should be self-explanatory.
 *
 * line_number and line_start are principally for use by the parser's
 * error reporting routines.
 * token_terminator and prev_token_terminator point to the character
 * AFTER the end of the token, i.e. where there would be a nul byte
 * if we were using nul-terminated strings.
 *
 * The prev_token_terminator field should not be used when incremental is
 * true, as the previous token might have started in a previous piece of input,
 * and thus it can't be used in any pointer arithmetic or other operations in
 * conjunction with token_start.
 *
 * JSONLEX_FREE_STRUCT/STRVAL are used to drive freeJsonLexContext.
 * JSONLEX_CTX_OWNS_TOKENS is used by setJsonLexContextOwnsTokens.
 */
/*
 * Flag bits.  NOTE(review): presumably these are the bits kept in
 * JsonLexContext.flags -- confirm in jsonapi.c.
 */
#define JSONLEX_FREE_STRUCT			(1 << 0)
#define JSONLEX_FREE_STRVAL			(1 << 1)
#define JSONLEX_CTX_OWNS_TOKENS		(1 << 2)
typedef struct JsonLexContext
{
	/* NOTE(review): presumably the text being lexed and its length -- confirm */
	const char *input;
	size_t		input_length;
	/* NOTE(review): presumably the 'encoding' passed to makeJsonLexContext* */
	int			input_encoding;
	/* NOTE(review): presumably the first character of the current token */
	const char *token_start;
	/* character AFTER the end of the current token */
	const char *token_terminator;
	/* same, for the previous token; do not use when incremental is true */
	const char *prev_token_terminator;
	/* when true, a token may have started in an earlier piece of input */
	bool		incremental;
	/* NOTE(review): presumably the type of the current token -- confirm */
	JsonTokenType token_type;
	/* NOTE(review): presumably the current nesting depth -- confirm */
	int			lex_level;
	/* NOTE(review): presumably a mask of the JSONLEX_* bits -- confirm */
	bits32		flags;
	int			line_number;	/* line number, starting from 1 */
	const char *line_start;		/* where that line starts within input */
	/* parser state private to jsonapi.c (opaque in this header) */
	JsonParserStack *pstack;
	JsonIncrementalState *inc_state;
	/* if true, strval stores the unescaped lexemes */
	bool		need_escapes;
	struct jsonapi_StrValType *strval;	/* only used if need_escapes == true */
	/* NOTE(review): presumably backs json_errdetail's result -- confirm */
	struct jsonapi_StrValType *errormsg;
} JsonLexContext;

/*
 * Signatures of the semantic action callbacks stored in JsonSemAction.
 *
 * json_struct_action: receives only the state pointer.
 * json_ofield_action: also receives a field name and an isnull flag.
 * json_aelem_action:  also receives an isnull flag.
 * json_scalar_action: also receives a token string and its JsonTokenType.
 *
 * Each returns a JsonParseErrorType.
 *
 * NOTE(review): 'state' is presumably JsonSemAction.semstate -- confirm in
 * jsonapi.c.
 */
typedef JsonParseErrorType (*json_struct_action) (void *state);
typedef JsonParseErrorType (*json_ofield_action) (void *state, char *fname, bool isnull);
typedef JsonParseErrorType (*json_aelem_action) (void *state, bool isnull);
typedef JsonParseErrorType (*json_scalar_action) (void *state, char *token, JsonTokenType tokentype);


/*
 * Semantic Action structure for use in parsing json.
 *
 * Any of these actions can be NULL, in which case nothing is done at that
 * point. Likewise, semstate can be NULL. Using an all-NULL structure amounts
 * to doing a pure parse with no side-effects, and is therefore exactly
 * what the json input routines do.
 *
 * By default, the 'fname' and 'token' strings passed to these actions are
 * palloc'd.  They are not free'd or used further by the parser, so the action
 * function is free to do what it wishes with them. This behavior may be
 * modified by setJsonLexContextOwnsTokens().
 *
 * All action functions return JsonParseErrorType.  If the result isn't
 * JSON_SUCCESS, the parse is abandoned and that error code is returned.
 * If it is JSON_SEM_ACTION_FAILED, the action function is responsible
 * for having reported the error in some appropriate way.
 */
typedef struct JsonSemAction
{
	/* caller-defined state for the actions to use; may be NULL */
	void	   *semstate;
	/* object/array boundary actions: take the state pointer only */
	json_struct_action object_start;
	json_struct_action object_end;
	json_struct_action array_start;
	json_struct_action array_end;
	/* object field actions: also take the field name and an isnull flag */
	json_ofield_action object_field_start;
	json_ofield_action object_field_end;
	/* array element actions: also take an isnull flag */
	json_aelem_action array_element_start;
	json_aelem_action array_element_end;
	/* scalar action: also takes the token string and its JsonTokenType */
	json_scalar_action scalar;
} JsonSemAction;

/*
 * pg_parse_json parses the string held in lex, calling the action functions
 * in sem at the appropriate points.  It is up to the actions to keep what
 * state they need in semstate.  If they need access to the state of the
 * lexer, then its pointer should be passed to them as a member of whatever
 * semstate points to.  If an action pointer is NULL, the parser does nothing
 * at that point and just continues.
 *
 * The result is a JsonParseErrorType.  If any action returns something other
 * than JSON_SUCCESS, the parse is abandoned and that code is returned.
 */
extern JsonParseErrorType pg_parse_json(JsonLexContext *lex,
										const JsonSemAction *sem);

/*
 * Incremental counterpart of pg_parse_json: instead of being stored in lex
 * up front, the input is handed to this function as a series of string
 * chunks (see makeJsonLexContextIncremental).
 *
 * NOTE(review): json/len presumably describe one chunk, and is_last
 * presumably marks the final chunk -- confirm in jsonapi.c.
 */
extern JsonParseErrorType pg_parse_json_incremental(JsonLexContext *lex,
													const JsonSemAction *sem,
													const char *json,
													size_t len,
													bool is_last);

/*
 * The null action object used for pure validation.
 *
 * NOTE(review): presumably every member is NULL, which makes the parser do
 * nothing at each action point -- confirm against the definition.
 */
extern PGDLLIMPORT const JsonSemAction nullSemAction;

/*
 * json_count_array_elements performs a fast secondary parse to determine the
 * number of elements in the passed array lex context.  It should be called
 * from an array_start action.
 *
 * lex:      the lex context to count from.
 * elements: output; receives the element count, but only if the return
 *           value is JSON_SUCCESS.
 *
 * The return value indicates whether any error occurred.
 */
extern JsonParseErrorType json_count_array_elements(JsonLexContext *lex,
													int *elements);

/*
 * Initializer for JsonLexContext.
 *
 * lex: if a valid pointer is given, that struct is initialized.  This can be
 * used for stack-allocated structs, saving overhead.  If NULL is given, a
 * new struct is allocated.
 *
 * json, len, encoding: NOTE(review): presumably the input text, its length,
 * and its character encoding -- confirm in jsonapi.c.
 *
 * need_escapes: if true, ->strval stores the unescaped lexemes.  Unescaping
 * is expensive, so only request it when necessary.
 *
 * If need_escapes is true or lex was given as NULL, then the caller is
 * responsible for freeing the returned struct, either by calling
 * freeJsonLexContext() or (in backend environment) via memory context
 * cleanup.
 */
extern JsonLexContext *makeJsonLexContextCstringLen(JsonLexContext *lex,
													const char *json,
													size_t len,
													int encoding,
													bool need_escapes);

/*
 * Make a JsonLexContext suitable for incremental parsing.
 *
 * No input string is passed here: the string chunks will be handed to
 * pg_parse_json_incremental instead.
 *
 * NOTE(review): lex, encoding and need_escapes presumably behave as they do
 * for makeJsonLexContextCstringLen -- confirm in jsonapi.c.
 */
extern JsonLexContext *makeJsonLexContextIncremental(JsonLexContext *lex,
													 int encoding,
													 bool need_escapes);

/*
 * Sets whether tokens passed to semantic action callbacks are owned by the
 * context (in which case, the callback must duplicate the tokens for long-term
 * storage) or by the callback (in which case, the callback must explicitly
 * free tokens to avoid leaks).
 *
 * lex:              the context to change.
 * owned_by_context: true if the context owns the tokens, false if the
 *                   callback does.
 *
 * By default, this setting is false: the callback owns the tokens that are
 * passed to it (and if parsing fails between the two object-field callbacks,
 * the field name token will likely leak). If set to true, tokens will be freed
 * by the lexer after the callback completes.
 *
 * Setting this to true is important for long-lived clients (such as libpq)
 * that must not leak memory during a parse failure. For a server backend using
 * memory contexts, or a client application which will exit on parse failure,
 * this setting is less critical.
 */
extern void setJsonLexContextOwnsTokens(JsonLexContext *lex,
										bool owned_by_context);

/*
 * Free a JsonLexContext obtained from one of the makeJsonLexContext*
 * functions.  What is released is driven by the JSONLEX_FREE_STRUCT and
 * JSONLEX_FREE_STRVAL flags.
 *
 * NOTE(review): whether a NULL lex is accepted is not visible from this
 * header -- confirm in jsonapi.c.
 */
extern void freeJsonLexContext(JsonLexContext *lex);

/*
 * Lex one token from lex and return a JsonParseErrorType.
 *
 * NOTE(review): the token is presumably reported through lex->token_type,
 * lex->token_start and lex->token_terminator -- confirm in jsonapi.c.
 */
extern JsonParseErrorType json_lex(JsonLexContext *lex);

/*
 * Construct an error detail string for a JSON error.
 *
 * error: the code to describe.
 * lex:   the lex context the error came from.
 *
 * NOTE(review): ownership and lifetime of the returned string are not
 * visible from this header (it may live in lex->errormsg) -- confirm in
 * jsonapi.c before freeing or retaining it.
 */
extern char *json_errdetail(JsonParseErrorType error, JsonLexContext *lex);

/*
 * Utility function to check if a string is a valid JSON number.
 *
 * str: the candidate text; it does not need to be nul-terminated.
 * len: the length of str.
 *
 * Returns true if the string is a valid JSON number.
 */
extern bool IsValidJsonNumber(const char *str, size_t len);

#endif							/* JSONAPI_H */