diff options
author | Amit Langote <amitlan@postgresql.org> | 2024-04-04 19:57:08 +0900 |
---|---|---|
committer | Amit Langote <amitlan@postgresql.org> | 2024-04-04 20:20:15 +0900 |
commit | de3600452b61d1bc3967e9e37e86db8956c8f577 (patch) | |
tree | df9df5969dcc64b6b6a3e7b0903fda98a2fd513a /src/backend/parser/parse_jsontable.c | |
parent | a9d6c3868451a494641b498a15f9ee1c151949a7 (diff) | |
download | postgresql-de3600452b61d1bc3967e9e37e86db8956c8f577.tar.gz postgresql-de3600452b61d1bc3967e9e37e86db8956c8f577.zip |
Add basic JSON_TABLE() functionality
JSON_TABLE() allows JSON data to be converted into a relational view
and thus used, for example, in a FROM clause, like other tabular
data. Data to show in the view is selected from a source JSON object
using a JSON path expression to get a sequence of JSON objects that's
called a "row pattern", which becomes the source to compute the
SQL/JSON values that populate the view's output columns. Column
values themselves are computed using JSON path expressions applied to
each of the JSON objects comprising the "row pattern", for which the
SQL/JSON query functions added in 6185c9737cf4 are used.
To implement JSON_TABLE() as a table function, this augments the
TableFunc and TableFuncScanState nodes that are currently used to
support XMLTABLE() with some JSON_TABLE()-specific fields.
Note that the JSON_TABLE() spec includes NESTED COLUMNS and PLAN
clauses, which are required to provide more flexibility to extract
data out of nested JSON objects, but they are not implemented here
to keep this commit of manageable size.
Author: Nikita Glukhov <n.gluhov@postgrespro.ru>
Author: Teodor Sigaev <teodor@sigaev.ru>
Author: Oleg Bartunov <obartunov@gmail.com>
Author: Alexander Korotkov <aekorotkov@gmail.com>
Author: Andrew Dunstan <andrew@dunslane.net>
Author: Amit Langote <amitlangote09@gmail.com>
Author: Jian He <jian.universality@gmail.com>
Reviewers have included (in no particular order):
Andres Freund, Alexander Korotkov, Pavel Stehule, Andrew Alsup,
Erik Rijkers, Zihong Yu, Himanshu Upadhyaya, Daniel Gustafsson,
Justin Pryzby, Álvaro Herrera, Jian He
Discussion: https://postgr.es/m/cd0bb935-0158-78a7-08b5-904886deac4b@postgrespro.ru
Discussion: https://postgr.es/m/20220616233130.rparivafipt6doj3@alap3.anarazel.de
Discussion: https://postgr.es/m/abd9b83b-aa66-f230-3d6d-734817f0995d%40postgresql.org
Discussion: https://postgr.es/m/CA+HiwqE4XTdfb1nW=Ojoy_tQSRhYt-q_kb6i5d4xcKyrLC1Nbg@mail.gmail.com
Diffstat (limited to 'src/backend/parser/parse_jsontable.c')
-rw-r--r-- | src/backend/parser/parse_jsontable.c | 421 |
1 files changed, 421 insertions, 0 deletions
diff --git a/src/backend/parser/parse_jsontable.c b/src/backend/parser/parse_jsontable.c new file mode 100644 index 00000000000..060f62170e8 --- /dev/null +++ b/src/backend/parser/parse_jsontable.c @@ -0,0 +1,421 @@ +/*------------------------------------------------------------------------- + * + * parse_jsontable.c + * parsing of JSON_TABLE + * + * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/parser/parse_jsontable.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/optimizer.h" +#include "parser/parse_clause.h" +#include "parser/parse_collate.h" +#include "parser/parse_expr.h" +#include "parser/parse_relation.h" +#include "parser/parse_type.h" +#include "utils/builtins.h" +#include "utils/json.h" +#include "utils/lsyscache.h" + +/* Working state shared by transformJsonTable() and transformJsonTableColumns() and the name-handling helpers below */ +typedef struct JsonTableParseContext +{ + ParseState *pstate; /* parse state handed to transformJsonTable() */ + JsonTable *jt; /* raw JSON_TABLE node being transformed */ + TableFunc *tf; /* TableFunc node being built */ + List *pathNames; /* list of all path and column names seen so far; searched to detect duplicates */ + int pathNameId; /* counter used when generating "json_table_path_N" names */ +} JsonTableParseContext; + +static JsonTablePlan *transformJsonTableColumns(JsonTableParseContext *cxt, + List *columns, + List *passingArgs, + JsonTablePathSpec *pathspec); +static JsonFuncExpr *transformJsonTableColumn(JsonTableColumn *jtc, + Node *contextItemExpr, + List *passingArgs); +static bool isCompositeType(Oid typid); +static JsonTablePlan *makeJsonTablePathScan(JsonTablePathSpec *pathspec, + bool errorOnError); +static void CheckDuplicateColumnOrPathNames(JsonTableParseContext *cxt, + List *columns); +static bool LookupPathOrColumnName(JsonTableParseContext *cxt, char *name); +static char 
*generateJsonTablePathName(JsonTableParseContext *cxt); + +/* + * transformJsonTable - + * Transform a raw JsonTable into TableFunc + * + * Mainly, this transforms the JSON_TABLE() document-generating expression + * (jt->context_item) and the column-generating expressions (jt->columns) to + * populate TableFunc.docexpr and TableFunc.colvalexprs, respectively. Also, + * the PASSING values (jt->passing) are transformed and added into + * TableFunc.passvalexprs. + */ +ParseNamespaceItem * +transformJsonTable(ParseState *pstate, JsonTable *jt) +{ + TableFunc *tf; + JsonFuncExpr *jfe; + JsonExpr *je; + JsonTablePathSpec *rootPathSpec = jt->pathspec; + bool is_lateral; + JsonTableParseContext cxt = {pstate}; + + Assert(IsA(rootPathSpec->string, A_Const) && + castNode(A_Const, rootPathSpec->string)->val.node.type == T_String); + + if (jt->on_error && + jt->on_error->btype != JSON_BEHAVIOR_ERROR && + jt->on_error->btype != JSON_BEHAVIOR_EMPTY && + jt->on_error->btype != JSON_BEHAVIOR_EMPTY_ARRAY) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid ON ERROR behavior"), + errdetail("Only EMPTY or ERROR is allowed in the top-level ON ERROR clause."), + parser_errposition(pstate, jt->on_error->location)); + + cxt.pathNameId = 0; + if (rootPathSpec->name == NULL) + rootPathSpec->name = generateJsonTablePathName(&cxt); + cxt.pathNames = list_make1(rootPathSpec->name); + CheckDuplicateColumnOrPathNames(&cxt, jt->columns); + + /* + * We make lateral_only names of this level visible, whether or not the + * RangeTableFunc is explicitly marked LATERAL. This is needed for SQL + * spec compliance and seems useful on convenience grounds for all + * functions in FROM. + * + * (LATERAL can't nest within a single pstate level, so we don't need + * save/restore logic here.) 
+ */ + Assert(!pstate->p_lateral_active); + pstate->p_lateral_active = true; + + tf = makeNode(TableFunc); + tf->functype = TFT_JSON_TABLE; + + /* + * Transform JsonFuncExpr representing the top JSON_TABLE context_item and + * pathspec into a dummy JSON_TABLE_OP JsonExpr. + */ + jfe = makeNode(JsonFuncExpr); + jfe->op = JSON_TABLE_OP; + jfe->context_item = jt->context_item; + jfe->pathspec = (Node *) rootPathSpec->string; + jfe->passing = jt->passing; + jfe->on_empty = NULL; + jfe->on_error = jt->on_error; + jfe->location = jt->location; + tf->docexpr = transformExpr(pstate, (Node *) jfe, EXPR_KIND_FROM_FUNCTION); + + /* + * Create a JsonTablePlan that will generate row pattern that becomes + * source data for JSON path expressions in jt->columns. This also adds + * the columns' transformed JsonExpr nodes into tf->colvalexprs. + */ + cxt.jt = jt; + cxt.tf = tf; + tf->plan = (Node *) transformJsonTableColumns(&cxt, jt->columns, + jt->passing, + rootPathSpec); + + /* + * Copy the transformed PASSING arguments into the TableFunc node, because + * they are evaluated separately from the JsonExpr that we just put in + * TableFunc.docexpr. JsonExpr.passing_values is still kept around for + * get_json_table(). + */ + je = (JsonExpr *) tf->docexpr; + tf->passingvalexprs = copyObject(je->passing_values); + + tf->ordinalitycol = -1; /* undefine ordinality column number */ + tf->location = jt->location; + + pstate->p_lateral_active = false; + + /* + * Mark the RTE as LATERAL if the user said LATERAL explicitly, or if + * there are any lateral cross-references in it. + */ + is_lateral = jt->lateral || contain_vars_of_level((Node *) tf, 0); + + return addRangeTableEntryForTableFunc(pstate, + tf, jt->alias, is_lateral, true); +} + +/* + * Check if a column / path name is duplicated in the given shared list of + * names. 
+ */ +static void +CheckDuplicateColumnOrPathNames(JsonTableParseContext *cxt, + List *columns) +{ + ListCell *lc1; + + foreach(lc1, columns) + { + JsonTableColumn *jtc = castNode(JsonTableColumn, lfirst(lc1)); + + if (LookupPathOrColumnName(cxt, jtc->name)) + ereport(ERROR, + errcode(ERRCODE_DUPLICATE_ALIAS), + errmsg("duplicate JSON_TABLE column or path name: %s", + jtc->name), + parser_errposition(cxt->pstate, jtc->location)); + cxt->pathNames = lappend(cxt->pathNames, jtc->name); + } +} + +/* + * Lookup a column/path name in the given name list, returning true if already + * there. + */ +static bool +LookupPathOrColumnName(JsonTableParseContext *cxt, char *name) +{ + ListCell *lc; + + foreach(lc, cxt->pathNames) + { + if (strcmp(name, (const char *) lfirst(lc)) == 0) + return true; + } + + return false; +} + +/* Generate a new unique JSON_TABLE path name. */ +static char * +generateJsonTablePathName(JsonTableParseContext *cxt) +{ + char namebuf[32]; + char *name = namebuf; + + snprintf(namebuf, sizeof(namebuf), "json_table_path_%d", + cxt->pathNameId++); + + name = pstrdup(name); + cxt->pathNames = lappend(cxt->pathNames, name); + + return name; +} + +/* + * Create a JsonTablePlan that will supply the source row for 'columns' + * using 'pathspec' and append the columns' transformed JsonExpr nodes and + * their type/collation information to cxt->tf. 
+ */ +static JsonTablePlan * +transformJsonTableColumns(JsonTableParseContext *cxt, List *columns, + List *passingArgs, + JsonTablePathSpec *pathspec) +{ + ParseState *pstate = cxt->pstate; + JsonTable *jt = cxt->jt; + TableFunc *tf = cxt->tf; + ListCell *col; + bool ordinality_found = false; + bool errorOnError = jt->on_error && + jt->on_error->btype == JSON_BEHAVIOR_ERROR; + Oid contextItemTypid = exprType(tf->docexpr); + + foreach(col, columns) + { + JsonTableColumn *rawc = castNode(JsonTableColumn, lfirst(col)); + Oid typid; + int32 typmod; + Oid typcoll = InvalidOid; + Node *colexpr; + + Assert(rawc->name); + tf->colnames = lappend(tf->colnames, + makeString(pstrdup(rawc->name))); + + /* + * Determine the type and typmod for the new column. FOR ORDINALITY + * columns are INTEGER by standard; the others are user-specified. + */ + switch (rawc->coltype) + { + case JTC_FOR_ORDINALITY: + if (ordinality_found) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("cannot use more than one FOR ORDINALITY column"), + parser_errposition(pstate, rawc->location))); + ordinality_found = true; + colexpr = NULL; + typid = INT4OID; + typmod = -1; + break; + + case JTC_REGULAR: + typenameTypeIdAndMod(pstate, rawc->typeName, &typid, &typmod); + + /* + * Use JTC_FORMATTED so as to use JSON_QUERY for this column + * if the specified type is one that's better handled using + * JSON_QUERY() or if non-default WRAPPER or QUOTES behavior + * is specified. 
+ */ + if (isCompositeType(typid) || + rawc->quotes != JS_QUOTES_UNSPEC || + rawc->wrapper != JSW_UNSPEC) + rawc->coltype = JTC_FORMATTED; + + /* FALLTHROUGH */ + case JTC_FORMATTED: + case JTC_EXISTS: + { + JsonFuncExpr *jfe; + CaseTestExpr *param = makeNode(CaseTestExpr); + + param->collation = InvalidOid; + param->typeId = contextItemTypid; + param->typeMod = -1; + + jfe = transformJsonTableColumn(rawc, (Node *) param, + passingArgs); + + colexpr = transformExpr(pstate, (Node *) jfe, + EXPR_KIND_FROM_FUNCTION); + assign_expr_collations(pstate, colexpr); + + typid = exprType(colexpr); + typmod = exprTypmod(colexpr); + typcoll = exprCollation(colexpr); + break; + } + + default: + elog(ERROR, "unknown JSON_TABLE column type: %d", (int) rawc->coltype); + break; + } + + tf->coltypes = lappend_oid(tf->coltypes, typid); + tf->coltypmods = lappend_int(tf->coltypmods, typmod); + tf->colcollations = lappend_oid(tf->colcollations, typcoll); + tf->colvalexprs = lappend(tf->colvalexprs, colexpr); + } + + return makeJsonTablePathScan(pathspec, errorOnError); +} + +/* + * Check if the type is "composite" for the purpose of checking whether to use + * JSON_VALUE() or JSON_QUERY() for a given JsonTableColumn. + */ +static bool +isCompositeType(Oid typid) +{ + char typtype = get_typtype(typid); + + return typid == JSONOID || + typid == JSONBOID || + typid == RECORDOID || + type_is_array(typid) || + typtype == TYPTYPE_COMPOSITE || + /* domain over one of the above? 
*/ + (typtype == TYPTYPE_DOMAIN && + isCompositeType(getBaseType(typid))); +} + +/* + * Transform JSON_TABLE column definition into a JsonFuncExpr + * This turns: + * - regular column into JSON_VALUE() + * - FORMAT JSON column into JSON_QUERY() + * - EXISTS column into JSON_EXISTS() + */ +static JsonFuncExpr * +transformJsonTableColumn(JsonTableColumn *jtc, Node *contextItemExpr, + List *passingArgs) +{ + Node *pathspec; + JsonFuncExpr *jfexpr = makeNode(JsonFuncExpr); + + /* + * XXX consider inventing JSON_TABLE_VALUE_OP, etc. and pass the column + * name via JsonExpr so that JsonPathValue(), etc. can provide error + * message tailored to JSON_TABLE(), such as by mentioning the column + * names in the message. + */ + if (jtc->coltype == JTC_REGULAR) + jfexpr->op = JSON_VALUE_OP; + else if (jtc->coltype == JTC_EXISTS) + jfexpr->op = JSON_EXISTS_OP; + else + jfexpr->op = JSON_QUERY_OP; + + jfexpr->context_item = makeJsonValueExpr((Expr *) contextItemExpr, NULL, + makeJsonFormat(JS_FORMAT_DEFAULT, + JS_ENC_DEFAULT, + -1)); + if (jtc->pathspec) + pathspec = (Node *) jtc->pathspec->string; + else + { + /* Construct default path as '$."column_name"' */ + StringInfoData path; + + initStringInfo(&path); + + appendStringInfoString(&path, "$."); + escape_json(&path, jtc->name); + + pathspec = makeStringConst(path.data, -1); + } + jfexpr->pathspec = pathspec; + jfexpr->passing = passingArgs; + jfexpr->output = makeNode(JsonOutput); + jfexpr->output->typeName = jtc->typeName; + jfexpr->output->returning = makeNode(JsonReturning); + jfexpr->output->returning->format = jtc->format; + jfexpr->on_empty = jtc->on_empty; + jfexpr->on_error = jtc->on_error; + jfexpr->quotes = jtc->quotes; + jfexpr->wrapper = jtc->wrapper; + jfexpr->location = jtc->location; + + return jfexpr; +} + +/* + * Create a JsonTablePlan for given path and ON ERROR behavior. 
+ */ +static JsonTablePlan * +makeJsonTablePathScan(JsonTablePathSpec *pathspec, bool errorOnError) +{ + JsonTablePathScan *scan = makeNode(JsonTablePathScan); + char *pathstring; + Const *value; + + Assert(IsA(pathspec->string, A_Const)); + pathstring = castNode(A_Const, pathspec->string)->val.sval.sval; + value = makeConst(JSONPATHOID, -1, InvalidOid, -1, + DirectFunctionCall1(jsonpath_in, + CStringGetDatum(pathstring)), + false, false); + + scan->plan.type = T_JsonTablePathScan; + scan->path = makeJsonTablePath(value, pathspec->name); + scan->errorOnError = errorOnError; + + return (JsonTablePlan *) scan; +} |