aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/jsonb_gin.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/jsonb_gin.c')
-rw-r--r--src/backend/utils/adt/jsonb_gin.c646
1 files changed, 646 insertions, 0 deletions
diff --git a/src/backend/utils/adt/jsonb_gin.c b/src/backend/utils/adt/jsonb_gin.c
new file mode 100644
index 00000000000..4a6b8fd6888
--- /dev/null
+++ b/src/backend/utils/adt/jsonb_gin.c
@@ -0,0 +1,646 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonb_gin.c
+ * GIN support functions for jsonb
+ *
+ * Copyright (c) 2014, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/jsonb_gin.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gin.h"
+#include "access/skey.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
+#include "utils/builtins.h"
+#include "utils/jsonb.h"
+
+typedef struct PathHashStack /* one entry per open container while gin_extract_jsonb_hash walks a jsonb */
+{
+ uint32 hash; /* running path hash for this nesting level */
+ struct PathHashStack *parent; /* entry of the enclosing level; NULL only in the bottom sentinel entry */
+} PathHashStack;
+
+static text *make_text_key(const char *str, int len, char flag); /* text datum: flag byte followed by len bytes of str */
+static text *make_scalar_key(const JsonbValue * scalarVal, char flag); /* text key for a scalar JsonbValue, built via make_text_key */
+
+/*
+ *
+ * jsonb_ops GIN opclass support functions
+ *
+ */
+/*
+ * gin_compare_jsonb
+ *		Ordering function for the text keys stored by jsonb_ops.
+ *
+ * Arguments 0 and 1 are the two text keys.  They are ordered the way
+ * bttextcmp would order them, except that the C collation is always used.
+ * The varstr_cmp() result is returned as an int32.
+ */
+Datum
+gin_compare_jsonb(PG_FUNCTION_ARGS)
+{
+	text	   *lhs = PG_GETARG_TEXT_PP(0);
+	text	   *rhs = PG_GETARG_TEXT_PP(1);
+	int32		cmp;
+
+	cmp = varstr_cmp(VARDATA_ANY(lhs), VARSIZE_ANY_EXHDR(lhs),
+					 VARDATA_ANY(rhs), VARSIZE_ANY_EXHDR(rhs),
+					 C_COLLATION_OID);
+
+	/* Release any detoasted copies made while fetching the arguments */
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_INT32(cmp);
+}
+
+/*
+ * gin_extract_jsonb
+ *		Extract the jsonb_ops index keys from a jsonb value.
+ *
+ * Argument 0 is the jsonb; argument 1 points to an int32 that receives the
+ * number of keys.  The result is a palloc'd array of text Datums, or NULL
+ * (with zero keys) when the root container is empty.
+ *
+ * Key tagging:
+ *	- object keys are tagged JKEYELEM;
+ *	- array elements that are jsonb strings are also tagged JKEYELEM;
+ *	- all other array elements, and all pair values, are tagged JVAL.
+ *
+ * String array elements are serialized exactly like object keys for the
+ * benefit of JsonbExistsStrategyNumber.  The existence operator takes a
+ * text right-hand side, which stands in for an equivalent jsonb string, so
+ * (like the operator itself) the index cannot be asked about the existence
+ * of a non-string element.  The operator is really meant to test whether an
+ * object has a given key (pair keys are always strings); that notion is
+ * simply extended to arrays.
+ *
+ * One way to think of it: every scalar array element is a key whose
+ * existence could be tested, but only the string notation is accepted.
+ * That restriction spares the user from having to say that the right-hand
+ * side is a raw scalar string (no internal quoting is required for the text
+ * argument).  It also means the reset flag need not be set for
+ * JsonbExistsStrategyNumber, because keys are known to be strings for both
+ * objects and arrays and no further type mismatch has to be considered.
+ * Not having to set the reset flag makes it less tempting to tighten the
+ * definition of existence so that it excludes array elements altogether,
+ * although that would arguably be simpler.
+ *
+ * Nothing in this representation rules out an alternative existence
+ * operator whose right-hand side is a jsonb of arbitrary internal type; the
+ * same infrastructure could trivially support that slightly different
+ * definition.
+ */
+Datum
+gin_extract_jsonb(PG_FUNCTION_ARGS)
+{
+	Jsonb	   *jb = (Jsonb *) PG_GETARG_JSONB(0);
+	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
+	int			capacity = 2 * JB_ROOT_COUNT(jb);
+	int			nkeys = 0;
+	int			tok;
+	Datum	   *keys;
+	JsonbIterator *iter;
+	JsonbValue	val;
+
+	/* Empty root container: nothing to index */
+	if (capacity == 0)
+	{
+		*nentries = 0;
+		PG_RETURN_POINTER(NULL);
+	}
+
+	keys = (Datum *) palloc(sizeof(Datum) * capacity);
+	iter = JsonbIteratorInit(VARDATA(jb));
+
+	for (;;)
+	{
+		char		flag;
+
+		tok = JsonbIteratorNext(&iter, &val, false);
+		if (tok == WJB_DONE)
+			break;
+
+		/* Initial size is only an estimate; double the array when full */
+		if (nkeys >= capacity)
+		{
+			capacity *= 2;
+			keys = (Datum *) repalloc(keys, sizeof(Datum) * capacity);
+		}
+
+		/* Pick the tag for this token; structural tokens produce no key */
+		if (tok == WJB_KEY)
+			flag = JKEYELEM;
+		else if (tok == WJB_ELEM)
+			flag = (val.type == jbvString) ? JKEYELEM : JVAL;
+		else if (tok == WJB_VALUE)
+			flag = JVAL;
+		else
+			continue;
+
+		keys[nkeys++] = PointerGetDatum(make_scalar_key(&val, flag));
+	}
+
+	*nentries = nkeys;
+
+	PG_RETURN_POINTER(keys);
+}
+
+/*
+ * gin_extract_jsonb_query
+ *		Build the jsonb_ops search keys for a query.
+ *
+ * Argument 0 is the query value; its type depends on the strategy number in
+ * argument 2:
+ *	- JsonbContainsStrategyNumber: a jsonb, handed to gin_extract_jsonb;
+ *	- JsonbExistsStrategyNumber: a single text key;
+ *	- JsonbExistsAnyStrategyNumber / JsonbExistsAllStrategyNumber: a text
+ *	  array, whose NULL elements are ignored.
+ *
+ * Argument 1 points to an int32 that receives the number of keys, and
+ * argument 6 points to the search mode, which is switched to
+ * GIN_SEARCH_MODE_ALL when the query yields no keys but must still match
+ * (containment of an empty jsonb, or exists-all with no keys).
+ *
+ * Returns a palloc'd array of text Datums (NULL for an empty containment
+ * query).  Raises an error for any other strategy number.
+ */
+Datum
+gin_extract_jsonb_query(PG_FUNCTION_ARGS)
+{
+	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
+	StrategyNumber strategy = PG_GETARG_UINT16(2);
+	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
+	Datum	   *entries;
+
+	if (strategy == JsonbContainsStrategyNumber)
+	{
+		/* Query is a jsonb, so just apply gin_extract_jsonb... */
+		entries = (Datum *)
+			DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb,
+												PG_GETARG_DATUM(0),
+												PointerGetDatum(nentries)));
+		/* ...although "contains {}" requires a full index scan */
+		if (entries == NULL)
+			*searchMode = GIN_SEARCH_MODE_ALL;
+	}
+	else if (strategy == JsonbExistsStrategyNumber)
+	{
+		text	   *query = PG_GETARG_TEXT_PP(0);
+		text	   *item;
+
+		*nentries = 1;
+		entries = (Datum *) palloc(sizeof(Datum));
+		item = make_text_key(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query),
+							 JKEYELEM);
+		entries[0] = PointerGetDatum(item);
+	}
+	else if (strategy == JsonbExistsAnyStrategyNumber ||
+			 strategy == JsonbExistsAllStrategyNumber)
+	{
+		ArrayType  *query = PG_GETARG_ARRAYTYPE_P(0);
+		Datum	   *key_datums;
+		bool	   *key_nulls;
+		int			key_count;
+		int			i,
+					j;
+		text	   *item;
+
+		deconstruct_array(query,
+						  TEXTOID, -1, false, 'i',
+						  &key_datums, &key_nulls, &key_count);
+
+		/* Sized for the worst case, in which no element is NULL */
+		entries = (Datum *) palloc(sizeof(Datum) * key_count);
+
+		for (i = 0, j = 0; i < key_count; ++i)
+		{
+			/* Nulls in the array are ignored */
+			if (key_nulls[i])
+				continue;
+
+			/*
+			 * Use the header-agnostic accessors, as the single-key branch
+			 * above does, so the element is read correctly whether it has a
+			 * 4-byte or a short varlena header.
+			 */
+			item = make_text_key(VARDATA_ANY(key_datums[i]),
+								 VARSIZE_ANY_EXHDR(key_datums[i]),
+								 JKEYELEM);
+			entries[j++] = PointerGetDatum(item);
+		}
+
+		*nentries = j;
+		/* ExistsAll with no keys should match everything */
+		if (j == 0 && strategy == JsonbExistsAllStrategyNumber)
+			*searchMode = GIN_SEARCH_MODE_ALL;
+	}
+	else
+	{
+		elog(ERROR, "unrecognized strategy number: %d", strategy);
+		entries = NULL;			/* keep compiler quiet */
+	}
+
+	PG_RETURN_POINTER(entries);
+}
+
+/*
+ * gin_consistent_jsonb
+ *		Boolean consistency check for jsonb_ops.
+ *
+ * Argument 0 is the array of per-key match flags, argument 1 the strategy
+ * number, argument 3 the number of keys, and argument 5 points to the
+ * recheck flag that is set here.  Arguments 2 (query) and 4 (extra data)
+ * are not used.
+ *
+ * Raises an error for an unrecognized strategy number.
+ */
+Datum
+gin_consistent_jsonb(PG_FUNCTION_ARGS)
+{
+	bool	   *check = (bool *) PG_GETARG_POINTER(0);
+	StrategyNumber strategy = PG_GETARG_UINT16(1);
+	int32		nkeys = PG_GETARG_INT32(3);
+	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
+	bool		matched = true;
+	int32		k;
+
+	switch (strategy)
+	{
+		case JsonbContainsStrategyNumber:
+
+			/*
+			 * The index does not record which jsonb key goes with which
+			 * value (each is stored as a separate GIN key), and the special
+			 * rules for containment of raw scalar arrays versus regular
+			 * arrays are not represented either, so a recheck is always
+			 * needed.  A missing key is nevertheless enough to answer
+			 * "false" right away.
+			 */
+			*recheck = true;
+			for (k = 0; k < nkeys; k++)
+			{
+				if (!check[k])
+				{
+					matched = false;
+					break;
+				}
+			}
+			break;
+
+		case JsonbExistsStrategyNumber:
+		case JsonbExistsAnyStrategyNumber:
+			/* Existence of key guaranteed in default search mode */
+			*recheck = false;
+			matched = true;
+			break;
+
+		case JsonbExistsAllStrategyNumber:
+			/* Testing for the presence of all keys gives an exact result */
+			*recheck = false;
+			for (k = 0; k < nkeys; k++)
+			{
+				if (!check[k])
+				{
+					matched = false;
+					break;
+				}
+			}
+			break;
+
+		default:
+			elog(ERROR, "unrecognized strategy number: %d", strategy);
+	}
+
+	PG_RETURN_BOOL(matched);
+}
+
+/*
+ * gin_triconsistent_jsonb
+ *		Ternary consistency check for jsonb_ops.
+ *
+ * Argument 0 is the array of per-key GinLogicValue inputs, argument 1 the
+ * strategy number and argument 3 the number of keys.  Arguments 2 (query)
+ * and 4 (extra data) are not used.
+ *
+ * Raises an error for an unrecognized strategy number.
+ */
+Datum
+gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
+{
+	GinLogicValue *check = (GinLogicValue *) PG_GETARG_POINTER(0);
+	StrategyNumber strategy = PG_GETARG_UINT16(1);
+	int32		nkeys = PG_GETARG_INT32(3);
+	GinLogicValue verdict = GIN_TRUE;
+	int32		k;
+
+	if (strategy == JsonbContainsStrategyNumber)
+	{
+		/*
+		 * Every extracted key must be present, so any GIN_FALSE input makes
+		 * the answer GIN_FALSE.  Otherwise the answer is always GIN_MAYBE:
+		 * either some input was GIN_MAYBE, or all were GIN_TRUE, in which
+		 * case a recheck is still required because the index does not record
+		 * which jsonb key goes with which value (each is stored as a separate
+		 * GIN key).
+		 */
+		verdict = GIN_MAYBE;
+		for (k = 0; k < nkeys; k++)
+		{
+			if (check[k] == GIN_FALSE)
+			{
+				verdict = GIN_FALSE;
+				break;
+			}
+		}
+	}
+	else if (strategy == JsonbExistsStrategyNumber ||
+			 strategy == JsonbExistsAnyStrategyNumber)
+	{
+		/*
+		 * One definite match settles it; failing that, a possible match
+		 * yields GIN_MAYBE, and no match at all yields GIN_FALSE.
+		 */
+		verdict = GIN_FALSE;
+		for (k = 0; k < nkeys; k++)
+		{
+			if (check[k] == GIN_TRUE)
+			{
+				verdict = GIN_TRUE;
+				break;
+			}
+			else if (check[k] == GIN_MAYBE)
+				verdict = GIN_MAYBE;
+		}
+	}
+	else if (strategy == JsonbExistsAllStrategyNumber)
+	{
+		/* Testing for the presence of all keys gives an exact result */
+		for (k = 0; k < nkeys; k++)
+		{
+			if (check[k] == GIN_FALSE)
+			{
+				verdict = GIN_FALSE;
+				break;
+			}
+			else if (check[k] == GIN_MAYBE)
+				verdict = GIN_MAYBE;
+		}
+	}
+	else
+		elog(ERROR, "unrecognized strategy number: %d", strategy);
+
+	PG_RETURN_GIN_LOGIC_VALUE(verdict);
+}
+
+/*
+ *
+ * jsonb_hash_ops GIN opclass support functions
+ *
+ */
+/*
+ * gin_consistent_jsonb_hash
+ *		Boolean consistency check for jsonb_hash_ops.
+ *
+ * Argument 0 is the array of per-key match flags, argument 1 the strategy
+ * number (only JsonbContainsStrategyNumber is accepted; anything else
+ * raises an error), argument 3 the number of keys, and argument 5 points to
+ * the recheck flag.  Arguments 2 (query) and 4 (extra data) are not used.
+ */
+Datum
+gin_consistent_jsonb_hash(PG_FUNCTION_ARGS)
+{
+	bool	   *check = (bool *) PG_GETARG_POINTER(0);
+	StrategyNumber strategy = PG_GETARG_UINT16(1);
+	int32		nkeys = PG_GETARG_INT32(3);
+	bool	   *recheck = (bool *) PG_GETARG_POINTER(5);
+	int32		k;
+
+	if (strategy != JsonbContainsStrategyNumber)
+		elog(ERROR, "unrecognized strategy number: %d", strategy);
+
+	/*
+	 * A jsonb_hash_ops index does not record which jsonb key goes with which
+	 * value, and the special rules for containment of raw scalar arrays
+	 * versus regular arrays are not represented either, so a recheck is
+	 * always requested.  A missing key is still enough to answer "false"
+	 * immediately.
+	 */
+	*recheck = true;
+
+	for (k = 0; k < nkeys; k++)
+	{
+		if (!check[k])
+			PG_RETURN_BOOL(false);
+	}
+
+	PG_RETURN_BOOL(true);
+}
+
+/*
+ * gin_triconsistent_jsonb_hash
+ *		Ternary consistency check for jsonb_hash_ops.
+ *
+ * Argument 0 is the array of per-key GinLogicValue inputs, argument 1 the
+ * strategy number (only JsonbContainsStrategyNumber is accepted; anything
+ * else raises an error) and argument 3 the number of keys.  Arguments 2
+ * (query) and 4 (extra data) are not used.
+ *
+ * All extracted keys must be present, so a single GIN_FALSE input gives
+ * GIN_FALSE.  In every other case the answer is GIN_MAYBE: either some input
+ * was already GIN_MAYBE, or all inputs were GIN_TRUE, in which case a recheck
+ * is still required because the index does not record which jsonb key goes
+ * with which value (for this opclass a GIN key is a hash of a pair, or a
+ * hash of just an element).
+ */
+Datum
+gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS)
+{
+	GinLogicValue *check = (GinLogicValue *) PG_GETARG_POINTER(0);
+	StrategyNumber strategy = PG_GETARG_UINT16(1);
+	int32		nkeys = PG_GETARG_INT32(3);
+	int32		k;
+
+	if (strategy != JsonbContainsStrategyNumber)
+		elog(ERROR, "unrecognized strategy number: %d", strategy);
+
+	for (k = 0; k < nkeys; k++)
+	{
+		if (check[k] == GIN_FALSE)
+			PG_RETURN_GIN_LOGIC_VALUE(GIN_FALSE);
+	}
+
+	PG_RETURN_GIN_LOGIC_VALUE(GIN_MAYBE);
+}
+
+/*
+ * gin_extract_jsonb_hash
+ *		Extract the jsonb_hash_ops index keys from a jsonb value.
+ *
+ * Argument 0 is the jsonb; argument 1 points to an int32 that receives the
+ * number of keys.  The result is a palloc'd array holding one uint32 hash
+ * Datum per array element and per object pair value, or NULL (with zero
+ * keys) when the root container is empty.
+ *
+ * A stack of PathHashStack entries tracks the open containers.  "tail" is a
+ * sentinel at the bottom with hash 0 and no parent.  Per iterator token:
+ *	- begin array/object: push an entry.  It inherits the hash of the entry
+ *	  below it, unless that entry is the sentinel, in which case it is
+ *	  seeded with JB_FARRAY or JB_FOBJECT;
+ *	- key: restart from the parent's hash and mix in the key;
+ *	- element: restart from the parent's hash, mix in the element, emit;
+ *	- value: mix the value into the current hash (which already contains the
+ *	  key), emit;
+ *	- end array/object: pop and free the entry.
+ *
+ * Raises an error on any other iterator return code.
+ */
+Datum
+gin_extract_jsonb_hash(PG_FUNCTION_ARGS)
+{
+	Jsonb	   *jb = PG_GETARG_JSONB(0);
+	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
+	int			total = 2 * JB_ROOT_COUNT(jb);
+	JsonbIterator *it;
+	JsonbValue	v;
+	PathHashStack tail;
+	PathHashStack *stack;
+	int			i = 0,
+				r;
+	Datum	   *entries = NULL;
+
+	/* Empty root container: nothing to index */
+	if (total == 0)
+	{
+		*nentries = 0;
+		PG_RETURN_POINTER(NULL);
+	}
+
+	entries = (Datum *) palloc(sizeof(Datum) * total);
+
+	it = JsonbIteratorInit(VARDATA(jb));
+
+	/* Bottom-of-stack sentinel */
+	tail.parent = NULL;
+	tail.hash = 0;
+	stack = &tail;
+
+	while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+	{
+		PathHashStack *tmp;
+
+		/* Initial size is only an estimate; double the array when full */
+		if (i >= total)
+		{
+			total *= 2;
+			entries = (Datum *) repalloc(entries, sizeof(Datum) * total);
+		}
+
+		switch (r)
+		{
+			case WJB_BEGIN_ARRAY:
+			case WJB_BEGIN_OBJECT:
+				/* Push a new stack entry for this container */
+				tmp = stack;
+				stack = (PathHashStack *) palloc(sizeof(PathHashStack));
+
+				/*
+				 * Nesting an array within another array will not alter
+				 * innermost scalar element hash values, but that seems
+				 * inconsequential
+				 */
+				if (tmp->parent)
+				{
+					/*
+					 * We pass forward hashes from previous container nesting
+					 * levels so that nested arrays with an outermost nested
+					 * object will have element hashes mixed with the
+					 * outermost key.  It's also somewhat useful to have
+					 * nested objects' innermost values have hashes that are
+					 * a function of not just their own key, but outer keys
+					 * too.
+					 */
+					stack->hash = tmp->hash;
+				}
+				else
+				{
+					/*
+					 * At the outermost (least nested) level, initialize with
+					 * a stable container type proxy value
+					 */
+					stack->hash = (r == WJB_BEGIN_ARRAY) ? JB_FARRAY : JB_FOBJECT;
+				}
+				stack->parent = tmp;
+				break;
+			case WJB_KEY:
+				/* Initialize hash from parent */
+				stack->hash = stack->parent->hash;
+				JsonbHashScalarValue(&v, &stack->hash);
+				break;
+			case WJB_ELEM:
+				/* Elements have parent hash mixed in separately */
+				stack->hash = stack->parent->hash;
+				/* FALL THRU */
+			case WJB_VALUE:
+				/* Element/value case */
+				JsonbHashScalarValue(&v, &stack->hash);
+				entries[i++] = UInt32GetDatum(stack->hash);
+				break;
+			case WJB_END_ARRAY:
+			case WJB_END_OBJECT:
+				/* Pop the stack */
+				tmp = stack->parent;
+				pfree(stack);
+				stack = tmp;
+				break;
+			default:
+				elog(ERROR, "invalid JsonbIteratorNext rc: %d", r);
+		}
+	}
+
+	*nentries = i;
+
+	PG_RETURN_POINTER(entries);
+}
+
+/*
+ * gin_extract_jsonb_query_hash
+ *		Build the jsonb_hash_ops search keys for a query.
+ *
+ * Only JsonbContainsStrategyNumber (argument 2) is accepted; anything else
+ * raises an error.  The query in argument 0 is a jsonb, so its keys are
+ * obtained by calling gin_extract_jsonb_hash, which also stores the key
+ * count through the int32 pointer in argument 1.  When that yields NULL
+ * ("contains {}"), the search mode pointed to by argument 6 is set to
+ * GIN_SEARCH_MODE_ALL, because a full index scan is required.
+ */
+Datum
+gin_extract_jsonb_query_hash(PG_FUNCTION_ARGS)
+{
+	int32	   *nentries = (int32 *) PG_GETARG_POINTER(1);
+	StrategyNumber strategy = PG_GETARG_UINT16(2);
+	int32	   *searchMode = (int32 *) PG_GETARG_POINTER(6);
+	Datum		extracted;
+	Datum	   *keys;
+
+	if (strategy != JsonbContainsStrategyNumber)
+		elog(ERROR, "unrecognized strategy number: %d", strategy);
+
+	extracted = DirectFunctionCall2(gin_extract_jsonb_hash,
+									PG_GETARG_DATUM(0),
+									PointerGetDatum(nentries));
+	keys = (Datum *) DatumGetPointer(extracted);
+
+	if (keys == NULL)
+		*searchMode = GIN_SEARCH_MODE_ALL;
+
+	PG_RETURN_POINTER(keys);
+}
+
+/*
+ * make_text_key
+ *		Build a text value suitable for storage as a GIN key.
+ *
+ * The result is a palloc'd text whose payload is the single byte "flag"
+ * followed by the first "len" bytes of "str".  "str" need not be
+ * NUL-terminated, and no terminator is stored.
+ */
+static text *
+make_text_key(const char *str, int len, char flag)
+{
+	int			size = VARHDRSZ + len + 1;
+	text	   *key = (text *) palloc(size);
+	char	   *payload;
+
+	SET_VARSIZE(key, size);
+
+	payload = VARDATA(key);
+	payload[0] = flag;
+	memcpy(payload + 1, str, len);
+
+	return key;
+}
+
+/*
+ * make_scalar_key
+ *		Create the textual GIN key for a scalar JsonbValue.
+ *
+ * The key is "flag" followed by: "n" for null, "t" or "f" for a boolean,
+ * the normalized text form of a numeric, or the raw bytes of a string.
+ * Raises an error for any other value type.
+ */
+static text *
+make_scalar_key(const JsonbValue *scalarVal, char flag)
+{
+	switch (scalarVal->type)
+	{
+		case jbvNull:
+			return make_text_key("n", 1, flag);
+
+		case jbvBool:
+			return make_text_key(scalarVal->boolean ? "t" : "f", 1, flag);
+
+		case jbvNumeric:
+			{
+				/*
+				 * A normalized textual representation, free of trailing
+				 * zeroes, is required.
+				 *
+				 * It isn't ideal that numerics are stored in a relatively
+				 * bulky textual format.  However, it's a notationally
+				 * convenient way of storing a "union" type in the GIN
+				 * B-Tree, and indexing Jsonb strings takes precedence.
+				 */
+				char	   *numstr = numeric_normalize(scalarVal->numeric);
+				text	   *key = make_text_key(numstr, strlen(numstr), flag);
+
+				pfree(numstr);
+				return key;
+			}
+
+		case jbvString:
+			return make_text_key(scalarVal->string.val,
+								 scalarVal->string.len,
+								 flag);
+
+		default:
+			elog(ERROR, "invalid jsonb scalar type");
+	}
+
+	return NULL;				/* not reached; keep compiler quiet */
+}