aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/utils/adt/jsonb_gin.c461
-rw-r--r--src/include/catalog/catversion.h2
-rw-r--r--src/include/utils/jsonb.h52
-rw-r--r--src/test/regress/data/jsonb.data3
-rw-r--r--src/test/regress/expected/jsonb.out28
-rw-r--r--src/test/regress/expected/jsonb_1.out28
-rw-r--r--src/test/regress/sql/jsonb.sql2
7 files changed, 299 insertions, 277 deletions
diff --git a/src/backend/utils/adt/jsonb_gin.c b/src/backend/utils/adt/jsonb_gin.c
index 592036ac585..57a0b2c8a3c 100644
--- a/src/backend/utils/adt/jsonb_gin.c
+++ b/src/backend/utils/adt/jsonb_gin.c
@@ -14,6 +14,7 @@
#include "postgres.h"
#include "access/gin.h"
+#include "access/hash.h"
#include "access/skey.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_type.h"
@@ -26,14 +27,15 @@ typedef struct PathHashStack
struct PathHashStack *parent;
} PathHashStack;
-static text *make_text_key(const char *str, int len, char flag);
-static text *make_scalar_key(const JsonbValue *scalarVal, char flag);
+static Datum make_text_key(char flag, const char *str, int len);
+static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key);
/*
*
* jsonb_ops GIN opclass support functions
*
*/
+
Datum
gin_compare_jsonb(PG_FUNCTION_ARGS)
{
@@ -65,80 +67,49 @@ gin_extract_jsonb(PG_FUNCTION_ARGS)
{
Jsonb *jb = (Jsonb *) PG_GETARG_JSONB(0);
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
- Datum *entries = NULL;
int total = 2 * JB_ROOT_COUNT(jb);
- int i = 0,
- r;
JsonbIterator *it;
JsonbValue v;
+ int i = 0,
+ r;
+ Datum *entries;
+ /* If the root level is empty, we certainly have no keys */
if (total == 0)
{
*nentries = 0;
PG_RETURN_POINTER(NULL);
}
+ /* Otherwise, use 2 * root count as initial estimate of result size */
entries = (Datum *) palloc(sizeof(Datum) * total);
it = JsonbIteratorInit(&jb->root);
while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
{
+ /* Since we recurse into the object, we might need more space */
if (i >= total)
{
total *= 2;
entries = (Datum *) repalloc(entries, sizeof(Datum) * total);
}
- /*
- * Serialize keys and elements equivalently, but only when elements
- * are Jsonb strings. Otherwise, serialize elements as values. Array
- * elements are indexed as keys, for the benefit of
- * JsonbExistsStrategyNumber. Our definition of existence does not
- * allow for checking the existence of a non-jbvString element (just
- * like the definition of the underlying operator), because the
- * operator takes a text rhs argument (which is taken as a proxy for
- * an equivalent Jsonb string).
- *
- * The way existence is represented does not preclude an alternative
- * existence operator, that takes as its rhs value an arbitrarily
- * internally-typed Jsonb. The only reason that isn't the case here
- * is that the existence operator is only really intended to determine
- * if an object has a certain key (object pair keys are of course
- * invariably strings), which is extended to jsonb arrays. You could
- * think of the default Jsonb definition of existence as being
- * equivalent to a definition where all types of scalar array elements
- * are keys that we can check the existence of, while just forbidding
- * non-string notation. This inflexibility prevents the user from
- * having to qualify that the rhs string is a raw scalar string (that
- * is, naturally no internal string quoting in required for the text
- * argument), and allows us to not set the reset flag for
- * JsonbExistsStrategyNumber, since we know that keys are strings for
- * both objects and arrays, and don't have to further account for type
- * mismatch. Not having to set the reset flag makes it less than
- * tempting to tighten up the definition of existence to preclude
- * array elements entirely, which would arguably be a simpler
- * alternative. In any case the infrastructure used to implement the
- * existence operator could trivially support this hypothetical,
- * slightly distinct definition of existence.
- */
switch (r)
{
case WJB_KEY:
- /* Serialize key separately, for existence strategies */
- entries[i++] = PointerGetDatum(make_scalar_key(&v, JKEYELEM));
+ entries[i++] = make_scalar_key(&v, true);
break;
case WJB_ELEM:
- if (v.type == jbvString)
- entries[i++] = PointerGetDatum(make_scalar_key(&v, JKEYELEM));
- else
- entries[i++] = PointerGetDatum(make_scalar_key(&v, JVAL));
+ /* Pretend string array elements are keys, see jsonb.h */
+ entries[i++] = make_scalar_key(&v, (v.type == jbvString));
break;
case WJB_VALUE:
- entries[i++] = PointerGetDatum(make_scalar_key(&v, JVAL));
+ entries[i++] = make_scalar_key(&v, false);
break;
default:
- continue;
+ /* we can ignore structural items */
+ break;
}
}
@@ -163,30 +134,30 @@ gin_extract_jsonb_query(PG_FUNCTION_ARGS)
PG_GETARG_DATUM(0),
PointerGetDatum(nentries)));
/* ...although "contains {}" requires a full index scan */
- if (entries == NULL)
+ if (*nentries == 0)
*searchMode = GIN_SEARCH_MODE_ALL;
}
else if (strategy == JsonbExistsStrategyNumber)
{
+ /* Query is a text string, which we treat as a key */
text *query = PG_GETARG_TEXT_PP(0);
- text *item;
*nentries = 1;
entries = (Datum *) palloc(sizeof(Datum));
- item = make_text_key(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query),
- JKEYELEM);
- entries[0] = PointerGetDatum(item);
+ entries[0] = make_text_key(JGINFLAG_KEY,
+ VARDATA_ANY(query),
+ VARSIZE_ANY_EXHDR(query));
}
else if (strategy == JsonbExistsAnyStrategyNumber ||
strategy == JsonbExistsAllStrategyNumber)
{
+ /* Query is a text array; each element is treated as a key */
ArrayType *query = PG_GETARG_ARRAYTYPE_P(0);
Datum *key_datums;
bool *key_nulls;
int key_count;
int i,
j;
- text *item;
deconstruct_array(query,
TEXTOID, -1, false, 'i',
@@ -194,15 +165,14 @@ gin_extract_jsonb_query(PG_FUNCTION_ARGS)
entries = (Datum *) palloc(sizeof(Datum) * key_count);
- for (i = 0, j = 0; i < key_count; ++i)
+ for (i = 0, j = 0; i < key_count; i++)
{
/* Nulls in the array are ignored */
if (key_nulls[i])
continue;
- item = make_text_key(VARDATA(key_datums[i]),
- VARSIZE(key_datums[i]) - VARHDRSZ,
- JKEYELEM);
- entries[j++] = PointerGetDatum(item);
+ entries[j++] = make_text_key(JGINFLAG_KEY,
+ VARDATA_ANY(key_datums[i]),
+ VARSIZE_ANY_EXHDR(key_datums[i]));
}
*nentries = j;
@@ -236,13 +206,12 @@ gin_consistent_jsonb(PG_FUNCTION_ARGS)
if (strategy == JsonbContainsStrategyNumber)
{
/*
- * Index doesn't have information about correspondence of Jsonb keys
- * and values (as distinct from GIN keys, which a key/value pair is
- * stored as), so invariably we recheck. Besides, there are some
- * special rules around the containment of raw scalar arrays and
- * regular arrays that are not represented here. However, if all of
- * the keys are not present, that's sufficient reason to return false
- * and finish immediately.
+ * We must always recheck, since we can't tell from the index whether
+ * the positions of the matched items match the structure of the query
+ * object. (Even if we could, we'd also have to worry about hashed
+ * keys and the index's failure to distinguish keys from string array
+ * elements.) However, the tuple certainly doesn't match unless it
+ * contains all the query keys.
*/
*recheck = true;
for (i = 0; i < nkeys; i++)
@@ -256,20 +225,27 @@ gin_consistent_jsonb(PG_FUNCTION_ARGS)
}
else if (strategy == JsonbExistsStrategyNumber)
{
- /* Existence of key guaranteed in default search mode */
- *recheck = false;
+ /*
+ * Although the key is certainly present in the index, we must recheck
+ * because (1) the key might be hashed, and (2) the index match might
+ * be for a key that's not at top level of the JSON object. For (1),
+ * we could look at the query key to see if it's hashed and not
+ * recheck if not, but the index lacks enough info to tell about (2).
+ */
+ *recheck = true;
res = true;
}
else if (strategy == JsonbExistsAnyStrategyNumber)
{
- /* Existence of key guaranteed in default search mode */
- *recheck = false;
+ /* As for plain exists, we must recheck */
+ *recheck = true;
res = true;
}
else if (strategy == JsonbExistsAllStrategyNumber)
{
- /* Testing for the presence of all keys gives an exact result */
- *recheck = false;
+ /* As for plain exists, we must recheck */
+ *recheck = true;
+ /* ... but unless all the keys are present, we can say "false" */
for (i = 0; i < nkeys; i++)
{
if (!check[i])
@@ -295,19 +271,18 @@ gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
int32 nkeys = PG_GETARG_INT32(3);
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
- GinTernaryValue res = GIN_TRUE;
-
+ GinTernaryValue res = GIN_MAYBE;
int32 i;
- if (strategy == JsonbContainsStrategyNumber)
+ /*
+ * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this
+ * corresponds to always forcing recheck in the regular consistent
+ * function, for the reasons listed there.
+ */
+ if (strategy == JsonbContainsStrategyNumber ||
+ strategy == JsonbExistsAllStrategyNumber)
{
- bool has_maybe = false;
-
- /*
- * All extracted keys must be present. Combination of GIN_MAYBE and
- * GIN_TRUE gives GIN_MAYBE result because then all keys may be
- * present.
- */
+ /* All extracted keys must be present */
for (i = 0; i < nkeys; i++)
{
if (check[i] == GIN_FALSE)
@@ -315,55 +290,21 @@ gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
res = GIN_FALSE;
break;
}
- if (check[i] == GIN_MAYBE)
- {
- res = GIN_MAYBE;
- has_maybe = true;
- }
}
-
- /*
- * Index doesn't have information about correspondence of Jsonb keys
- * and values (as distinct from GIN keys, which a key/value pair is
- * stored as), so invariably we recheck. This is also reflected in
- * how GIN_MAYBE is given in response to there being no GIN_MAYBE
- * input.
- */
- if (!has_maybe && res == GIN_TRUE)
- res = GIN_MAYBE;
}
else if (strategy == JsonbExistsStrategyNumber ||
strategy == JsonbExistsAnyStrategyNumber)
{
- /* Existence of key guaranteed in default search mode */
+ /* At least one extracted key must be present */
res = GIN_FALSE;
for (i = 0; i < nkeys; i++)
{
- if (check[i] == GIN_TRUE)
- {
- res = GIN_TRUE;
- break;
- }
- if (check[i] == GIN_MAYBE)
+ if (check[i] == GIN_TRUE ||
+ check[i] == GIN_MAYBE)
{
res = GIN_MAYBE;
- }
- }
- }
- else if (strategy == JsonbExistsAllStrategyNumber)
- {
- /* Testing for the presence of all keys gives an exact result */
- for (i = 0; i < nkeys; i++)
- {
- if (check[i] == GIN_FALSE)
- {
- res = GIN_FALSE;
break;
}
- if (check[i] == GIN_MAYBE)
- {
- res = GIN_MAYBE;
- }
}
}
else
@@ -376,94 +317,13 @@ gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
*
* jsonb_hash_ops GIN opclass support functions
*
+ * In a jsonb_hash_ops index, the GIN keys are uint32 hashes, one per JSON
+ * value; but the JSON key(s) leading to each value are also included in its
+ * hash computation. This means we can only support containment queries,
+ * but the index can distinguish, for example, {"foo": 42} from {"bar": 42}
+ * since different hashes will be generated.
+ *
*/
-Datum
-gin_consistent_jsonb_hash(PG_FUNCTION_ARGS)
-{
- bool *check = (bool *) PG_GETARG_POINTER(0);
- StrategyNumber strategy = PG_GETARG_UINT16(1);
-
- /* Jsonb *query = PG_GETARG_JSONB(2); */
- int32 nkeys = PG_GETARG_INT32(3);
-
- /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
- bool *recheck = (bool *) PG_GETARG_POINTER(5);
- bool res = true;
- int32 i;
-
- if (strategy != JsonbContainsStrategyNumber)
- elog(ERROR, "unrecognized strategy number: %d", strategy);
-
- /*
- * jsonb_hash_ops index doesn't have information about correspondence of
- * Jsonb keys and values (as distinct from GIN keys, which a key/value
- * pair is stored as), so invariably we recheck. Besides, there are some
- * special rules around the containment of raw scalar arrays and regular
- * arrays that are not represented here. However, if all of the keys are
- * not present, that's sufficient reason to return false and finish
- * immediately.
- */
- *recheck = true;
- for (i = 0; i < nkeys; i++)
- {
- if (!check[i])
- {
- res = false;
- break;
- }
- }
-
- PG_RETURN_BOOL(res);
-}
-
-Datum
-gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS)
-{
- GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
- StrategyNumber strategy = PG_GETARG_UINT16(1);
-
- /* Jsonb *query = PG_GETARG_JSONB(2); */
- int32 nkeys = PG_GETARG_INT32(3);
-
- /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
- GinTernaryValue res = GIN_TRUE;
- int32 i;
- bool has_maybe = false;
-
- if (strategy != JsonbContainsStrategyNumber)
- elog(ERROR, "unrecognized strategy number: %d", strategy);
-
- /*
- * All extracted keys must be present. A combination of GIN_MAYBE and
- * GIN_TRUE induces a GIN_MAYBE result, because then all keys may be
- * present.
- */
- for (i = 0; i < nkeys; i++)
- {
- if (check[i] == GIN_FALSE)
- {
- res = GIN_FALSE;
- break;
- }
- if (check[i] == GIN_MAYBE)
- {
- res = GIN_MAYBE;
- has_maybe = true;
- }
- }
-
- /*
- * jsonb_hash_ops index doesn't have information about correspondence of
- * Jsonb keys and values (as distinct from GIN keys, which for this
- * opclass are a hash of a pair, or a hash of just an element), so
- * invariably we recheck. This is also reflected in how GIN_MAYBE is
- * given in response to there being no GIN_MAYBE input.
- */
- if (!has_maybe && res == GIN_TRUE)
- res = GIN_MAYBE;
-
- PG_RETURN_GIN_TERNARY_VALUE(res);
-}
Datum
gin_extract_jsonb_hash(PG_FUNCTION_ARGS)
@@ -477,26 +337,30 @@ gin_extract_jsonb_hash(PG_FUNCTION_ARGS)
PathHashStack *stack;
int i = 0,
r;
- Datum *entries = NULL;
+ Datum *entries;
+ /* If the root level is empty, we certainly have no keys */
if (total == 0)
{
*nentries = 0;
PG_RETURN_POINTER(NULL);
}
+ /* Otherwise, use 2 * root count as initial estimate of result size */
entries = (Datum *) palloc(sizeof(Datum) * total);
- it = JsonbIteratorInit(&jb->root);
-
+ /* We keep a stack of hashes corresponding to parent key levels */
tail.parent = NULL;
tail.hash = 0;
stack = &tail;
+ it = JsonbIteratorInit(&jb->root);
+
while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
{
- PathHashStack *tmp;
+ PathHashStack *parent;
+ /* Since we recurse into the object, we might need more space */
if (i >= total)
{
total *= 2;
@@ -507,55 +371,62 @@ gin_extract_jsonb_hash(PG_FUNCTION_ARGS)
{
case WJB_BEGIN_ARRAY:
case WJB_BEGIN_OBJECT:
- tmp = stack;
+ /* Push a stack level for this object */
+ parent = stack;
stack = (PathHashStack *) palloc(sizeof(PathHashStack));
- /*
- * Nesting an array within another array will not alter
- * innermost scalar element hash values, but that seems
- * inconsequential
- */
- if (tmp->parent)
+ if (parent->parent)
{
/*
* We pass forward hashes from previous container nesting
* levels so that nested arrays with an outermost nested
* object will have element hashes mixed with the
* outermost key. It's also somewhat useful to have
- * nested objects innermost values have hashes that are a
+ * nested objects' innermost values have hashes that are a
* function of not just their own key, but outer keys too.
+ *
+ * Nesting an array within another array will not alter
+ * innermost scalar element hash values, but that seems
+ * inconsequential.
*/
- stack->hash = tmp->hash;
+ stack->hash = parent->hash;
}
else
{
/*
- * At least nested level, initialize with stable container
- * type proxy value
+ * At the outermost level, initialize hash with container
+ * type proxy value. Note that this makes JB_FARRAY and
+ * JB_FOBJECT part of the on-disk representation, but they
+ * are that in the base jsonb object storage already.
*/
stack->hash = (r == WJB_BEGIN_ARRAY) ? JB_FARRAY : JB_FOBJECT;
}
- stack->parent = tmp;
+ stack->parent = parent;
break;
case WJB_KEY:
- /* Initialize hash from parent */
+ /* initialize hash from parent */
stack->hash = stack->parent->hash;
+ /* and mix in this key */
JsonbHashScalarValue(&v, &stack->hash);
+ /* hash is now ready to incorporate the value */
break;
case WJB_ELEM:
- /* Elements have parent hash mixed in separately */
+ /* array elements use parent hash mixed with element's hash */
stack->hash = stack->parent->hash;
+ /* FALL THRU */
case WJB_VALUE:
- /* Element/value case */
+ /* mix the element or value's hash into the prepared hash */
JsonbHashScalarValue(&v, &stack->hash);
+ /* and emit an index entry */
entries[i++] = UInt32GetDatum(stack->hash);
+ /* Note: we assume we'll see KEY before another VALUE */
break;
case WJB_END_ARRAY:
case WJB_END_OBJECT:
/* Pop the stack */
- tmp = stack->parent;
+ parent = stack->parent;
pfree(stack);
- stack = tmp;
+ stack = parent;
break;
default:
elog(ERROR, "invalid JsonbIteratorNext rc: %d", r);
@@ -578,28 +449,119 @@ gin_extract_jsonb_query_hash(PG_FUNCTION_ARGS)
if (strategy != JsonbContainsStrategyNumber)
elog(ERROR, "unrecognized strategy number: %d", strategy);
- /* Query is a jsonb, so just apply gin_extract_jsonb... */
+ /* Query is a jsonb, so just apply gin_extract_jsonb_hash ... */
entries = (Datum *)
DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_hash,
PG_GETARG_DATUM(0),
PointerGetDatum(nentries)));
- /* ...although "contains {}" requires a full index scan */
- if (entries == NULL)
+ /* ... although "contains {}" requires a full index scan */
+ if (*nentries == 0)
*searchMode = GIN_SEARCH_MODE_ALL;
PG_RETURN_POINTER(entries);
}
+Datum
+gin_consistent_jsonb_hash(PG_FUNCTION_ARGS)
+{
+ bool *check = (bool *) PG_GETARG_POINTER(0);
+ StrategyNumber strategy = PG_GETARG_UINT16(1);
+
+ /* Jsonb *query = PG_GETARG_JSONB(2); */
+ int32 nkeys = PG_GETARG_INT32(3);
+
+ /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
+ bool *recheck = (bool *) PG_GETARG_POINTER(5);
+ bool res = true;
+ int32 i;
+
+ if (strategy != JsonbContainsStrategyNumber)
+ elog(ERROR, "unrecognized strategy number: %d", strategy);
+
+ /*
+ * jsonb_hash_ops is necessarily lossy, not only because of hash
+ * collisions but also because it doesn't preserve complete information
+ * about the structure of the JSON object. Besides, there are some
+ * special rules around the containment of raw scalar arrays and regular
+ * arrays that are not handled here. So we must always recheck a match.
+ * However, if not all of the keys are present, the tuple certainly
+ * doesn't match.
+ */
+ *recheck = true;
+ for (i = 0; i < nkeys; i++)
+ {
+ if (!check[i])
+ {
+ res = false;
+ break;
+ }
+ }
+
+ PG_RETURN_BOOL(res);
+}
+
+Datum
+gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS)
+{
+ GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
+ StrategyNumber strategy = PG_GETARG_UINT16(1);
+
+ /* Jsonb *query = PG_GETARG_JSONB(2); */
+ int32 nkeys = PG_GETARG_INT32(3);
+
+ /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
+ GinTernaryValue res = GIN_MAYBE;
+ int32 i;
+
+ if (strategy != JsonbContainsStrategyNumber)
+ elog(ERROR, "unrecognized strategy number: %d", strategy);
+
+ /*
+ * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this
+ * corresponds to always forcing recheck in the regular consistent
+ * function, for the reasons listed there.
+ */
+ for (i = 0; i < nkeys; i++)
+ {
+ if (check[i] == GIN_FALSE)
+ {
+ res = GIN_FALSE;
+ break;
+ }
+ }
+
+ PG_RETURN_GIN_TERNARY_VALUE(res);
+}
+
/*
- * Build a text value from a cstring and flag suitable for storage as a key
- * value
+ * Construct a jsonb_ops GIN key from a flag byte and a textual representation
+ * (which need not be null-terminated). This function is responsible
+ * for hashing overlength text representations; it will add the
+ * JGINFLAG_HASHED bit to the flag value if it does that.
*/
-static text *
-make_text_key(const char *str, int len, char flag)
+static Datum
+make_text_key(char flag, const char *str, int len)
{
text *item;
+ char hashbuf[10];
+
+ if (len > JGIN_MAXLENGTH)
+ {
+ uint32 hashval;
+
+ hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len));
+ snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval);
+ str = hashbuf;
+ len = 8;
+ flag |= JGINFLAG_HASHED;
+ }
+ /*
+ * Now build the text Datum. For simplicity we build a 4-byte-header
+ * varlena text Datum here, but we expect it will get converted to short
+ * header format when stored in the index.
+ */
item = (text *) palloc(VARHDRSZ + len + 1);
SET_VARSIZE(item, VARHDRSZ + len + 1);
@@ -607,31 +569,39 @@ make_text_key(const char *str, int len, char flag)
memcpy(VARDATA(item) + 1, str, len);
- return item;
+ return PointerGetDatum(item);
}
/*
- * Create a textual representation of a jsonbValue for GIN storage.
+ * Create a textual representation of a JsonbValue that will serve as a GIN
+ * key in a jsonb_ops index. is_key is true if the JsonbValue is a key,
+ * or if it is a string array element (since we pretend those are keys,
+ * see jsonb.h).
*/
-static text *
-make_scalar_key(const JsonbValue *scalarVal, char flag)
+static Datum
+make_scalar_key(const JsonbValue *scalarVal, bool is_key)
{
- text *item;
+ Datum item;
char *cstr;
switch (scalarVal->type)
{
case jbvNull:
- item = make_text_key("n", 1, flag);
+ Assert(!is_key);
+ item = make_text_key(JGINFLAG_NULL, "", 0);
break;
case jbvBool:
- item = make_text_key(scalarVal->val.boolean ? "t" : "f", 1, flag);
+ Assert(!is_key);
+ item = make_text_key(JGINFLAG_BOOL,
+ scalarVal->val.boolean ? "t" : "f", 1);
break;
case jbvNumeric:
+ Assert(!is_key);
/*
- * A normalized textual representation, free of trailing zeroes is
- * is required.
+ * A normalized textual representation, free of trailing zeroes,
+ * is required so that numerically equal values will produce equal
+ * strings.
*
* It isn't ideal that numerics are stored in a relatively bulky
* textual format. However, it's a notationally convenient way of
@@ -639,15 +609,18 @@ make_scalar_key(const JsonbValue *scalarVal, char flag)
* strings takes precedence.
*/
cstr = numeric_normalize(scalarVal->val.numeric);
- item = make_text_key(cstr, strlen(cstr), flag);
+ item = make_text_key(JGINFLAG_NUM, cstr, strlen(cstr));
pfree(cstr);
break;
case jbvString:
- item = make_text_key(scalarVal->val.string.val, scalarVal->val.string.len,
- flag);
+ item = make_text_key(is_key ? JGINFLAG_KEY : JGINFLAG_STR,
+ scalarVal->val.string.val,
+ scalarVal->val.string.len);
break;
default:
- elog(ERROR, "invalid jsonb scalar type");
+ elog(ERROR, "unrecognized jsonb scalar type: %d", scalarVal->type);
+ item = 0; /* keep compiler quiet */
+ break;
}
return item;
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 2eb78128be8..f37a78a264f 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201405051
+#define CATALOG_VERSION_NO 201405091
#endif
diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h
index fc746c8b742..1a6409ac0de 100644
--- a/src/include/utils/jsonb.h
+++ b/src/include/utils/jsonb.h
@@ -29,25 +29,41 @@ typedef enum
WJB_END_OBJECT
} JsonbIteratorToken;
-/*
- * When using a GIN index for jsonb, we choose to index both keys and values.
- * The storage format is text, with K, or V prepended to the string to indicate
- * key/element or value/element.
- *
- * Jsonb Keys and string array elements are treated equivalently when
- * serialized to text index storage. One day we may wish to create an opclass
- * that only indexes values, but for now keys and values are stored in GIN
- * indexes in a way that doesn't really consider their relationship to each
- * other.
- */
-#define JKEYELEM 'K'
-#define JVAL 'V'
-
+/* Strategy numbers for GIN index opclasses */
#define JsonbContainsStrategyNumber 7
#define JsonbExistsStrategyNumber 9
#define JsonbExistsAnyStrategyNumber 10
#define JsonbExistsAllStrategyNumber 11
+/*
+ * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both
+ * keys and values. The storage format is text. The first byte of the text
+ * string distinguishes whether this is a key (always a string), null value,
+ * boolean value, numeric value, or string value. However, array elements
+ * that are strings are marked as though they were keys; this imprecision
+ * supports the definition of the "exists" operator, which treats array
+ * elements like keys. The remainder of the text string is empty for a null
+ * value, "t" or "f" for a boolean value, a normalized print representation of
+ * a numeric value, or the text of a string value. However, if the length of
+ * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash
+ * the text representation and store an 8-hex-digit representation of the
+ * uint32 hash value, marking the prefix byte with an additional bit to
+ * distinguish that this has happened. Hashing long strings saves space and
+ * ensures that we won't overrun the maximum entry length for a GIN index.
+ * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit. It's chosen
+ * to ensure that the on-disk text datum will have a short varlena header.)
+ * Note that when any hashed item appears in a query, we must recheck index
+ * matches against the heap tuple; currently, this costs nothing because we
+ * must always recheck for other reasons.
+ */
+#define JGINFLAG_KEY 0x01 /* key (or string array element) */
+#define JGINFLAG_NULL 0x02 /* null value */
+#define JGINFLAG_BOOL 0x03 /* boolean value */
+#define JGINFLAG_NUM 0x04 /* numeric value */
+#define JGINFLAG_STR 0x05 /* string value (if not an array element) */
+#define JGINFLAG_HASHED 0x10 /* OR'd into flag if value was hashed */
+#define JGIN_MAXLENGTH 125 /* max length of text part before hashing */
+
/* Convenience macros */
#define DatumGetJsonb(d) ((Jsonb *) PG_DETOAST_DATUM(d))
#define JsonbGetDatum(p) PointerGetDatum(p)
@@ -332,12 +348,12 @@ extern Datum gin_consistent_jsonb_hash(PG_FUNCTION_ARGS);
extern Datum gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS);
/* Support functions */
-extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);
+extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);
extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader,
- uint32 flags,
- JsonbValue *key);
+ uint32 flags,
+ JsonbValue *key);
extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *sheader,
- uint32 i);
+ uint32 i);
extern JsonbValue *pushJsonbValue(JsonbParseState **pstate,
JsonbIteratorToken seq, JsonbValue *scalarVal);
extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container);
diff --git a/src/test/regress/data/jsonb.data b/src/test/regress/data/jsonb.data
index 1352ebe3ac7..622501b236a 100644
--- a/src/test/regress/data/jsonb.data
+++ b/src/test/regress/data/jsonb.data
@@ -1006,4 +1006,7 @@
{"wait":null, "line":1000}
{"age":25}
{"age":25.0}
+{"foo": {"bar": "baz"}}
+{"foo": {"blah": "baz"}}
+{"fool": {"bar": "baz"}}
{}
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index 8bd0131100d..c5a7d64ae46 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -1483,6 +1483,12 @@ SELECT count(*) FROM testjsonb WHERE j ? 'public';
194
(1 row)
+SELECT count(*) FROM testjsonb WHERE j ? 'bar';
+ count
+-------
+ 0
+(1 row)
+
SELECT count(*) FROM testjsonb WHERE j ?| ARRAY['public','disabled'];
count
-------
@@ -1543,7 +1549,7 @@ SELECT count(*) FROM testjsonb WHERE j @> '{"array":["bar"]}';
SELECT count(*) FROM testjsonb WHERE j @> '{}';
count
-------
- 1009
+ 1012
(1 row)
SELECT count(*) FROM testjsonb WHERE j ? 'public';
@@ -1552,6 +1558,12 @@ SELECT count(*) FROM testjsonb WHERE j ? 'public';
194
(1 row)
+SELECT count(*) FROM testjsonb WHERE j ? 'bar';
+ count
+-------
+ 0
+(1 row)
+
SELECT count(*) FROM testjsonb WHERE j ?| ARRAY['public','disabled'];
count
-------
@@ -1591,7 +1603,7 @@ RESET enable_seqscan;
SELECT count(*) FROM (SELECT (jsonb_each(j)).key FROM testjsonb) AS wow;
count
-------
- 4788
+ 4791
(1 row)
SELECT key, count(*) FROM (SELECT (jsonb_each(j)).key FROM testjsonb) AS wow GROUP BY key ORDER BY count DESC, key;
@@ -1621,20 +1633,22 @@ SELECT key, count(*) FROM (SELECT (jsonb_each(j)).key FROM testjsonb) AS wow GRO
abstract | 161
array | 5
age | 2
-(24 rows)
+ foo | 2
+ fool | 1
+(26 rows)
-- sort/hash
SELECT count(distinct j) FROM testjsonb;
count
-------
- 891
+ 894
(1 row)
SET enable_hashagg = off;
SELECT count(*) FROM (SELECT j FROM (SELECT * FROM testjsonb UNION ALL SELECT * FROM testjsonb) js GROUP BY j) js2;
count
-------
- 891
+ 894
(1 row)
SET enable_hashagg = on;
@@ -1642,7 +1656,7 @@ SET enable_sort = off;
SELECT count(*) FROM (SELECT j FROM (SELECT * FROM testjsonb UNION ALL SELECT * FROM testjsonb) js GROUP BY j) js2;
count
-------
- 891
+ 894
(1 row)
SELECT distinct * FROM (values (jsonb '{}' || ''),('{}')) v(j);
@@ -1709,7 +1723,7 @@ SELECT count(*) FROM testjsonb WHERE j @> '{"age":25.0}';
SELECT count(*) FROM testjsonb WHERE j @> '{}';
count
-------
- 1009
+ 1012
(1 row)
RESET enable_seqscan;
diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out
index 35524fb9a7e..0e3ebd161ef 100644
--- a/src/test/regress/expected/jsonb_1.out
+++ b/src/test/regress/expected/jsonb_1.out
@@ -1483,6 +1483,12 @@ SELECT count(*) FROM testjsonb WHERE j ? 'public';
194
(1 row)
+SELECT count(*) FROM testjsonb WHERE j ? 'bar';
+ count
+-------
+ 0
+(1 row)
+
SELECT count(*) FROM testjsonb WHERE j ?| ARRAY['public','disabled'];
count
-------
@@ -1543,7 +1549,7 @@ SELECT count(*) FROM testjsonb WHERE j @> '{"array":["bar"]}';
SELECT count(*) FROM testjsonb WHERE j @> '{}';
count
-------
- 1009
+ 1012
(1 row)
SELECT count(*) FROM testjsonb WHERE j ? 'public';
@@ -1552,6 +1558,12 @@ SELECT count(*) FROM testjsonb WHERE j ? 'public';
194
(1 row)
+SELECT count(*) FROM testjsonb WHERE j ? 'bar';
+ count
+-------
+ 0
+(1 row)
+
SELECT count(*) FROM testjsonb WHERE j ?| ARRAY['public','disabled'];
count
-------
@@ -1591,7 +1603,7 @@ RESET enable_seqscan;
SELECT count(*) FROM (SELECT (jsonb_each(j)).key FROM testjsonb) AS wow;
count
-------
- 4788
+ 4791
(1 row)
SELECT key, count(*) FROM (SELECT (jsonb_each(j)).key FROM testjsonb) AS wow GROUP BY key ORDER BY count DESC, key;
@@ -1621,20 +1633,22 @@ SELECT key, count(*) FROM (SELECT (jsonb_each(j)).key FROM testjsonb) AS wow GRO
abstract | 161
array | 5
age | 2
-(24 rows)
+ foo | 2
+ fool | 1
+(26 rows)
-- sort/hash
SELECT count(distinct j) FROM testjsonb;
count
-------
- 891
+ 894
(1 row)
SET enable_hashagg = off;
SELECT count(*) FROM (SELECT j FROM (SELECT * FROM testjsonb UNION ALL SELECT * FROM testjsonb) js GROUP BY j) js2;
count
-------
- 891
+ 894
(1 row)
SET enable_hashagg = on;
@@ -1642,7 +1656,7 @@ SET enable_sort = off;
SELECT count(*) FROM (SELECT j FROM (SELECT * FROM testjsonb UNION ALL SELECT * FROM testjsonb) js GROUP BY j) js2;
count
-------
- 891
+ 894
(1 row)
SELECT distinct * FROM (values (jsonb '{}' || ''),('{}')) v(j);
@@ -1709,7 +1723,7 @@ SELECT count(*) FROM testjsonb WHERE j @> '{"age":25.0}';
SELECT count(*) FROM testjsonb WHERE j @> '{}';
count
-------
- 1009
+ 1012
(1 row)
RESET enable_seqscan;
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index 3ee43e93470..3e9048911be 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -334,6 +334,7 @@ SELECT count(*) FROM testjsonb WHERE j @> '{"wait":"CC", "public":true}';
SELECT count(*) FROM testjsonb WHERE j @> '{"age":25}';
SELECT count(*) FROM testjsonb WHERE j @> '{"age":25.0}';
SELECT count(*) FROM testjsonb WHERE j ? 'public';
+SELECT count(*) FROM testjsonb WHERE j ? 'bar';
SELECT count(*) FROM testjsonb WHERE j ?| ARRAY['public','disabled'];
SELECT count(*) FROM testjsonb WHERE j ?& ARRAY['public','disabled'];
@@ -350,6 +351,7 @@ SELECT count(*) FROM testjsonb WHERE j @> '{"array":["bar"]}';
-- excercise GIN_SEARCH_MODE_ALL
SELECT count(*) FROM testjsonb WHERE j @> '{}';
SELECT count(*) FROM testjsonb WHERE j ? 'public';
+SELECT count(*) FROM testjsonb WHERE j ? 'bar';
SELECT count(*) FROM testjsonb WHERE j ?| ARRAY['public','disabled'];
SELECT count(*) FROM testjsonb WHERE j ?& ARRAY['public','disabled'];