From 4d867458fce3743adc95ad6513c9d2dea87cd7f4 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Thu, 5 Nov 2015 18:15:48 -0500
Subject: Fix erroneous hash calculations in gin_extract_jsonb_path().

The jsonb_path_ops code calculated hash values inconsistently in some cases
involving nested arrays and objects. This would result in queries possibly
not finding entries that they should find, when using a jsonb_path_ops GIN
index for the search. The problem cases involve JSONB values that contain
both scalars and sub-objects at the same nesting level, for example an array
containing both scalars and sub-arrays. To fix, reset the current stack->hash
after processing each value or sub-object, not before; and don't try to be
cute about the outermost level's initial hash.

Correcting this means that existing jsonb_path_ops indexes may now be
inconsistent with the new hash calculation code. The symptom is the same ---
searches not finding entries they should find --- but the specific rows
affected are likely to be different. Users will need to REINDEX
jsonb_path_ops indexes to make sure that all searches work as expected.

Per bug #13756 from Daniel Cheng.

Back-patch to 9.4 where the faulty logic was introduced.
---
 src/backend/utils/adt/jsonb_gin.c | 51 ++++++++++++++-------------------------
 1 file changed, 18 insertions(+), 33 deletions(-)

(limited to 'src/backend/utils/adt/jsonb_gin.c')

diff --git a/src/backend/utils/adt/jsonb_gin.c b/src/backend/utils/adt/jsonb_gin.c
index 204fb8b5919..1a8d646d74d 100644
--- a/src/backend/utils/adt/jsonb_gin.c
+++ b/src/backend/utils/adt/jsonb_gin.c
@@ -375,51 +375,31 @@ gin_extract_jsonb_path(PG_FUNCTION_ARGS)
 				parent = stack;
 				stack = (PathHashStack *) palloc(sizeof(PathHashStack));
 
-				if (parent->parent)
-				{
-					/*
-					 * We pass forward hashes from previous container nesting
-					 * levels so that nested arrays with an outermost nested
-					 * object will have element hashes mixed with the
-					 * outermost key. It's also somewhat useful to have
-					 * nested objects' innermost values have hashes that are a
-					 * function of not just their own key, but outer keys too.
-					 *
-					 * Nesting an array within another array will not alter
-					 * innermost scalar element hash values, but that seems
-					 * inconsequential.
-					 */
-					stack->hash = parent->hash;
-				}
-				else
-				{
-					/*
-					 * At the outermost level, initialize hash with container
-					 * type proxy value. Note that this makes JB_FARRAY and
-					 * JB_FOBJECT part of the on-disk representation, but they
-					 * are that in the base jsonb object storage already.
-					 */
-					stack->hash = (r == WJB_BEGIN_ARRAY) ? JB_FARRAY : JB_FOBJECT;
-				}
+				/*
+				 * We pass forward hashes from outer nesting levels so that
+				 * the hashes for nested values will include outer keys as
+				 * well as their own keys.
+				 *
+				 * Nesting an array within another array will not alter
+				 * innermost scalar element hash values, but that seems
+				 * inconsequential.
+				 */
+				stack->hash = parent->hash;
 				stack->parent = parent;
 				break;
 			case WJB_KEY:
-				/* initialize hash from parent */
-				stack->hash = stack->parent->hash;
-				/* and mix in this key */
+				/* mix this key into the current outer hash */
 				JsonbHashScalarValue(&v, &stack->hash);
 				/* hash is now ready to incorporate the value */
 				break;
 			case WJB_ELEM:
-				/* array elements use parent hash mixed with element's hash */
-				stack->hash = stack->parent->hash;
-				/* FALL THRU */
 			case WJB_VALUE:
 				/* mix the element or value's hash into the prepared hash */
 				JsonbHashScalarValue(&v, &stack->hash);
 				/* and emit an index entry */
 				entries[i++] = UInt32GetDatum(stack->hash);
-				/* Note: we assume we'll see KEY before another VALUE */
+				/* reset hash for next key, value, or sub-object */
+				stack->hash = stack->parent->hash;
 				break;
 			case WJB_END_ARRAY:
 			case WJB_END_OBJECT:
@@ -427,6 +407,11 @@ gin_extract_jsonb_path(PG_FUNCTION_ARGS)
 				parent = stack->parent;
 				pfree(stack);
 				stack = parent;
+				/* reset hash for next key, value, or sub-object */
+				if (stack->parent)
+					stack->hash = stack->parent->hash;
+				else
+					stack->hash = 0;
 				break;
 			default:
 				elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);
-- 
cgit v1.2.3