aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/gin/ginarrayproc.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2011-01-07 19:16:24 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us> 2011-01-07 19:16:24 -0500
commit: 73912e7fbd1b52c51d914214abbec1cda64595f2 (patch)
tree: f6ae2849198dd7a17ae6a5ec174796848ec07cdb /src/backend/access/gin/ginarrayproc.c
parent: 9b4271deb97270d336c9d34ac911748faa5a4892 (diff)
download: postgresql-73912e7fbd1b52c51d914214abbec1cda64595f2.tar.gz
download: postgresql-73912e7fbd1b52c51d914214abbec1cda64595f2.zip
Fix GIN to support null keys, empty and null items, and full index scans.
Per my recent proposal(s). Null key datums can now be returned by extractValue and extractQuery functions, and will be stored in the index. Also, placeholder entries are made for indexable items that are NULL or contain no keys according to extractValue. This means that the index is now always complete, having at least one entry for every indexed heap TID, and so we can get rid of the prohibition on full-index scans. A full-index scan is implemented much the same way as partial-match scans were already: we build a bitmap representing all the TIDs found in the index, and then drive the results off that. Also, introduce a concept of a "search mode" that can be requested by extractQuery when the operator requires matching to empty items (this is just as cheap as matching to a single key) or requires a full index scan (which is not so cheap, but it sure beats failing or giving wrong answers). The behavior remains backward compatible for opclasses that don't return any null keys or request a non-default search mode. Using these features, we can now make the GIN index opclass for anyarray behave in a way that matches the actual anyarray operators for &&, <@, @>, and = ... which it failed to do before in assorted corner cases. This commit fixes the core GIN code and ginarrayproc.c, updates the documentation, and adds some simple regression test cases for the new behaviors using the array operators. The tsearch and contrib GIN opclass support functions still need to be looked over and probably fixed. Another thing I intend to fix separately is that this is pretty inefficient for cases where more than one scan condition needs a full-index search: we'll run duplicate GinScanEntrys, each one of which builds a large bitmap. There is some existing logic to merge duplicate GinScanEntrys but it needs refactoring to make it work for entries belonging to different scan keys. 
Note that most of gin.h has been split out into a new file gin_private.h, so that gin.h doesn't export anything that's not supposed to be used by GIN opclasses or the rest of the backend. I did quite a bit of other code beautification work as well, mostly fixing comments and choosing more appropriate names for things.
Diffstat (limited to 'src/backend/access/gin/ginarrayproc.c')
-rw-r--r-- src/backend/access/gin/ginarrayproc.c 168
1 file changed, 105 insertions, 63 deletions
diff --git a/src/backend/access/gin/ginarrayproc.c b/src/backend/access/gin/ginarrayproc.c
index 1837a0d5a10..2100c5fd0e4 100644
--- a/src/backend/access/gin/ginarrayproc.c
+++ b/src/backend/access/gin/ginarrayproc.c
@@ -14,7 +14,9 @@
#include "postgres.h"
#include "access/gin.h"
+#include "access/skey.h"
#include "utils/array.h"
+#include "utils/builtins.h"
#include "utils/lsyscache.h"
@@ -23,34 +25,23 @@
#define GinContainedStrategy 3
#define GinEqualStrategy 4
-#define ARRAYCHECK(x) do { \
- if ( ARR_HASNULL(x) ) \
- ereport(ERROR, \
- (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), \
- errmsg("array must not contain null values"))); \
-} while(0)
-
/*
- * Function used as extractValue and extractQuery both
+ * extractValue support function
*/
Datum
ginarrayextract(PG_FUNCTION_ARGS)
{
- ArrayType *array;
- int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
- Datum *entries = NULL;
+ /* Make copy of array input to ensure it doesn't disappear while in use */
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P_COPY(0);
+ int32 *nkeys = (int32 *) PG_GETARG_POINTER(1);
+ bool **nullFlags = (bool **) PG_GETARG_POINTER(2);
int16 elmlen;
bool elmbyval;
char elmalign;
-
- /*
- * we should guarantee that array will not be destroyed during all
- * operation
- */
- array = PG_GETARG_ARRAYTYPE_P_COPY(0);
-
- ARRAYCHECK(array);
+ Datum *elems;
+ bool *nulls;
+ int nelems;
get_typlenbyvalalign(ARR_ELEMTYPE(array),
&elmlen, &elmbyval, &elmalign);
@@ -58,89 +49,140 @@ ginarrayextract(PG_FUNCTION_ARGS)
deconstruct_array(array,
ARR_ELEMTYPE(array),
elmlen, elmbyval, elmalign,
- &entries, NULL, (int *) nentries);
+ &elems, &nulls, &nelems);
- if (*nentries == 0 && PG_NARGS() == 3)
- {
- switch (PG_GETARG_UINT16(2)) /* StrategyNumber */
- {
- case GinOverlapStrategy:
- *nentries = -1; /* nobody can be found */
- break;
- case GinContainsStrategy:
- case GinContainedStrategy:
- case GinEqualStrategy:
- default: /* require fullscan: GIN can't find void
- * arrays */
- break;
- }
- }
+ *nkeys = nelems;
+ *nullFlags = nulls;
- /* we should not free array, entries[i] points into it */
- PG_RETURN_POINTER(entries);
+ /* we should not free array, elems[i] points into it */
+ PG_RETURN_POINTER(elems);
}
+/*
+ * extractQuery support function
+ */
Datum
ginqueryarrayextract(PG_FUNCTION_ARGS)
{
- PG_RETURN_DATUM(DirectFunctionCall3(ginarrayextract,
- PG_GETARG_DATUM(0),
- PG_GETARG_DATUM(1),
- PG_GETARG_DATUM(2)));
+ /* Make copy of array input to ensure it doesn't disappear while in use */
+ ArrayType *array = PG_GETARG_ARRAYTYPE_P_COPY(0);
+ int32 *nkeys = (int32 *) PG_GETARG_POINTER(1);
+ StrategyNumber strategy = PG_GETARG_UINT16(2);
+ /* bool **pmatch = (bool **) PG_GETARG_POINTER(3); */
+ /* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
+ bool **nullFlags = (bool **) PG_GETARG_POINTER(5);
+ int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
+ int16 elmlen;
+ bool elmbyval;
+ char elmalign;
+ Datum *elems;
+ bool *nulls;
+ int nelems;
+
+ get_typlenbyvalalign(ARR_ELEMTYPE(array),
+ &elmlen, &elmbyval, &elmalign);
+
+ deconstruct_array(array,
+ ARR_ELEMTYPE(array),
+ elmlen, elmbyval, elmalign,
+ &elems, &nulls, &nelems);
+
+ *nkeys = nelems;
+ *nullFlags = nulls;
+
+ switch (strategy)
+ {
+ case GinOverlapStrategy:
+ *searchMode = GIN_SEARCH_MODE_DEFAULT;
+ break;
+ case GinContainsStrategy:
+ if (nelems > 0)
+ *searchMode = GIN_SEARCH_MODE_DEFAULT;
+ else /* everything contains the empty set */
+ *searchMode = GIN_SEARCH_MODE_ALL;
+ break;
+ case GinContainedStrategy:
+ /* empty set is contained in everything */
+ *searchMode = GIN_SEARCH_MODE_INCLUDE_EMPTY;
+ break;
+ case GinEqualStrategy:
+ if (nelems > 0)
+ *searchMode = GIN_SEARCH_MODE_DEFAULT;
+ else
+ *searchMode = GIN_SEARCH_MODE_INCLUDE_EMPTY;
+ break;
+ default:
+ elog(ERROR, "ginqueryarrayextract: unknown strategy number: %d",
+ strategy);
+ }
+
+ /* we should not free array, elems[i] points into it */
+ PG_RETURN_POINTER(elems);
}
+/*
+ * consistent support function
+ */
Datum
ginarrayconsistent(PG_FUNCTION_ARGS)
{
bool *check = (bool *) PG_GETARG_POINTER(0);
StrategyNumber strategy = PG_GETARG_UINT16(1);
- ArrayType *query = PG_GETARG_ARRAYTYPE_P(2);
-
- /* int32 nkeys = PG_GETARG_INT32(3); */
+ /* ArrayType *query = PG_GETARG_ARRAYTYPE_P(2); */
+ int32 nkeys = PG_GETARG_INT32(3);
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
bool *recheck = (bool *) PG_GETARG_POINTER(5);
+ /* Datum *queryKeys = (Datum *) PG_GETARG_POINTER(6); */
+ bool *nullFlags = (bool *) PG_GETARG_POINTER(7);
bool res;
- int i,
- nentries;
-
- /* ARRAYCHECK was already done by previous ginarrayextract call */
+ int32 i;
switch (strategy)
{
case GinOverlapStrategy:
/* result is not lossy */
*recheck = false;
- /* at least one element in check[] is true, so result = true */
- res = true;
- break;
- case GinContainedStrategy:
- /* we will need recheck */
- *recheck = true;
- /* at least one element in check[] is true, so result = true */
- res = true;
+ /* must have a match for at least one non-null element */
+ res = false;
+ for (i = 0; i < nkeys; i++)
+ {
+ if (check[i] && !nullFlags[i])
+ {
+ res = true;
+ break;
+ }
+ }
break;
case GinContainsStrategy:
/* result is not lossy */
*recheck = false;
- /* must have all elements in check[] true */
- nentries = ArrayGetNItems(ARR_NDIM(query), ARR_DIMS(query));
+ /* must have all elements in check[] true, and no nulls */
res = true;
- for (i = 0; i < nentries; i++)
+ for (i = 0; i < nkeys; i++)
{
- if (!check[i])
+ if (!check[i] || nullFlags[i])
{
res = false;
break;
}
}
break;
+ case GinContainedStrategy:
+ /* we will need recheck */
+ *recheck = true;
+ /* can't do anything else useful here */
+ res = true;
+ break;
case GinEqualStrategy:
/* we will need recheck */
*recheck = true;
- /* must have all elements in check[] true */
- nentries = ArrayGetNItems(ARR_NDIM(query), ARR_DIMS(query));
+ /*
+ * Must have all elements in check[] true; no discrimination
+ * against nulls here. This is because array_contain_compare
+ * and array_eq handle nulls differently ...
+ */
res = true;
- for (i = 0; i < nentries; i++)
+ for (i = 0; i < nkeys; i++)
{
if (!check[i])
{