3 files changed, 478 insertions, 20 deletions
diff --git a/src/backend/optimizer/util/predtest.c b/src/backend/optimizer/util/predtest.c
index 3c9f245e4da..fc4fc09a835 100644
--- a/src/backend/optimizer/util/predtest.c
+++ b/src/backend/optimizer/util/predtest.c
@@ -99,7 +99,7 @@ static bool predicate_refuted_by_simple_clause(Expr *predicate, Node *clause,
 								   bool weak);
 static Node *extract_not_arg(Node *clause);
 static Node *extract_strong_not_arg(Node *clause);
-static bool clause_is_strict_for(Node *clause, Node *subexpr);
+static bool clause_is_strict_for(Node *clause, Node *subexpr, bool allow_false);
 static bool operator_predicate_proof(Expr *predicate, Node *clause,
 						 bool refute_it, bool weak);
 static bool operator_same_subexprs_proof(Oid pred_op, Oid clause_op,
@@ -816,7 +816,7 @@ predicate_refuted_by_recurse(Node *clause, Node *predicate,
  * This function also implements enforcement of MAX_SAOP_ARRAY_SIZE: if a
  * ScalarArrayOpExpr's array has too many elements, we just classify it as an
  * atom.  (This will result in its being passed as-is to the simple_clause
- * functions, which will fail to prove anything about it.)	Note that we
+ * functions, many of which will fail to prove anything about it.) Note that we
  * cannot just stop after considering MAX_SAOP_ARRAY_SIZE elements; in general
  * that would result in wrong proofs, rather than failing to prove anything.
  */
@@ -1099,8 +1099,8 @@ arrayexpr_cleanup_fn(PredIterInfo info)
  *
  * If the predicate is of the form "foo IS NOT NULL", and we are considering
  * strong implication, we can conclude that the predicate is implied if the
- * clause is strict for "foo", i.e., it must yield NULL when "foo" is NULL.
- * In that case truth of the clause requires that "foo" isn't NULL.
+ * clause is strict for "foo", i.e., it must yield false or NULL when "foo"
+ * is NULL.  In that case truth of the clause ensures that "foo" isn't NULL.
  * (Again, this is a safe conclusion because "foo" must be immutable.)
  * This doesn't work for weak implication, though.
  *
@@ -1131,7 +1131,7 @@ predicate_implied_by_simple_clause(Expr *predicate, Node *clause,
 			!ntest->argisrow)
 		{
 			/* strictness of clause for foo implies foo IS NOT NULL */
-			if (clause_is_strict_for(clause, (Node *) ntest->arg))
+			if (clause_is_strict_for(clause, (Node *) ntest->arg, true))
 				return true;
 		}
 		return false;			/* we can't succeed below... */
@@ -1160,8 +1160,8 @@ predicate_implied_by_simple_clause(Expr *predicate, Node *clause,
  *
  * A clause "foo IS NULL" refutes a predicate "foo IS NOT NULL" in all cases.
  * If we are considering weak refutation, it also refutes a predicate that
- * is strict for "foo", since then the predicate must yield NULL (and since
- * "foo" appears in the predicate, it's known immutable).
+ * is strict for "foo", since then the predicate must yield false or NULL
+ * (and since "foo" appears in the predicate, it's known immutable).
  *
  * (The main motivation for covering these IS [NOT] NULL cases is to support
  * using IS NULL/IS NOT NULL as partition-defining constraints.)
@@ -1194,7 +1194,7 @@ predicate_refuted_by_simple_clause(Expr *predicate, Node *clause,
 			return false;
 
 		/* strictness of clause for foo refutes foo IS NULL */
-		if (clause_is_strict_for(clause, (Node *) isnullarg))
+		if (clause_is_strict_for(clause, (Node *) isnullarg, true))
 			return true;
 
 		/* foo IS NOT NULL refutes foo IS NULL */
@@ -1226,7 +1226,7 @@ predicate_refuted_by_simple_clause(Expr *predicate, Node *clause,
 
 		/* foo IS NULL weakly refutes any predicate that is strict for foo */
 		if (weak &&
-			clause_is_strict_for((Node *) predicate, (Node *) isnullarg))
+			clause_is_strict_for((Node *) predicate, (Node *) isnullarg, true))
 			return true;
 
 		return false;			/* we can't succeed below... */
@@ -1293,17 +1293,30 @@ extract_strong_not_arg(Node *clause)
 
 
 /*
- * Can we prove that "clause" returns NULL if "subexpr" does?
+ * Can we prove that "clause" returns NULL (or FALSE) if "subexpr" is
+ * assumed to yield NULL?
  *
- * The base case is that clause and subexpr are equal().  (We assume that
- * the caller knows at least one of the input expressions is immutable,
- * as this wouldn't hold for volatile expressions.)
+ * In most places in the planner, "strictness" refers to a guarantee that
+ * an expression yields NULL output for a NULL input, and that's mostly what
+ * we're looking for here.  However, at top level where the clause is known
+ * to yield boolean, it may be sufficient to prove that it cannot return TRUE
+ * when "subexpr" is NULL.  The caller should pass allow_false = true when
+ * this weaker property is acceptable.  (When this function recurses
+ * internally, we pass down allow_false = false since we need to prove actual
+ * nullness of the subexpression.)
+ *
+ * We assume that the caller checked that least one of the input expressions
+ * is immutable.  All of the proof rules here involve matching "subexpr" to
+ * some portion of "clause", so that this allows assuming that "subexpr" is
+ * immutable without a separate check.
+ *
+ * The base case is that clause and subexpr are equal().
  *
  * We can also report success if the subexpr appears as a subexpression
  * of "clause" in a place where it'd force nullness of the overall result.
  */
 static bool
-clause_is_strict_for(Node *clause, Node *subexpr)
+clause_is_strict_for(Node *clause, Node *subexpr, bool allow_false)
 {
 	ListCell   *lc;
 
@@ -1336,7 +1349,7 @@ clause_is_strict_for(Node *clause, Node *subexpr)
 	{
 		foreach(lc, ((OpExpr *) clause)->args)
 		{
-			if (clause_is_strict_for((Node *) lfirst(lc), subexpr))
+			if (clause_is_strict_for((Node *) lfirst(lc), subexpr, false))
 				return true;
 		}
 		return false;
@@ -1346,7 +1359,7 @@ clause_is_strict_for(Node *clause, Node *subexpr)
 	{
 		foreach(lc, ((FuncExpr *) clause)->args)
 		{
-			if (clause_is_strict_for((Node *) lfirst(lc), subexpr))
+			if (clause_is_strict_for((Node *) lfirst(lc), subexpr, false))
 				return true;
 		}
 		return false;
@@ -1362,16 +1375,89 @@ clause_is_strict_for(Node *clause, Node *subexpr)
 	 */
 	if (IsA(clause, CoerceViaIO))
 		return clause_is_strict_for((Node *) ((CoerceViaIO *) clause)->arg,
-									subexpr);
+									subexpr, false);
 	if (IsA(clause, ArrayCoerceExpr))
 		return clause_is_strict_for((Node *) ((ArrayCoerceExpr *) clause)->arg,
-									subexpr);
+									subexpr, false);
 	if (IsA(clause, ConvertRowtypeExpr))
 		return clause_is_strict_for((Node *) ((ConvertRowtypeExpr *) clause)->arg,
-									subexpr);
+									subexpr, false);
 	if (IsA(clause, CoerceToDomain))
 		return clause_is_strict_for((Node *) ((CoerceToDomain *) clause)->arg,
-									subexpr);
+									subexpr, false);
+
+	/*
+	 * ScalarArrayOpExpr is a special case.  Note that we'd only reach here
+	 * with a ScalarArrayOpExpr clause if we failed to deconstruct it into an
+	 * AND or OR tree, as for example if it has too many array elements.
+	 */
+	if (IsA(clause, ScalarArrayOpExpr))
+	{
+		ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+		Node	   *scalarnode = (Node *) linitial(saop->args);
+		Node	   *arraynode = (Node *) lsecond(saop->args);
+
+		/*
+		 * If we can prove the scalar input to be null, and the operator is
+		 * strict, then the SAOP result has to be null --- unless the array is
+		 * empty.  For an empty array, we'd get either false (for ANY) or true
+		 * (for ALL).  So if allow_false = true then the proof succeeds anyway
+		 * for the ANY case; otherwise we can only make the proof if we can
+		 * prove the array non-empty.
+		 */
+		if (clause_is_strict_for(scalarnode, subexpr, false) &&
+			op_strict(saop->opno))
+		{
+			int			nelems = 0;
+
+			if (allow_false && saop->useOr)
+				return true;	/* can succeed even if array is empty */
+
+			if (arraynode && IsA(arraynode, Const))
+			{
+				Const	   *arrayconst = (Const *) arraynode;
+				ArrayType  *arrval;
+
+				/*
+				 * If array is constant NULL then we can succeed, as in the
+				 * case below.
+				 */
+				if (arrayconst->constisnull)
+					return true;
+
+				/* Otherwise, we can compute the number of elements. */
+				arrval = DatumGetArrayTypeP(arrayconst->constvalue);
+				nelems = ArrayGetNItems(ARR_NDIM(arrval), ARR_DIMS(arrval));
+			}
+			else if (arraynode && IsA(arraynode, ArrayExpr) &&
+					 !((ArrayExpr *) arraynode)->multidims)
+			{
+				/*
+				 * We can also reliably count the number of array elements if
+				 * the input is a non-multidim ARRAY[] expression.
+				 */
+				nelems = list_length(((ArrayExpr *) arraynode)->elements);
+			}
+
+			/* Proof succeeds if array is definitely non-empty */
+			if (nelems > 0)
+				return true;
+		}
+
+		/*
+		 * If we can prove the array input to be null, the proof succeeds in
+		 * all cases, since ScalarArrayOpExpr will always return NULL for a
+		 * NULL array.  Otherwise, we're done here.
+		 */
+		return clause_is_strict_for(arraynode, subexpr, false);
+	}
+
+	/*
+	 * When recursing into an expression, we might find a NULL constant.
+	 * That's certainly NULL, whether it matches subexpr or not.
+	 */
+	if (IsA(clause, Const))
+		return ((Const *) clause)->constisnull;
 
 	return false;
 }
diff --git a/src/test/modules/test_predtest/expected/test_predtest.out b/src/test/modules/test_predtest/expected/test_predtest.out
index 5574e03204f..6d21bcd603e 100644
--- a/src/test/modules/test_predtest/expected/test_predtest.out
+++ b/src/test/modules/test_predtest/expected/test_predtest.out
@@ -19,6 +19,9 @@ from generate_series(0, 11*11-1) i;
 -- and a simple strict function that's opaque to the optimizer
 create function strictf(bool, bool) returns bool
 language plpgsql as $$begin return $1 and not $2; end$$ strict;
+-- a simple function to make arrays opaque to the optimizer
+create function opaque_array(int[]) returns int[]
+language plpgsql as $$begin return $1; end$$ strict;
 -- Basic proof rules for single boolean variables
 select * from test_predtest($$
 select x, x
@@ -837,3 +840,257 @@ w_i_holds         | f
 s_r_holds         | f
 w_r_holds         | f
 
+-- In these tests, we want to prevent predtest.c from breaking down the
+-- ScalarArrayOpExpr into an AND/OR tree, so as to exercise the logic
+-- that handles ScalarArrayOpExpr directly.  We use opaque_array() if
+-- possible, otherwise an array longer than MAX_SAOP_ARRAY_SIZE.
+-- ScalarArrayOpExpr implies scalar IS NOT NULL
+select * from test_predtest($$
+select x is not null, x = any(opaque_array(array[1]))
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | t
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | t
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+-- but for ALL, we have to be able to prove the array nonempty
+select * from test_predtest($$
+select x is not null, x <> all(opaque_array(array[1]))
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | t
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+select * from test_predtest($$
+select x is not null, x <> all(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101
+])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | t
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | t
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+select * from test_predtest($$
+select x is not null, x <> all(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,y
+])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | t
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | t
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+-- check empty-array cases
+select * from test_predtest($$
+select x is not null, x = any(opaque_array(array[]::int[]))
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | t
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | t
+w_i_holds         | t
+s_r_holds         | t
+w_r_holds         | t
+
+select * from test_predtest($$
+select x is not null, x <> all(opaque_array(array[]::int[]))
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+-- same thing under a strict function doesn't prove it
+select * from test_predtest($$
+select x is not null, strictf(true, x = any(opaque_array(array[]::int[])))
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+-- ScalarArrayOpExpr refutes scalar IS NULL
+select * from test_predtest($$
+select x is null, x = any(opaque_array(array[1]))
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | t
+weak_refuted_by   | t
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | t
+w_r_holds         | t
+
+-- but for ALL, we have to be able to prove the array nonempty
+select * from test_predtest($$
+select x is null, x <> all(opaque_array(array[1]))
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | t
+w_r_holds         | t
+
+select * from test_predtest($$
+select x is null, x <> all(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101
+])
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | t
+weak_refuted_by   | t
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | t
+w_r_holds         | t
+
+-- check empty-array cases
+select * from test_predtest($$
+select x is null, x = any(opaque_array(array[]::int[]))
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | t
+weak_refuted_by   | t
+s_i_holds         | t
+w_i_holds         | t
+s_r_holds         | t
+w_r_holds         | t
+
+select * from test_predtest($$
+select x is null, x <> all(opaque_array(array[]::int[]))
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+-- same thing under a strict function doesn't prove it
+select * from test_predtest($$
+select x is null, strictf(true, x = any(opaque_array(array[]::int[])))
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | f
+s_r_holds         | f
+w_r_holds         | f
+
+-- Also, nullness of the scalar weakly refutes a SAOP
+select * from test_predtest($$
+select x = any(opaque_array(array[1])), x is null
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | t
+s_i_holds         | f
+w_i_holds         | t
+s_r_holds         | f
+w_r_holds         | t
+
+-- as does nullness of the array
+select * from test_predtest($$
+select x = any(opaque_array(array[y])), array[y] is null
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | t
+s_i_holds         | t
+w_i_holds         | t
+s_r_holds         | t
+w_r_holds         | t
+
+-- ... unless we need to prove array empty
+select * from test_predtest($$
+select x = all(opaque_array(array[1])), x is null
+from integers
+$$);
+-[ RECORD 1 ]-----+--
+strong_implied_by | f
+weak_implied_by   | f
+strong_refuted_by | f
+weak_refuted_by   | f
+s_i_holds         | f
+w_i_holds         | t
+s_r_holds         | f
+w_r_holds         | t
+
diff --git a/src/test/modules/test_predtest/sql/test_predtest.sql b/src/test/modules/test_predtest/sql/test_predtest.sql
index 27347358439..072eb5b0d50 100644
--- a/src/test/modules/test_predtest/sql/test_predtest.sql
+++ b/src/test/modules/test_predtest/sql/test_predtest.sql
@@ -25,6 +25,10 @@ from generate_series(0, 11*11-1) i;
 create function strictf(bool, bool) returns bool
 language plpgsql as $$begin return $1 and not $2; end$$ strict;
 
+-- a simple function to make arrays opaque to the optimizer
+create function opaque_array(int[]) returns int[]
+language plpgsql as $$begin return $1; end$$ strict;
+
 -- Basic proof rules for single boolean variables
 
 select * from test_predtest($$
@@ -325,3 +329,114 @@ select * from test_predtest($$
 select x <= y, x = any(array[1,3,y])
 from integers
 $$);
+
+-- In these tests, we want to prevent predtest.c from breaking down the
+-- ScalarArrayOpExpr into an AND/OR tree, so as to exercise the logic
+-- that handles ScalarArrayOpExpr directly.  We use opaque_array() if
+-- possible, otherwise an array longer than MAX_SAOP_ARRAY_SIZE.
+
+-- ScalarArrayOpExpr implies scalar IS NOT NULL
+select * from test_predtest($$
+select x is not null, x = any(opaque_array(array[1]))
+from integers
+$$);
+
+-- but for ALL, we have to be able to prove the array nonempty
+select * from test_predtest($$
+select x is not null, x <> all(opaque_array(array[1]))
+from integers
+$$);
+
+select * from test_predtest($$
+select x is not null, x <> all(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101
+])
+from integers
+$$);
+
+select * from test_predtest($$
+select x is not null, x <> all(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,y
+])
+from integers
+$$);
+
+-- check empty-array cases
+select * from test_predtest($$
+select x is not null, x = any(opaque_array(array[]::int[]))
+from integers
+$$);
+
+select * from test_predtest($$
+select x is not null, x <> all(opaque_array(array[]::int[]))
+from integers
+$$);
+
+-- same thing under a strict function doesn't prove it
+select * from test_predtest($$
+select x is not null, strictf(true, x = any(opaque_array(array[]::int[])))
+from integers
+$$);
+
+-- ScalarArrayOpExpr refutes scalar IS NULL
+select * from test_predtest($$
+select x is null, x = any(opaque_array(array[1]))
+from integers
+$$);
+
+-- but for ALL, we have to be able to prove the array nonempty
+select * from test_predtest($$
+select x is null, x <> all(opaque_array(array[1]))
+from integers
+$$);
+
+select * from test_predtest($$
+select x is null, x <> all(array[
+  1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,
+  29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,
+  54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,
+  79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101
+])
+from integers
+$$);
+
+-- check empty-array cases
+select * from test_predtest($$
+select x is null, x = any(opaque_array(array[]::int[]))
+from integers
+$$);
+
+select * from test_predtest($$
+select x is null, x <> all(opaque_array(array[]::int[]))
+from integers
+$$);
+
+-- same thing under a strict function doesn't prove it
+select * from test_predtest($$
+select x is null, strictf(true, x = any(opaque_array(array[]::int[])))
+from integers
+$$);
+
+-- Also, nullness of the scalar weakly refutes a SAOP
+select * from test_predtest($$
+select x = any(opaque_array(array[1])), x is null
+from integers
+$$);
+
+-- as does nullness of the array
+select * from test_predtest($$
+select x = any(opaque_array(array[y])), array[y] is null
+from integers
+$$);
+
+-- ... unless we need to prove array empty
+select * from test_predtest($$
+select x = all(opaque_array(array[1])), x is null
+from integers
+$$);