9 files changed, 798 insertions, 203 deletions
diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c
index 37a735b06bb..b88b29ec23a 100644
--- a/src/backend/optimizer/path/clausesel.c
+++ b/src/backend/optimizer/path/clausesel.c
@@ -44,6 +44,12 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
 						   bool varonleft, bool isLTsel, Selectivity s2);
 static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
 											   List *clauses);
+static Selectivity clauselist_selectivity_or(PlannerInfo *root,
+											 List *clauses,
+											 int varRelid,
+											 JoinType jointype,
+											 SpecialJoinInfo *sjinfo,
+											 bool use_extended_stats);
 
 /****************************************************************************
  *		ROUTINES TO COMPUTE SELECTIVITIES
@@ -61,64 +67,10 @@ static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
  *
  * The basic approach is to apply extended statistics first, on as many
  * clauses as possible, in order to capture cross-column dependencies etc.
- * The remaining clauses are then estimated using regular statistics tracked
- * for individual columns.  This is done by simply passing the clauses to
- * clauselist_selectivity_simple.
- */
-Selectivity
-clauselist_selectivity(PlannerInfo *root,
-					   List *clauses,
-					   int varRelid,
-					   JoinType jointype,
-					   SpecialJoinInfo *sjinfo)
-{
-	Selectivity s1 = 1.0;
-	RelOptInfo *rel;
-	Bitmapset  *estimatedclauses = NULL;
-
-	/*
-	 * Determine if these clauses reference a single relation.  If so, and if
-	 * it has extended statistics, try to apply those.
-	 */
-	rel = find_single_rel_for_clauses(root, clauses);
-	if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
-	{
-		/*
-		 * Estimate as many clauses as possible using extended statistics.
-		 *
-		 * 'estimatedclauses' tracks the 0-based list position index of
-		 * clauses that we've estimated using extended statistics, and that
-		 * should be ignored.
-		 */
-		s1 *= statext_clauselist_selectivity(root, clauses, varRelid,
-											 jointype, sjinfo, rel,
-											 &estimatedclauses);
-	}
-
-	/*
-	 * Apply normal selectivity estimates for the remaining clauses, passing
-	 * 'estimatedclauses' so that it skips already estimated ones.
-	 */
-	return s1 * clauselist_selectivity_simple(root, clauses, varRelid,
-											  jointype, sjinfo,
-											  estimatedclauses);
-}
-
-/*
- * clauselist_selectivity_simple -
- *	  Compute the selectivity of an implicitly-ANDed list of boolean
- *	  expression clauses.  The list can be empty, in which case 1.0
- *	  must be returned.  List elements may be either RestrictInfos
- *	  or bare expression clauses --- the former is preferred since
- *	  it allows caching of results.  The estimatedclauses bitmap tracks
- *	  clauses that have already been estimated by other means.
- *
- * See clause_selectivity() for the meaning of the additional parameters.
- *
- * Our basic approach is to take the product of the selectivities of the
- * subclauses.  However, that's only right if the subclauses have independent
- * probabilities, and in reality they are often NOT independent.  So,
- * we want to be smarter where we can.
+ * The remaining clauses are then estimated by taking the product of their
+ * selectivities, but that's only right if they have independent
+ * probabilities, and in reality they are often NOT independent even if they
+ * only refer to a single column.  So, we want to be smarter where we can.
  *
  * We also recognize "range queries", such as "x > 34 AND x < 42".  Clauses
  * are recognized as possible range query components if they are restriction
@@ -147,28 +99,68 @@ clauselist_selectivity(PlannerInfo *root,
  * selectivity functions; perhaps some day we can generalize the approach.
  */
 Selectivity
-clauselist_selectivity_simple(PlannerInfo *root,
-							  List *clauses,
-							  int varRelid,
-							  JoinType jointype,
-							  SpecialJoinInfo *sjinfo,
-							  Bitmapset *estimatedclauses)
+clauselist_selectivity(PlannerInfo *root,
+					   List *clauses,
+					   int varRelid,
+					   JoinType jointype,
+					   SpecialJoinInfo *sjinfo)
+{
+	return clauselist_selectivity_ext(root, clauses, varRelid,
+									  jointype, sjinfo, true);
+}
+
+/*
+ * clauselist_selectivity_ext -
+ *	  Extended version of clauselist_selectivity().  If "use_extended_stats"
+ *	  is false, all extended statistics will be ignored, and only per-column
+ *	  statistics will be used.
+ */
+Selectivity
+clauselist_selectivity_ext(PlannerInfo *root,
+						   List *clauses,
+						   int varRelid,
+						   JoinType jointype,
+						   SpecialJoinInfo *sjinfo,
+						   bool use_extended_stats)
 {
 	Selectivity s1 = 1.0;
+	RelOptInfo *rel;
+	Bitmapset  *estimatedclauses = NULL;
 	RangeQueryClause *rqlist = NULL;
 	ListCell   *l;
 	int			listidx;
 
 	/*
-	 * If there's exactly one clause (and it was not estimated yet), just go
-	 * directly to clause_selectivity(). None of what we might do below is
-	 * relevant.
+	 * If there's exactly one clause, just go directly to
+	 * clause_selectivity_ext(). None of what we might do below is relevant.
 	 */
-	if (list_length(clauses) == 1 && bms_is_empty(estimatedclauses))
-		return clause_selectivity(root, (Node *) linitial(clauses),
-								  varRelid, jointype, sjinfo);
+	if (list_length(clauses) == 1)
+		return clause_selectivity_ext(root, (Node *) linitial(clauses),
+									  varRelid, jointype, sjinfo,
+									  use_extended_stats);
+
+	/*
+	 * Determine if these clauses reference a single relation.  If so, and if
+	 * it has extended statistics, try to apply those.
+	 */
+	rel = find_single_rel_for_clauses(root, clauses);
+	if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
+	{
+		/*
+		 * Estimate as many clauses as possible using extended statistics.
+		 *
+		 * 'estimatedclauses' is populated with the 0-based list position
+		 * index of clauses estimated here, and that should be ignored below.
+		 */
+		s1 = statext_clauselist_selectivity(root, clauses, varRelid,
+											jointype, sjinfo, rel,
+											&estimatedclauses, false);
+	}
 
 	/*
+	 * Apply normal selectivity estimates for remaining clauses. We'll be
+	 * careful to skip any clauses which were already estimated above.
+	 *
 	 * Anything that doesn't look like a potential rangequery clause gets
 	 * multiplied into s1 and forgotten. Anything that does gets inserted into
 	 * an rqlist entry.
@@ -189,8 +181,9 @@ clauselist_selectivity_simple(PlannerInfo *root,
 		if (bms_is_member(listidx, estimatedclauses))
 			continue;
 
-		/* Always compute the selectivity using clause_selectivity */
-		s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);
+		/* Compute the selectivity of this clause in isolation */
+		s2 = clause_selectivity_ext(root, clause, varRelid, jointype, sjinfo,
+									use_extended_stats);
 
 		/*
 		 * Check for being passed a RestrictInfo.
@@ -351,6 +344,83 @@ clauselist_selectivity_simple(PlannerInfo *root,
 }
 
 /*
+ * clauselist_selectivity_or -
+ *	  Compute the selectivity of an implicitly-ORed list of boolean
+ *	  expression clauses.  The list can be empty, in which case 0.0
+ *	  must be returned.  List elements may be either RestrictInfos
+ *	  or bare expression clauses --- the former is preferred since
+ *	  it allows caching of results.
+ *
+ * See clause_selectivity() for the meaning of the additional parameters.
+ *
+ * The basic approach is to apply extended statistics first, on as many
+ * clauses as possible, in order to capture cross-column dependencies etc.
+ * The remaining clauses are then estimated as if they were independent.
+ */
+static Selectivity
+clauselist_selectivity_or(PlannerInfo *root,
+						  List *clauses,
+						  int varRelid,
+						  JoinType jointype,
+						  SpecialJoinInfo *sjinfo,
+						  bool use_extended_stats)
+{
+	Selectivity s1 = 0.0;
+	RelOptInfo *rel;
+	Bitmapset  *estimatedclauses = NULL;
+	ListCell   *lc;
+	int			listidx;
+
+	/*
+	 * Determine if these clauses reference a single relation.  If so, and if
+	 * it has extended statistics, try to apply those.
+	 */
+	rel = find_single_rel_for_clauses(root, clauses);
+	if (use_extended_stats && rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
+	{
+		/*
+		 * Estimate as many clauses as possible using extended statistics.
+		 *
+		 * 'estimatedclauses' is populated with the 0-based list position
+		 * index of clauses estimated here, and that should be ignored below.
+		 */
+		s1 = statext_clauselist_selectivity(root, clauses, varRelid,
+											jointype, sjinfo, rel,
+											&estimatedclauses, true);
+	}
+
+	/*
+	 * Estimate the remaining clauses as if they were independent.
+	 *
+	 * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to account
+	 * for the probable overlap of selected tuple sets.
+	 *
+	 * XXX is this too conservative?
+	 */
+	listidx = -1;
+	foreach(lc, clauses)
+	{
+		Selectivity s2;
+
+		listidx++;
+
+		/*
+		 * Skip this clause if it's already been estimated by some other
+		 * statistics above.
+		 */
+		if (bms_is_member(listidx, estimatedclauses))
+			continue;
+
+		s2 = clause_selectivity_ext(root, (Node *) lfirst(lc), varRelid,
+									jointype, sjinfo, use_extended_stats);
+
+		s1 = s1 + s2 - s1 * s2;
+	}
+
+	return s1;
+}
+
+/*
  * addRangeClause --- add a new range clause for clauselist_selectivity
  *
  * Here is where we try to match up pairs of range-query clauses
@@ -602,6 +672,24 @@ clause_selectivity(PlannerInfo *root,
 				   JoinType jointype,
 				   SpecialJoinInfo *sjinfo)
 {
+	return clause_selectivity_ext(root, clause, varRelid,
+								  jointype, sjinfo, true);
+}
+
+/*
+ * clause_selectivity_ext -
+ *	  Extended version of clause_selectivity().  If "use_extended_stats" is
+ *	  false, all extended statistics will be ignored, and only per-column
+ *	  statistics will be used.
+ */
+Selectivity
+clause_selectivity_ext(PlannerInfo *root,
+					   Node *clause,
+					   int varRelid,
+					   JoinType jointype,
+					   SpecialJoinInfo *sjinfo,
+					   bool use_extended_stats)
+{
 	Selectivity s1 = 0.5;		/* default for any unhandled clause type */
 	RestrictInfo *rinfo = NULL;
 	bool		cacheable = false;
@@ -716,42 +804,35 @@ clause_selectivity(PlannerInfo *root,
 	else if (is_notclause(clause))
 	{
 		/* inverse of the selectivity of the underlying clause */
-		s1 = 1.0 - clause_selectivity(root,
-									  (Node *) get_notclausearg((Expr *) clause),
-									  varRelid,
-									  jointype,
-									  sjinfo);
+		s1 = 1.0 - clause_selectivity_ext(root,
+										  (Node *) get_notclausearg((Expr *) clause),
+										  varRelid,
+										  jointype,
+										  sjinfo,
+										  use_extended_stats);
 	}
 	else if (is_andclause(clause))
 	{
 		/* share code with clauselist_selectivity() */
-		s1 = clauselist_selectivity(root,
-									((BoolExpr *) clause)->args,
-									varRelid,
-									jointype,
-									sjinfo);
+		s1 = clauselist_selectivity_ext(root,
+										((BoolExpr *) clause)->args,
+										varRelid,
+										jointype,
+										sjinfo,
+										use_extended_stats);
 	}
 	else if (is_orclause(clause))
 	{
 		/*
-		 * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to
-		 * account for the probable overlap of selected tuple sets.
-		 *
-		 * XXX is this too conservative?
+		 * Almost the same thing as clauselist_selectivity, but with the
+		 * clauses connected by OR.
 		 */
-		ListCell   *arg;
-
-		s1 = 0.0;
-		foreach(arg, ((BoolExpr *) clause)->args)
-		{
-			Selectivity s2 = clause_selectivity(root,
-												(Node *) lfirst(arg),
-												varRelid,
-												jointype,
-												sjinfo);
-
-			s1 = s1 + s2 - s1 * s2;
-		}
+		s1 = clauselist_selectivity_or(root,
+									   ((BoolExpr *) clause)->args,
+									   varRelid,
+									   jointype,
+									   sjinfo,
+									   use_extended_stats);
 	}
 	else if (is_opclause(clause) || IsA(clause, DistinctExpr))
 	{
@@ -852,20 +933,22 @@ clause_selectivity(PlannerInfo *root,
 	else if (IsA(clause, RelabelType))
 	{
 		/* Not sure this case is needed, but it can't hurt */
-		s1 = clause_selectivity(root,
-								(Node *) ((RelabelType *) clause)->arg,
-								varRelid,
-								jointype,
-								sjinfo);
+		s1 = clause_selectivity_ext(root,
+									(Node *) ((RelabelType *) clause)->arg,
+									varRelid,
+									jointype,
+									sjinfo,
+									use_extended_stats);
 	}
 	else if (IsA(clause, CoerceToDomain))
 	{
 		/* Not sure this case is needed, but it can't hurt */
-		s1 = clause_selectivity(root,
-								(Node *) ((CoerceToDomain *) clause)->arg,
-								varRelid,
-								jointype,
-								sjinfo);
+		s1 = clause_selectivity_ext(root,
+									(Node *) ((CoerceToDomain *) clause)->arg,
+									varRelid,
+									jointype,
+									sjinfo,
+									use_extended_stats);
 	}
 	else
 	{
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index d950b4eabe0..b1abcde9687 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -1073,8 +1073,8 @@ clauselist_apply_dependencies(PlannerInfo *root, List *clauses,
 			}
 		}
 
-		simple_sel = clauselist_selectivity_simple(root, attr_clauses, varRelid,
-												   jointype, sjinfo, NULL);
+		simple_sel = clauselist_selectivity_ext(root, attr_clauses, varRelid,
+												jointype, sjinfo, false);
 		attr_sel[attidx++] = simple_sel;
 	}
 
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 36326927c6b..8d3cd091ada 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -1239,10 +1239,10 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
  * One of the main challenges with using MCV lists is how to extrapolate the
  * estimate to the data not covered by the MCV list. To do that, we compute
  * not only the "MCV selectivity" (selectivities for MCV items matching the
- * supplied clauses), but also a couple of derived selectivities:
+ * supplied clauses), but also the following related selectivities:
  *
- * - simple selectivity:  Computed without extended statistic, i.e. as if the
- * columns/clauses were independent
+ * - simple selectivity:  Computed without extended statistics, i.e. as if the
+ * columns/clauses were independent.
  *
  * - base selectivity:  Similar to simple selectivity, but is computed using
  * the extended statistic by adding up the base frequencies (that we compute
@@ -1250,30 +1250,9 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
  *
  * - total selectivity: Selectivity covered by the whole MCV list.
  *
- * - other selectivity: A selectivity estimate for data not covered by the MCV
- * list (i.e. satisfying the clauses, but not common enough to make it into
- * the MCV list)
- *
- * Note: While simple and base selectivities are defined in a quite similar
- * way, the values are computed differently and are not therefore equal. The
- * simple selectivity is computed as a product of per-clause estimates, while
- * the base selectivity is computed by adding up base frequencies of matching
- * items of the multi-column MCV list. So the values may differ for two main
- * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
- * the MCV items did not match the estimated clauses.
- *
- * As both (a) and (b) reduce the base selectivity value, it generally holds
- * that (simple_selectivity >= base_selectivity). If the MCV list covers all
- * the data, the values may be equal.
- *
- * So, (simple_selectivity - base_selectivity) is an estimate for the part
- * not covered by the MCV list, and (mcv_selectivity - base_selectivity) may
- * be seen as a correction for the part covered by the MCV list. Those two
- * statements are actually equivalent.
- *
- * Note: Due to rounding errors and minor differences in how the estimates
- * are computed, the inequality may not always hold. Which is why we clamp
- * the selectivities to prevent strange estimate (negative etc.).
+ * These are passed to mcv_combine_selectivities() which combines them to
+ * produce a selectivity estimate that makes use of both per-column statistics
+ * and the multi-column MCV statistics.
  *
  * 'estimatedclauses' is an input/output parameter.  We set bits for the
  * 0-based 'clauses' indexes we estimate for and also skip clause items that
@@ -1282,16 +1261,17 @@ statext_is_compatible_clause(PlannerInfo *root, Node *clause, Index relid,
 static Selectivity
 statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
 								   JoinType jointype, SpecialJoinInfo *sjinfo,
-								   RelOptInfo *rel, Bitmapset **estimatedclauses)
+								   RelOptInfo *rel, Bitmapset **estimatedclauses,
+								   bool is_or)
 {
 	ListCell   *l;
 	Bitmapset **list_attnums;
 	int			listidx;
-	Selectivity sel = 1.0;
+	Selectivity sel = (is_or) ? 0.0 : 1.0;
 
 	/* check if there's any stats that might be useful for us. */
 	if (!has_stats_of_kind(rel->statlist, STATS_EXT_MCV))
-		return 1.0;
+		return sel;
 
 	list_attnums = (Bitmapset **) palloc(sizeof(Bitmapset *) *
 										 list_length(clauses));
@@ -1327,12 +1307,7 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 	{
 		StatisticExtInfo *stat;
 		List	   *stat_clauses;
-		Selectivity simple_sel,
-					mcv_sel,
-					mcv_basesel,
-					mcv_totalsel,
-					other_sel,
-					stat_sel;
+		Bitmapset  *simple_clauses;
 
 		/* find the best suited statistics object for these attnums */
 		stat = choose_best_statistics(rel->statlist, STATS_EXT_MCV,
@@ -1351,6 +1326,9 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 		/* now filter the clauses to be estimated using the selected MCV */
 		stat_clauses = NIL;
 
+		/* record which clauses are simple (single column) */
+		simple_clauses = NULL;
+
 		listidx = 0;
 		foreach(l, clauses)
 		{
@@ -1361,6 +1339,10 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 			if (list_attnums[listidx] != NULL &&
 				bms_is_subset(list_attnums[listidx], stat->keys))
 			{
+				if (bms_membership(list_attnums[listidx]) == BMS_SINGLETON)
+					simple_clauses = bms_add_member(simple_clauses,
+													list_length(stat_clauses));
+
 				stat_clauses = lappend(stat_clauses, (Node *) lfirst(l));
 				*estimatedclauses = bms_add_member(*estimatedclauses, listidx);
 
@@ -1371,40 +1353,131 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 			listidx++;
 		}
 
-		/*
-		 * First compute "simple" selectivity, i.e. without the extended
-		 * statistics, and essentially assuming independence of the
-		 * columns/clauses. We'll then use the various selectivities computed
-		 * from MCV list to improve it.
-		 */
-		simple_sel = clauselist_selectivity_simple(root, stat_clauses, varRelid,
-												   jointype, sjinfo, NULL);
-
-		/*
-		 * Now compute the multi-column estimate from the MCV list, along with
-		 * the other selectivities (base & total selectivity).
-		 */
-		mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses, varRelid,
-											 jointype, sjinfo, rel,
-											 &mcv_basesel, &mcv_totalsel);
+		if (is_or)
+		{
+			bool	   *or_matches = NULL;
+			Selectivity simple_or_sel = 0.0;
+			MCVList    *mcv_list;
 
-		/* Estimated selectivity of values not covered by MCV matches */
-		other_sel = simple_sel - mcv_basesel;
-		CLAMP_PROBABILITY(other_sel);
+			/* Load the MCV list stored in the statistics object */
+			mcv_list = statext_mcv_load(stat->statOid);
 
-		/* The non-MCV selectivity can't exceed the 1 - mcv_totalsel. */
-		if (other_sel > 1.0 - mcv_totalsel)
-			other_sel = 1.0 - mcv_totalsel;
+			/*
+			 * Compute the selectivity of the ORed list of clauses by
+			 * estimating each in turn and combining them using the formula
+			 * P(A OR B) = P(A) + P(B) - P(A AND B).  This allows us to use
+			 * the multivariate MCV stats to better estimate each term.
+			 *
+			 * Each time we iterate this formula, the clause "A" above is
+			 * equal to all the clauses processed so far, combined with "OR".
+			 */
+			listidx = 0;
+			foreach(l, stat_clauses)
+			{
+				Node	   *clause = (Node *) lfirst(l);
+				Selectivity simple_sel,
+							overlap_simple_sel,
+							mcv_sel,
+							mcv_basesel,
+							overlap_mcvsel,
+							overlap_basesel,
+							mcv_totalsel,
+							clause_sel,
+							overlap_sel;
+
+				/*
+				 * "Simple" selectivity of the next clause and its overlap
+				 * with any of the previous clauses.  These are our initial
+				 * estimates of P(B) and P(A AND B), assuming independence of
+				 * columns/clauses.
+				 */
+				simple_sel = clause_selectivity_ext(root, clause, varRelid,
+													jointype, sjinfo, false);
+
+				overlap_simple_sel = simple_or_sel * simple_sel;
+
+				/*
+				 * New "simple" selectivity of all clauses seen so far,
+				 * assuming independence.
+				 */
+				simple_or_sel += simple_sel - overlap_simple_sel;
+				CLAMP_PROBABILITY(simple_or_sel);
+
+				/*
+				 * Multi-column estimate of this clause using MCV statistics,
+				 * along with base and total selectivities, and corresponding
+				 * selectivities for the overlap term P(A AND B).
+				 */
+				mcv_sel = mcv_clause_selectivity_or(root, stat, mcv_list,
+													clause, &or_matches,
+													&mcv_basesel,
+													&overlap_mcvsel,
+													&overlap_basesel,
+													&mcv_totalsel);
+
+				/*
+				 * Combine the simple and multi-column estimates.
+				 *
+				 * If this clause is a simple single-column clause, then we
+				 * just use the simple selectivity estimate for it, since the
+				 * multi-column statistics are unlikely to improve on that
+				 * (and in fact could make it worse).  For the overlap, we
+				 * always make use of the multi-column statistics.
+				 */
+				if (bms_is_member(listidx, simple_clauses))
+					clause_sel = simple_sel;
+				else
+					clause_sel = mcv_combine_selectivities(simple_sel,
+														   mcv_sel,
+														   mcv_basesel,
+														   mcv_totalsel);
+
+				overlap_sel = mcv_combine_selectivities(overlap_simple_sel,
+														overlap_mcvsel,
+														overlap_basesel,
+														mcv_totalsel);
+
+				/* Factor these into the overall result */
+				sel += clause_sel - overlap_sel;
+				CLAMP_PROBABILITY(sel);
+
+				listidx++;
+			}
+		}
+		else					/* Implicitly-ANDed list of clauses */
+		{
+			Selectivity simple_sel,
+						mcv_sel,
+						mcv_basesel,
+						mcv_totalsel,
+						stat_sel;
 
-		/*
-		 * Overall selectivity is the combination of MCV and non-MCV
-		 * estimates.
-		 */
-		stat_sel = mcv_sel + other_sel;
-		CLAMP_PROBABILITY(stat_sel);
+			/*
+			 * "Simple" selectivity, i.e. without any extended statistics,
+			 * essentially assuming independence of the columns/clauses.
+			 */
+			simple_sel = clauselist_selectivity_ext(root, stat_clauses,
+													varRelid, jointype,
+													sjinfo, false);
 
-		/* Factor the estimate from this MCV to the overall estimate. */
-		sel *= stat_sel;
+			/*
+			 * Multi-column estimate using MCV statistics, along with base and
+			 * total selectivities.
+			 */
+			mcv_sel = mcv_clauselist_selectivity(root, stat, stat_clauses,
+												 varRelid, jointype, sjinfo,
+												 rel, &mcv_basesel,
+												 &mcv_totalsel);
+
+			/* Combine the simple and multi-column estimates. */
+			stat_sel = mcv_combine_selectivities(simple_sel,
+												 mcv_sel,
+												 mcv_basesel,
+												 mcv_totalsel);
+
+			/* Factor this into the overall result */
+			sel *= stat_sel;
+		}
 	}
 
 	return sel;
@@ -1417,13 +1490,21 @@ statext_mcv_clauselist_selectivity(PlannerInfo *root, List *clauses, int varReli
 Selectivity
 statext_clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid,
 							   JoinType jointype, SpecialJoinInfo *sjinfo,
-							   RelOptInfo *rel, Bitmapset **estimatedclauses)
+							   RelOptInfo *rel, Bitmapset **estimatedclauses,
+							   bool is_or)
 {
 	Selectivity sel;
 
 	/* First, try estimating clauses using a multivariate MCV list. */
 	sel = statext_mcv_clauselist_selectivity(root, clauses, varRelid, jointype,
-											 sjinfo, rel, estimatedclauses);
+											 sjinfo, rel, estimatedclauses, is_or);
+
+	/*
+	 * Functional dependencies only work for clauses connected by AND, so for
+	 * OR clauses we're done.
+	 */
+	if (is_or)
+		return sel;
 
 	/*
 	 * Then, apply functional dependencies on the remaining clauses by calling
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 6a262f15436..fae792a2ddf 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -32,6 +32,7 @@
 #include "utils/fmgroids.h"
 #include "utils/fmgrprotos.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 #include "utils/syscache.h"
 #include "utils/typcache.h"
 
@@ -1889,15 +1890,79 @@ mcv_get_match_bitmap(PlannerInfo *root, List *clauses,
 
 
 /*
+ * mcv_combine_selectivities
+ * 		Combine per-column and multi-column MCV selectivity estimates.
+ *
+ * simple_sel is a "simple" selectivity estimate (produced without using any
+ * extended statistics, essentially assuming independence of columns/clauses).
+ *
+ * mcv_sel and mcv_basesel are sums of the frequencies and base frequencies of
+ * all matching MCV items.  The difference (mcv_sel - mcv_basesel) is then
+ * essentially interpreted as a correction to be added to simple_sel, as
+ * described below.
+ *
+ * mcv_totalsel is the sum of the frequencies of all MCV items (not just the
+ * matching ones).  This is used as an upper bound on the portion of the
+ * selectivity estimates not covered by the MCV statistics.
+ *
+ * Note: While simple and base selectivities are defined in a quite similar
+ * way, the values are computed differently and are not therefore equal. The
+ * simple selectivity is computed as a product of per-clause estimates, while
+ * the base selectivity is computed by adding up base frequencies of matching
+ * items of the multi-column MCV list. So the values may differ for two main
+ * reasons - (a) the MCV list may not cover 100% of the data and (b) some of
+ * the MCV items did not match the estimated clauses.
+ *
+ * As both (a) and (b) reduce the base selectivity value, it generally holds
+ * that (simple_sel >= mcv_basesel). If the MCV list covers all the data, the
+ * values may be equal.
+ *
+ * So, other_sel = (simple_sel - mcv_basesel) is an estimate for the part not
+ * covered by the MCV list, and (mcv_sel - mcv_basesel) may be seen as a
+ * correction for the part covered by the MCV list. Those two statements are
+ * actually equivalent.
+ */
+Selectivity
+mcv_combine_selectivities(Selectivity simple_sel,
+						  Selectivity mcv_sel,
+						  Selectivity mcv_basesel,
+						  Selectivity mcv_totalsel)
+{
+	Selectivity other_sel;
+	Selectivity sel;
+
+	/* estimated selectivity of values not covered by MCV matches */
+	other_sel = simple_sel - mcv_basesel;
+	CLAMP_PROBABILITY(other_sel);
+
+	/* this non-MCV selectivity cannot exceed 1 - mcv_totalsel */
+	if (other_sel > 1.0 - mcv_totalsel)
+		other_sel = 1.0 - mcv_totalsel;
+
+	/* overall selectivity is the sum of the MCV and non-MCV parts */
+	sel = mcv_sel + other_sel;
+	CLAMP_PROBABILITY(sel);
+
+	return sel;
+}
+
+
+/*
  * mcv_clauselist_selectivity
- *		Return the selectivity estimate computed using an MCV list.
+ *		Use MCV statistics to estimate the selectivity of an implicitly-ANDed
+ *		list of clauses.
  *
- * First builds a bitmap of MCV items matching the clauses, and then sums
- * the frequencies of matching items.
+ * This determines which MCV items match every clause in the list and returns
+ * the sum of the frequencies of those items.
  *
- * It also produces two additional interesting selectivities - total
- * selectivity of all the MCV items (not just the matching ones), and the
- * base frequency computed on the assumption of independence.
+ * In addition, it returns the sum of the base frequencies of each of those
+ * items (that is the sum of the selectivities that each item would have if
+ * the columns were independent of one another), and the total selectivity of
+ * all the MCV items (not just the matching ones).  These are expected to be
+ * used together with a "simple" selectivity estimate (one based only on
+ * per-column statistics) to produce an overall selectivity estimate that
+ * makes use of both per-column and multi-column statistics --- see
+ * mcv_combine_selectivities().
  */
 Selectivity
 mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
@@ -1928,7 +1993,6 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
 
 		if (matches[i] != false)
 		{
-			/* XXX Shouldn't the basesel be outside the if condition? */
 			*basesel += mcv->items[i].base_frequency;
 			s += mcv->items[i].frequency;
 		}
@@ -1936,3 +2000,94 @@ mcv_clauselist_selectivity(PlannerInfo *root, StatisticExtInfo *stat,
 
 	return s;
 }
+
+
+/*
+ * mcv_clause_selectivity_or
+ *		Use MCV statistics to estimate the selectivity of a clause that
+ *		appears in an ORed list of clauses.
+ *
+ * As with mcv_clauselist_selectivity() this determines which MCV items match
+ * the clause and returns both the sum of the frequencies and the sum of the
+ * base frequencies of those items, as well as the sum of the frequencies of
+ * all MCV items (not just the matching ones) so that this information can be
+ * used by mcv_combine_selectivities() to produce a selectivity estimate that
+ * makes use of both per-column and multi-column statistics.
+ *
+ * Additionally, we return information to help compute the overall selectivity
+ * of the ORed list of clauses assumed to contain this clause.  This function
+ * is intended to be called for each clause in the ORed list of clauses,
+ * allowing the overall selectivity to be computed using the following
+ * algorithm:
+ *
+ * Suppose P[n] = P(C[1] OR C[2] OR ... OR C[n]) is the combined selectivity
+ * of the first n clauses in the list.  Then the combined selectivity taking
+ * into account the next clause C[n+1] can be written as
+ *
+ *		P[n+1] = P[n] + P(C[n+1]) - P((C[1] OR ... OR C[n]) AND C[n+1])
+ *
+ * The final term above represents the overlap between the clauses examined so
+ * far and the (n+1)'th clause.  To estimate its selectivity, we track the
+ * match bitmap for the ORed list of clauses examined so far and examine its
+ * intersection with the match bitmap for the (n+1)'th clause.
+ *
+ * We then also return the sums of the MCV item frequencies and base
+ * frequencies for the match bitmap intersection corresponding to the overlap
+ * term above, so that they can be combined with a simple selectivity estimate
+ * for that term.
+ *
+ * The parameter "or_matches" is an in/out parameter tracking the match bitmap
+ * for the clauses examined so far.  The caller is expected to set it to NULL
+ * the first time it calls this function.
+ */
+Selectivity
+mcv_clause_selectivity_or(PlannerInfo *root, StatisticExtInfo *stat,
+						  MCVList *mcv, Node *clause, bool **or_matches,
+						  Selectivity *basesel, Selectivity *overlap_mcvsel,
+						  Selectivity *overlap_basesel, Selectivity *totalsel)
+{
+	Selectivity s = 0.0;
+	bool	   *new_matches;
+	int			i;
+
+	/* build the OR-matches bitmap, if not built already */
+	if (*or_matches == NULL)
+		*or_matches = palloc0(sizeof(bool) * mcv->nitems);
+
+	/* build the match bitmap for the new clause */
+	new_matches = mcv_get_match_bitmap(root, list_make1(clause), stat->keys,
+									   mcv, false);
+
+	/*
+	 * Sum the frequencies for all the MCV items matching this clause and also
+	 * those matching the overlap between this clause and any of the preceding
+	 * clauses as described above.
+	 */
+	*basesel = 0.0;
+	*overlap_mcvsel = 0.0;
+	*overlap_basesel = 0.0;
+	*totalsel = 0.0;
+	for (i = 0; i < mcv->nitems; i++)
+	{
+		*totalsel += mcv->items[i].frequency;
+
+		if (new_matches[i])
+		{
+			s += mcv->items[i].frequency;
+			*basesel += mcv->items[i].base_frequency;
+
+			if ((*or_matches)[i])
+			{
+				*overlap_mcvsel += mcv->items[i].frequency;
+				*overlap_basesel += mcv->items[i].base_frequency;
+			}
+		}
+
+		/* update the OR-matches bitmap for the next clause */
+		(*or_matches)[i] = (*or_matches)[i] || new_matches[i];
+	}
+
+	pfree(new_matches);
+
+	return s;
+}
diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h
index 3e4171056e8..dea0e7338d5 100644
--- a/src/include/optimizer/optimizer.h
+++ b/src/include/optimizer/optimizer.h
@@ -58,17 +58,23 @@ extern Selectivity clause_selectivity(PlannerInfo *root,
 									  int varRelid,
 									  JoinType jointype,
 									  SpecialJoinInfo *sjinfo);
-extern Selectivity clauselist_selectivity_simple(PlannerInfo *root,
-												 List *clauses,
-												 int varRelid,
-												 JoinType jointype,
-												 SpecialJoinInfo *sjinfo,
-												 Bitmapset *estimatedclauses);
+extern Selectivity clause_selectivity_ext(PlannerInfo *root,
+										  Node *clause,
+										  int varRelid,
+										  JoinType jointype,
+										  SpecialJoinInfo *sjinfo,
+										  bool use_extended_stats);
 extern Selectivity clauselist_selectivity(PlannerInfo *root,
 										  List *clauses,
 										  int varRelid,
 										  JoinType jointype,
 										  SpecialJoinInfo *sjinfo);
+extern Selectivity clauselist_selectivity_ext(PlannerInfo *root,
+											  List *clauses,
+											  int varRelid,
+											  JoinType jointype,
+											  SpecialJoinInfo *sjinfo,
+											  bool use_extended_stats);
 
 /* in path/costsize.c: */
 
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index 61e69696cfe..02bf6a05027 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -99,6 +99,11 @@ extern SortItem *build_sorted_items(int numrows, int *nitems, HeapTuple *rows,
 extern bool examine_clause_args(List *args, Var **varp,
 								Const **cstp, bool *varonleftp);
 
+extern Selectivity mcv_combine_selectivities(Selectivity simple_sel,
+											 Selectivity mcv_sel,
+											 Selectivity mcv_basesel,
+											 Selectivity mcv_totalsel);
+
 extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root,
 											  StatisticExtInfo *stat,
 											  List *clauses,
@@ -109,4 +114,14 @@ extern Selectivity mcv_clauselist_selectivity(PlannerInfo *root,
 											  Selectivity *basesel,
 											  Selectivity *totalsel);
 
+extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root,
+											 StatisticExtInfo *stat,
+											 MCVList *mcv,
+											 Node *clause,
+											 bool **or_matches,
+											 Selectivity *basesel,
+											 Selectivity *overlap_mcvsel,
+											 Selectivity *overlap_basesel,
+											 Selectivity *totalsel);
+
 #endif							/* EXTENDED_STATS_INTERNAL_H */
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index 50fce4935f3..c9ed21155cd 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -116,7 +116,8 @@ extern Selectivity statext_clauselist_selectivity(PlannerInfo *root,
 												  JoinType jointype,
 												  SpecialJoinInfo *sjinfo,
 												  RelOptInfo *rel,
-												  Bitmapset **estimatedclauses);
+												  Bitmapset **estimatedclauses,
+												  bool is_or);
 extern bool has_stats_of_kind(List *stats, char requiredkind);
 extern StatisticExtInfo *choose_best_statistics(List *stats, char requiredkind,
 												Bitmapset **clause_attnums,
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 4c3edd213fb..dbbe9844b2e 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -1113,6 +1113,12 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = '
        200 |    200
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
+ estimated | actual 
+-----------+--------
+       200 |    200
+(1 row)
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
  estimated | actual 
 -----------+--------
@@ -1173,13 +1179,6 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY
        100 |    100
 (1 row)
 
--- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
-SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
- estimated | actual 
------------+--------
-       343 |    200
-(1 row)
-
 -- check change of unrelated column type does not reset the MCV statistics
 ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
 SELECT d.stxdmcv IS NOT NULL
@@ -1477,6 +1476,134 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND
          1 |      0
 (1 row)
 
+-- mcv covering just a small fraction of data
+CREATE TABLE mcv_lists_partial (
+    a INT,
+    b INT,
+    c INT
+);
+-- 10 frequent groups, each with 100 elements
+INSERT INTO mcv_lists_partial (a, b, c)
+     SELECT
+         mod(i,10),
+         mod(i,10),
+         mod(i,10)
+     FROM generate_series(0,999) s(i);
+-- 100 groups that will make it to the MCV list (includes the 10 frequent ones)
+INSERT INTO mcv_lists_partial (a, b, c)
+     SELECT
+         i,
+         i,
+         i
+     FROM generate_series(0,99) s(i);
+-- 4000 groups in total, most of which won't make it (just a single item)
+INSERT INTO mcv_lists_partial (a, b, c)
+     SELECT
+         i,
+         i,
+         i
+     FROM generate_series(0,3999) s(i);
+ANALYZE mcv_lists_partial;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 0');
+ estimated | actual 
+-----------+--------
+         1 |    102
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 0');
+ estimated | actual 
+-----------+--------
+       300 |    102
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 AND b = 10 AND c = 10');
+ estimated | actual 
+-----------+--------
+         1 |      2
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 OR b = 10 OR c = 10');
+ estimated | actual 
+-----------+--------
+         6 |      2
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 10');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 10');
+ estimated | actual 
+-----------+--------
+       204 |    104
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0 AND c = 0) OR (a = 1 AND b = 1 AND c = 1) OR (a = 2 AND b = 2 AND c = 2)');
+ estimated | actual 
+-----------+--------
+         1 |    306
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0) OR (a = 0 AND c = 0) OR (b = 0 AND c = 0)');
+ estimated | actual 
+-----------+--------
+         6 |    102
+(1 row)
+
+CREATE STATISTICS mcv_lists_partial_stats (mcv) ON a, b, c
+  FROM mcv_lists_partial;
+ANALYZE mcv_lists_partial;
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 0');
+ estimated | actual 
+-----------+--------
+       102 |    102
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 0');
+ estimated | actual 
+-----------+--------
+        96 |    102
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 AND b = 10 AND c = 10');
+ estimated | actual 
+-----------+--------
+         2 |      2
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 OR b = 10 OR c = 10');
+ estimated | actual 
+-----------+--------
+         2 |      2
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 10');
+ estimated | actual 
+-----------+--------
+         1 |      0
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 10');
+ estimated | actual 
+-----------+--------
+       102 |    104
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0 AND c = 0) OR (a = 1 AND b = 1 AND c = 1) OR (a = 2 AND b = 2 AND c = 2)');
+ estimated | actual 
+-----------+--------
+       300 |    306
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0) OR (a = 0 AND c = 0) OR (b = 0 AND c = 0)');
+ estimated | actual 
+-----------+--------
+       306 |    102
+(1 row)
+
+DROP TABLE mcv_lists_partial;
 -- check the ability to use multiple MCV lists
 CREATE TABLE mcv_lists_multi (
 	a INTEGER,
@@ -1506,12 +1633,36 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0 AN
        102 |    714
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 AND c = 0');
+ estimated | actual 
+-----------+--------
+       143 |    142
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 OR c = 0');
+ estimated | actual 
+-----------+--------
+      1571 |   1572
+(1 row)
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
  estimated | actual 
 -----------+--------
          4 |    142
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)');
+ estimated | actual 
+-----------+--------
+       298 |   1572
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0');
+ estimated | actual 
+-----------+--------
+      2649 |   1572
+(1 row)
+
 -- create separate MCV statistics
 CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
 CREATE STATISTICS mcv_lists_multi_2 (mcv) ON c, d FROM mcv_lists_multi;
@@ -1528,12 +1679,36 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0 AN
        714 |    714
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 AND c = 0');
+ estimated | actual 
+-----------+--------
+       143 |    142
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 OR c = 0');
+ estimated | actual 
+-----------+--------
+      1571 |   1572
+(1 row)
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
  estimated | actual 
 -----------+--------
        143 |    142
 (1 row)
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)');
+ estimated | actual 
+-----------+--------
+      1571 |   1572
+(1 row)
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0');
+ estimated | actual 
+-----------+--------
+      1714 |   1572
+(1 row)
+
 DROP TABLE mcv_lists_multi;
 -- Permission tests. Users should not be able to see specific data values in
 -- the extended statistics, if they lack permission to see those values in
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 9781e590a30..7912e733ae6 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -561,6 +561,8 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE 4 >= a AND ''0
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1');
 
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
+
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52) AND b IN ( ''1'', ''2'')');
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a IN (1, 2, 51, 52, NULL) AND b IN ( ''1'', ''2'', NULL)');
@@ -581,9 +583,6 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a < ALL (ARRAY[4, 5]) AND b IN (''1'', ''2'', NULL, ''3'') AND c > ANY (ARRAY[1, 2, NULL, 3])');
 
--- we can't use the statistic for OR clauses that are not fully covered (missing 'd' attribute)
-SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists WHERE a = 1 OR b = ''1'' OR c = 1 OR d IS NOT NULL');
-
 -- check change of unrelated column type does not reset the MCV statistics
 ALTER TABLE mcv_lists ALTER COLUMN d TYPE VARCHAR(64);
 
@@ -777,6 +776,78 @@ SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_bool WHERE NOT a AND b AND NOT c');
 
+-- mcv covering just a small fraction of data
+CREATE TABLE mcv_lists_partial (
+    a INT,
+    b INT,
+    c INT
+);
+
+-- 10 frequent groups, each with 100 elements
+INSERT INTO mcv_lists_partial (a, b, c)
+     SELECT
+         mod(i,10),
+         mod(i,10),
+         mod(i,10)
+     FROM generate_series(0,999) s(i);
+
+-- 100 groups that will make it to the MCV list (includes the 10 frequent ones)
+INSERT INTO mcv_lists_partial (a, b, c)
+     SELECT
+         i,
+         i,
+         i
+     FROM generate_series(0,99) s(i);
+
+-- 4000 groups in total, most of which won't make it (just a single item)
+INSERT INTO mcv_lists_partial (a, b, c)
+     SELECT
+         i,
+         i,
+         i
+     FROM generate_series(0,3999) s(i);
+
+ANALYZE mcv_lists_partial;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 0');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 0');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 AND b = 10 AND c = 10');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 OR b = 10 OR c = 10');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 10');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 10');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0 AND c = 0) OR (a = 1 AND b = 1 AND c = 1) OR (a = 2 AND b = 2 AND c = 2)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0) OR (a = 0 AND c = 0) OR (b = 0 AND c = 0)');
+
+CREATE STATISTICS mcv_lists_partial_stats (mcv) ON a, b, c
+  FROM mcv_lists_partial;
+
+ANALYZE mcv_lists_partial;
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 0');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 0');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 AND b = 10 AND c = 10');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 10 OR b = 10 OR c = 10');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 AND b = 0 AND c = 10');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE a = 0 OR b = 0 OR c = 10');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0 AND c = 0) OR (a = 1 AND b = 1 AND c = 1) OR (a = 2 AND b = 2 AND c = 2)');
+
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_partial WHERE (a = 0 AND b = 0) OR (a = 0 AND c = 0) OR (b = 0 AND c = 0)');
+
+DROP TABLE mcv_lists_partial;
+
 -- check the ability to use multiple MCV lists
 CREATE TABLE mcv_lists_multi (
 	a INTEGER,
@@ -799,7 +870,11 @@ ANALYZE mcv_lists_multi;
 -- estimates without any mcv statistics
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0 AND d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 AND c = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 OR c = 0');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0');
 
 -- create separate MCV statistics
 CREATE STATISTICS mcv_lists_multi_1 (mcv) ON a, b FROM mcv_lists_multi;
@@ -809,7 +884,11 @@ ANALYZE mcv_lists_multi;
 
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE c = 0 AND d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 AND c = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE b = 0 OR c = 0');
 SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 AND b = 0 AND c = 0 AND d = 0');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE (a = 0 AND b = 0) OR (c = 0 AND d = 0)');
+SELECT * FROM check_estimated_rows('SELECT * FROM mcv_lists_multi WHERE a = 0 OR b = 0 OR c = 0 OR d = 0');
 
 DROP TABLE mcv_lists_multi;