7 files changed, 117 insertions, 55 deletions
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 15669c82386..146ee8dd1ea 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1852,9 +1852,12 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
 
 /*
  * Make a SortGroupClause node for a SetOperationStmt's groupClauses
+ *
+ * If require_hash is true, the caller is indicating that they need hash
+ * support or they will fail.  So look extra hard for hash support.
  */
 SortGroupClause *
-makeSortGroupClauseForSetOp(Oid rescoltype)
+makeSortGroupClauseForSetOp(Oid rescoltype, bool require_hash)
 {
 	SortGroupClause *grpcl = makeNode(SortGroupClause);
 	Oid			sortop;
@@ -1867,6 +1870,15 @@ makeSortGroupClauseForSetOp(Oid rescoltype)
 							 &sortop, &eqop, NULL,
 							 &hashable);
 
+	/*
+	 * The type cache doesn't believe that record is hashable (see
+	 * cache_record_field_properties()), but if the caller really needs hash
+	 * support, we can assume it does.  Worst case, if any components of the
+	 * record don't support hashing, we will fail at execution.
+	 */
+	if (require_hash && (rescoltype == RECORDOID || rescoltype == RECORDARRAYOID))
+		hashable = true;
+
 	/* we don't have a tlist yet, so can't assign sortgrouprefs */
 	grpcl->tleSortGroupRef = 0;
 	grpcl->eqop = eqop;
@@ -2027,6 +2039,8 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
 		ListCell   *ltl;
 		ListCell   *rtl;
 		const char *context;
+		bool recursive = (pstate->p_parent_cte &&
+						  pstate->p_parent_cte->cterecursive);
 
 		context = (stmt->op == SETOP_UNION ? "UNION" :
 				   (stmt->op == SETOP_INTERSECT ? "INTERSECT" :
@@ -2048,9 +2062,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
 		 * containing CTE as having those result columns.  We should do this
 		 * only at the topmost setop of the CTE, of course.
 		 */
-		if (isTopLevel &&
-			pstate->p_parent_cte &&
-			pstate->p_parent_cte->cterecursive)
+		if (isTopLevel && recursive)
 			determineRecursiveColTypes(pstate, op->larg, ltargetlist);
 
 		/*
@@ -2182,8 +2194,9 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
 				setup_parser_errposition_callback(&pcbstate, pstate,
 												  bestlocation);
 
+				/* If it's a recursive union, we need to require hashing support. */
 				op->groupClauses = lappend(op->groupClauses,
-										   makeSortGroupClauseForSetOp(rescoltype));
+										   makeSortGroupClauseForSetOp(rescoltype, recursive));
 
 				cancel_parser_errposition_callback(&pcbstate);
 			}
diff --git a/src/backend/rewrite/rewriteSearchCycle.c b/src/backend/rewrite/rewriteSearchCycle.c
index c50ebdba247..ef38f4025a5 100644
--- a/src/backend/rewrite/rewriteSearchCycle.c
+++ b/src/backend/rewrite/rewriteSearchCycle.c
@@ -594,7 +594,7 @@ rewriteSearchAndCycle(CommonTableExpr *cte)
 		sos->colCollations = lappend_oid(sos->colCollations, InvalidOid);
 		if (!sos->all)
 			sos->groupClauses = lappend(sos->groupClauses,
-										makeSortGroupClauseForSetOp(search_seq_type));
+										makeSortGroupClauseForSetOp(search_seq_type, true));
 	}
 	if (cte->cycle_clause)
 	{
@@ -603,14 +603,14 @@ rewriteSearchAndCycle(CommonTableExpr *cte)
 		sos->colCollations = lappend_oid(sos->colCollations, cte->cycle_clause->cycle_mark_collation);
 		if (!sos->all)
 			sos->groupClauses = lappend(sos->groupClauses,
-										makeSortGroupClauseForSetOp(cte->cycle_clause->cycle_mark_type));
+										makeSortGroupClauseForSetOp(cte->cycle_clause->cycle_mark_type, true));
 
 		sos->colTypes = lappend_oid(sos->colTypes, RECORDARRAYOID);
 		sos->colTypmods = lappend_int(sos->colTypmods, -1);
 		sos->colCollations = lappend_oid(sos->colCollations, InvalidOid);
 		if (!sos->all)
 			sos->groupClauses = lappend(sos->groupClauses,
-										makeSortGroupClauseForSetOp(RECORDARRAYOID));
+										makeSortGroupClauseForSetOp(RECORDARRAYOID, true));
 	}
 
 	/*
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index 04d487c5442..bc2f30bae15 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -29,6 +29,7 @@
 #include "utils/arrayaccess.h"
 #include "utils/builtins.h"
 #include "utils/datum.h"
+#include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/selfuncs.h"
@@ -3973,13 +3974,46 @@ hash_array(PG_FUNCTION_ARGS)
 	{
 		typentry = lookup_type_cache(element_type,
 									 TYPECACHE_HASH_PROC_FINFO);
-		if (!OidIsValid(typentry->hash_proc_finfo.fn_oid))
+		if (!OidIsValid(typentry->hash_proc_finfo.fn_oid) && element_type != RECORDOID)
 			ereport(ERROR,
 					(errcode(ERRCODE_UNDEFINED_FUNCTION),
 					 errmsg("could not identify a hash function for type %s",
 							format_type_be(element_type))));
+
+		/*
+		 * The type cache doesn't believe that record is hashable (see
+		 * cache_record_field_properties()), but since we're here, we're
+		 * committed to hashing, so we can assume it does.  Worst case, if any
+		 * components of the record don't support hashing, we will fail at
+		 * execution.
+		 */
+		if (element_type == RECORDOID)
+		{
+			MemoryContext oldcontext;
+			TypeCacheEntry *record_typentry;
+
+			oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
+
+			/*
+			 * Make fake type cache entry structure.  Note that we can't just
+			 * modify typentry, since that points directly into the type cache.
+			 */
+			record_typentry = palloc(sizeof(*record_typentry));
+
+			/* fill in what we need below */
+			record_typentry->typlen = typentry->typlen;
+			record_typentry->typbyval = typentry->typbyval;
+			record_typentry->typalign = typentry->typalign;
+			fmgr_info(F_HASH_RECORD, &record_typentry->hash_proc_finfo);
+
+			MemoryContextSwitchTo(oldcontext);
+
+			typentry = record_typentry;
+		}
+
 		fcinfo->flinfo->fn_extra = (void *) typentry;
 	}
+
 	typlen = typentry->typlen;
 	typbyval = typentry->typbyval;
 	typalign = typentry->typalign;
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c
index 326fae62e22..70e5c51297d 100644
--- a/src/backend/utils/cache/typcache.c
+++ b/src/backend/utils/cache/typcache.c
@@ -1515,14 +1515,17 @@ cache_record_field_properties(TypeCacheEntry *typentry)
 	/*
 	 * For type RECORD, we can't really tell what will work, since we don't
 	 * have access here to the specific anonymous type.  Just assume that
-	 * everything will (we may get a failure at runtime ...)
+	 * equality and comparison will (we may get a failure at runtime).  We
+	 * could also claim that hashing works, but then if code that has the
+	 * option between a comparison-based (sort-based) and a hash-based plan
+	 * chooses hashing, stuff could fail that would otherwise work if it chose
+	 * a comparison-based plan.  In practice more types support comparison
+	 * than hashing.
 	 */
 	if (typentry->type_id == RECORDOID)
 	{
 		typentry->flags |= (TCFLAGS_HAVE_FIELD_EQUALITY |
-							TCFLAGS_HAVE_FIELD_COMPARE |
-							TCFLAGS_HAVE_FIELD_HASHING |
-							TCFLAGS_HAVE_FIELD_EXTENDED_HASHING);
+							TCFLAGS_HAVE_FIELD_COMPARE);
 	}
 	else if (typentry->typtype == TYPTYPE_COMPOSITE)
 	{
diff --git a/src/include/parser/analyze.h b/src/include/parser/analyze.h
index 6716db6c132..a0f0bd38d7d 100644
--- a/src/include/parser/analyze.h
+++ b/src/include/parser/analyze.h
@@ -48,6 +48,6 @@ extern void applyLockingClause(Query *qry, Index rtindex,
 extern List *BuildOnConflictExcludedTargetlist(Relation targetrel,
 											   Index exclRelIndex);
 
-extern SortGroupClause *makeSortGroupClauseForSetOp(Oid rescoltype);
+extern SortGroupClause *makeSortGroupClauseForSetOp(Oid rescoltype, bool require_hash);
 
 #endif							/* ANALYZE_H */
diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out
index 75f78db8f58..dece7310cfe 100644
--- a/src/test/regress/expected/union.out
+++ b/src/test/regress/expected/union.out
@@ -648,34 +648,37 @@ reset enable_hashagg;
 set enable_hashagg to on;
 explain (costs off)
 select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
-               QUERY PLAN                
------------------------------------------
- HashAggregate
-   Group Key: "*VALUES*".column1
-   ->  Append
-         ->  Values Scan on "*VALUES*"
-         ->  Values Scan on "*VALUES*_1"
-(5 rows)
+                  QUERY PLAN                   
+-----------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: "*VALUES*".column1
+         ->  Append
+               ->  Values Scan on "*VALUES*"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
 
 select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
    x   
 -------
- (1,4)
- (1,3)
  (1,2)
+ (1,3)
+ (1,4)
 (3 rows)
 
 explain (costs off)
 select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
-                  QUERY PLAN                   
------------------------------------------------
- HashSetOp Intersect
-   ->  Append
-         ->  Subquery Scan on "*SELECT* 1"
-               ->  Values Scan on "*VALUES*"
-         ->  Subquery Scan on "*SELECT* 2"
-               ->  Values Scan on "*VALUES*_1"
-(6 rows)
+                     QUERY PLAN                      
+-----------------------------------------------------
+ SetOp Intersect
+   ->  Sort
+         Sort Key: "*SELECT* 1".x
+         ->  Append
+               ->  Subquery Scan on "*SELECT* 1"
+                     ->  Values Scan on "*VALUES*"
+               ->  Subquery Scan on "*SELECT* 2"
+                     ->  Values Scan on "*VALUES*_1"
+(8 rows)
 
 select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
    x   
@@ -685,15 +688,17 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va
 
 explain (costs off)
 select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
-                  QUERY PLAN                   
------------------------------------------------
- HashSetOp Except
-   ->  Append
-         ->  Subquery Scan on "*SELECT* 1"
-               ->  Values Scan on "*VALUES*"
-         ->  Subquery Scan on "*SELECT* 2"
-               ->  Values Scan on "*VALUES*_1"
-(6 rows)
+                     QUERY PLAN                      
+-----------------------------------------------------
+ SetOp Except
+   ->  Sort
+         Sort Key: "*SELECT* 1".x
+         ->  Append
+               ->  Subquery Scan on "*SELECT* 1"
+                     ->  Values Scan on "*VALUES*"
+               ->  Subquery Scan on "*SELECT* 2"
+                     ->  Values Scan on "*VALUES*_1"
+(8 rows)
 
 select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
    x   
@@ -702,21 +707,28 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value
 (1 row)
 
 -- non-hashable type
--- With an anonymous row type, the typcache reports that the type is
--- hashable, but then it will fail at run time.
+-- With an anonymous row type, the typcache does not report that the
+-- type is hashable.  (Otherwise, this would fail at execution time.)
 explain (costs off)
 select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
-               QUERY PLAN                
------------------------------------------
- HashAggregate
-   Group Key: "*VALUES*".column1
-   ->  Append
-         ->  Values Scan on "*VALUES*"
-         ->  Values Scan on "*VALUES*_1"
-(5 rows)
+                  QUERY PLAN                   
+-----------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: "*VALUES*".column1
+         ->  Append
+               ->  Values Scan on "*VALUES*"
+               ->  Values Scan on "*VALUES*_1"
+(6 rows)
 
 select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
-ERROR:  could not identify a hash function for type money
+     x     
+-----------
+ ($100.00)
+ ($200.00)
+ ($300.00)
+(3 rows)
+
 -- With a defined row type, the typcache can inspect the type's fields
 -- for hashability.
 create type ct1 as (f1 money);
diff --git a/src/test/regress/sql/union.sql b/src/test/regress/sql/union.sql
index ce22f34c719..ca8c9b4d128 100644
--- a/src/test/regress/sql/union.sql
+++ b/src/test/regress/sql/union.sql
@@ -218,8 +218,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value
 
 -- non-hashable type
 
--- With an anonymous row type, the typcache reports that the type is
--- hashable, but then it will fail at run time.
+-- With an anonymous row type, the typcache does not report that the
+-- type is hashable.  (Otherwise, this would fail at execution time.)
 explain (costs off)
 select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
 select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);