Diffstat (limited to 'src')
-rw-r--r-- | src/backend/executor/execGrouping.c | 5
-rw-r--r-- | src/backend/executor/nodeAgg.c | 30
-rw-r--r-- | src/backend/executor/nodeHash.c | 80
-rw-r--r-- | src/backend/executor/nodeHashjoin.c | 4
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 12
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 15
-rw-r--r-- | src/backend/optimizer/plan/subselect.c | 9
-rw-r--r-- | src/backend/optimizer/prep/prepunion.c | 9
-rw-r--r-- | src/backend/optimizer/util/pathnode.c | 3
-rw-r--r-- | src/backend/utils/adt/ri_triggers.c | 18
-rw-r--r-- | src/backend/utils/init/globals.c | 1
-rw-r--r-- | src/backend/utils/misc/guc.c | 11
-rw-r--r-- | src/backend/utils/misc/postgresql.conf.sample | 1
-rw-r--r-- | src/include/executor/hashjoin.h | 4
-rw-r--r-- | src/include/executor/nodeHash.h | 2
-rw-r--r-- | src/include/miscadmin.h | 4
16 files changed, 144 insertions, 64 deletions
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 321f427e478..90d04f9228a 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -165,13 +165,14 @@ BuildTupleHashTableExt(PlanState *parent,
 {
 	TupleHashTable hashtable;
 	Size		entrysize = sizeof(TupleHashEntryData) + additionalsize;
+	int			hash_mem = get_hash_mem();
 	MemoryContext oldcontext;
 	bool		allow_jit;
 
 	Assert(nbuckets > 0);
 
-	/* Limit initial table size request to not more than work_mem */
-	nbuckets = Min(nbuckets, (long) ((work_mem * 1024L) / entrysize));
+	/* Limit initial table size request to not more than hash_mem */
+	nbuckets = Min(nbuckets, (long) ((hash_mem * 1024L) / entrysize));
 
 	oldcontext = MemoryContextSwitchTo(metacxt);
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index 02a9165c694..9776263ae75 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -203,7 +203,7 @@
  * entries (and initialize new transition states), we instead spill them to
  * disk to be processed later. The tuples are spilled in a partitioned
  * manner, so that subsequent batches are smaller and less likely to exceed
- * work_mem (if a batch does exceed work_mem, it must be spilled
+ * hash_mem (if a batch does exceed hash_mem, it must be spilled
  * recursively).
 *
 * Spilled data is written to logical tapes. These provide better control
@@ -212,7 +212,7 @@
 *
 * Note that it's possible for transition states to start small but then
 * grow very large; for instance in the case of ARRAY_AGG. In such cases,
- * it's still possible to significantly exceed work_mem. We try to avoid
+ * it's still possible to significantly exceed hash_mem. We try to avoid
 * this situation by estimating what will fit in the available memory, and
 * imposing a limit on the number of groups separately from the amount of
 * memory consumed.
@@ -1516,7 +1516,7 @@ build_hash_table(AggState *aggstate, int setno, long nbuckets)
 
 	/*
 	 * Used to make sure initial hash table allocation does not exceed
-	 * work_mem. Note that the estimate does not include space for
+	 * hash_mem. Note that the estimate does not include space for
 	 * pass-by-reference transition data values, nor for the representative
 	 * tuple of each group.
 	 */
@@ -1782,7 +1782,7 @@ hashagg_recompile_expressions(AggState *aggstate, bool minslot, bool nullcheck)
 }
 
 /*
- * Set limits that trigger spilling to avoid exceeding work_mem. Consider the
+ * Set limits that trigger spilling to avoid exceeding hash_mem. Consider the
 * number of partitions we expect to create (if we do spill).
 *
 * There are two limits: a memory limit, and also an ngroups limit. The
@@ -1796,13 +1796,14 @@ hash_agg_set_limits(double hashentrysize, double input_groups, int used_bits,
 {
 	int			npartitions;
 	Size		partition_mem;
+	int			hash_mem = get_hash_mem();
 
-	/* if not expected to spill, use all of work_mem */
-	if (input_groups * hashentrysize < work_mem * 1024L)
+	/* if not expected to spill, use all of hash_mem */
+	if (input_groups * hashentrysize < hash_mem * 1024L)
 	{
 		if (num_partitions != NULL)
 			*num_partitions = 0;
-		*mem_limit = work_mem * 1024L;
+		*mem_limit = hash_mem * 1024L;
 		*ngroups_limit = *mem_limit / hashentrysize;
 		return;
 	}
@@ -1824,14 +1825,14 @@ hash_agg_set_limits(double hashentrysize, double input_groups, int used_bits,
 		HASHAGG_WRITE_BUFFER_SIZE * npartitions;
 
 	/*
-	 * Don't set the limit below 3/4 of work_mem. In that case, we are at the
+	 * Don't set the limit below 3/4 of hash_mem. In that case, we are at the
 	 * minimum number of partitions, so we aren't going to dramatically exceed
 	 * work mem anyway.
 	 */
-	if (work_mem * 1024L > 4 * partition_mem)
-		*mem_limit = work_mem * 1024L - partition_mem;
+	if (hash_mem * 1024L > 4 * partition_mem)
+		*mem_limit = hash_mem * 1024L - partition_mem;
 	else
-		*mem_limit = work_mem * 1024L * 0.75;
+		*mem_limit = hash_mem * 1024L * 0.75;
 
 	if (*mem_limit > hashentrysize)
 		*ngroups_limit = *mem_limit / hashentrysize;
@@ -1989,19 +1990,20 @@ hash_choose_num_partitions(double input_groups, double hashentrysize,
 	int			partition_limit;
 	int			npartitions;
 	int			partition_bits;
+	int			hash_mem = get_hash_mem();
 
 	/*
 	 * Avoid creating so many partitions that the memory requirements of the
-	 * open partition files are greater than 1/4 of work_mem.
+	 * open partition files are greater than 1/4 of hash_mem.
 	 */
 	partition_limit =
-		(work_mem * 1024L * 0.25 - HASHAGG_READ_BUFFER_SIZE) /
+		(hash_mem * 1024L * 0.25 - HASHAGG_READ_BUFFER_SIZE) /
 		HASHAGG_WRITE_BUFFER_SIZE;
 
 	mem_wanted = HASHAGG_PARTITION_FACTOR * input_groups * hashentrysize;
 
 	/* make enough partitions so that each one is likely to fit in memory */
-	npartitions = 1 + (mem_wanted / (work_mem * 1024L));
+	npartitions = 1 + (mem_wanted / (hash_mem * 1024L));
 
 	if (npartitions > partition_limit)
 		npartitions = partition_limit;
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 45b342011fe..ea69eeb2a1e 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -39,6 +39,7 @@
 #include "port/atomics.h"
 #include "port/pg_bitutils.h"
 #include "utils/dynahash.h"
+#include "utils/guc.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/syscache.h"
@@ -506,7 +507,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
 	hashtable->spaceAllowed = space_allowed;
 	hashtable->spaceUsedSkew = 0;
 	hashtable->spaceAllowedSkew =
-		hashtable->spaceAllowed * SKEW_WORK_MEM_PERCENT / 100;
+		hashtable->spaceAllowed * SKEW_HASH_MEM_PERCENT / 100;
 	hashtable->chunks = NULL;
 	hashtable->current_chunk = NULL;
 	hashtable->parallel_state = state->parallel_state;
@@ -665,7 +666,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
 void
 ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
-						bool try_combined_work_mem,
+						bool try_combined_hash_mem,
 						int parallel_workers,
 						size_t *space_allowed,
 						int *numbuckets,
@@ -682,6 +683,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 	int			nbatch = 1;
 	int			nbuckets;
 	double		dbuckets;
+	int			hash_mem = get_hash_mem();
 
 	/* Force a plausible relation size if no info */
 	if (ntuples <= 0.0)
@@ -698,16 +700,16 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 	inner_rel_bytes = ntuples * tupsize;
 
 	/*
-	 * Target in-memory hashtable size is work_mem kilobytes.
+	 * Target in-memory hashtable size is hash_mem kilobytes.
 	 */
-	hash_table_bytes = work_mem * 1024L;
+	hash_table_bytes = hash_mem * 1024L;
 
 	/*
-	 * Parallel Hash tries to use the combined work_mem of all workers to
-	 * avoid the need to batch. If that won't work, it falls back to work_mem
+	 * Parallel Hash tries to use the combined hash_mem of all workers to
+	 * avoid the need to batch. If that won't work, it falls back to hash_mem
 	 * per worker and tries to process batches in parallel.
 	 */
-	if (try_combined_work_mem)
+	if (try_combined_hash_mem)
 		hash_table_bytes += hash_table_bytes * parallel_workers;
 
 	*space_allowed = hash_table_bytes;
@@ -728,7 +730,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 	 */
 	if (useskew)
 	{
-		skew_table_bytes = hash_table_bytes * SKEW_WORK_MEM_PERCENT / 100;
+		skew_table_bytes = hash_table_bytes * SKEW_HASH_MEM_PERCENT / 100;
 
 		/*----------
 		 * Divisor is:
@@ -751,7 +753,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 	/*
 	 * Set nbuckets to achieve an average bucket load of NTUP_PER_BUCKET when
 	 * memory is filled, assuming a single batch; but limit the value so that
-	 * the pointer arrays we'll try to allocate do not exceed work_mem nor
+	 * the pointer arrays we'll try to allocate do not exceed hash_mem nor
 	 * MaxAllocSize.
 	 *
 	 * Note that both nbuckets and nbatch must be powers of 2 to make
@@ -790,10 +792,10 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 		long		bucket_size;
 
 		/*
-		 * If Parallel Hash with combined work_mem would still need multiple
-		 * batches, we'll have to fall back to regular work_mem budget.
+		 * If Parallel Hash with combined hash_mem would still need multiple
+		 * batches, we'll have to fall back to regular hash_mem budget.
 		 */
-		if (try_combined_work_mem)
+		if (try_combined_hash_mem)
 		{
 			ExecChooseHashTableSize(ntuples, tupwidth, useskew, false,
 									parallel_workers,
@@ -805,7 +807,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 		}
 
 		/*
-		 * Estimate the number of buckets we'll want to have when work_mem is
+		 * Estimate the number of buckets we'll want to have when hash_mem is
 		 * entirely full. Each bucket will contain a bucket pointer plus
 		 * NTUP_PER_BUCKET tuples, whose projected size already includes
 		 * overhead for the hash code, pointer to the next tuple, etc.
@@ -820,8 +822,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 		/*
 		 * Buckets are simple pointers to hashjoin tuples, while tupsize
 		 * includes the pointer, hash code, and MinimalTupleData. So buckets
-		 * should never really exceed 25% of work_mem (even for
-		 * NTUP_PER_BUCKET=1); except maybe for work_mem values that are not
+		 * should never really exceed 25% of hash_mem (even for
+		 * NTUP_PER_BUCKET=1); except maybe for hash_mem values that are not
 		 * 2^N bytes, where we might get more because of doubling. So let's
 		 * look for 50% here.
 		 */
@@ -1095,15 +1097,17 @@ ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable)
 				/* Figure out how many batches to use. */
 				if (hashtable->nbatch == 1)
 				{
+					int			hash_mem = get_hash_mem();
+
 					/*
 					 * We are going from single-batch to multi-batch. We need
 					 * to switch from one large combined memory budget to the
-					 * regular work_mem budget.
+					 * regular hash_mem budget.
 					 */
-					pstate->space_allowed = work_mem * 1024L;
+					pstate->space_allowed = hash_mem * 1024L;
 
 					/*
-					 * The combined work_mem of all participants wasn't
+					 * The combined hash_mem of all participants wasn't
 					 * enough. Therefore one batch per participant would be
 					 * approximately equivalent and would probably also be
 					 * insufficient. So try two batches per participant,
@@ -2855,7 +2859,7 @@ ExecParallelHashTupleAlloc(HashJoinTable hashtable, size_t size,
 
 	/*
 	 * Check if our space limit would be exceeded. To avoid choking on
-	 * very large tuples or very low work_mem setting, we'll always allow
+	 * very large tuples or very low hash_mem setting, we'll always allow
 	 * each backend to allocate at least one chunk.
 	 */
 	if (hashtable->batches[0].at_least_one_chunk &&
@@ -3366,3 +3370,41 @@ ExecParallelHashTuplePrealloc(HashJoinTable hashtable, int batchno, size_t size)
 
 	return true;
 }
+
+/*
+ * Get a hash_mem value by multiplying the work_mem GUC's value by the
+ * hash_mem_multiplier GUC's value.
+ *
+ * Returns a work_mem style KB value that hash-based nodes (including but not
+ * limited to hash join) use in place of work_mem. This is subject to the
+ * same restrictions as work_mem itself. (There is no such thing as the
+ * hash_mem GUC, but it's convenient for our callers to pretend that there
+ * is.)
+ *
+ * Exported for use by the planner, as well as other hash-based executor
+ * nodes. This is a rather random place for this, but there is no better
+ * place.
+ */
+int
+get_hash_mem(void)
+{
+	double		hash_mem;
+
+	Assert(hash_mem_multiplier >= 1.0);
+
+	hash_mem = (double) work_mem * hash_mem_multiplier;
+
+	/*
+	 * guc.c enforces a MAX_KILOBYTES limitation on work_mem in order to
+	 * support the assumption that raw derived byte values can be stored in
+	 * 'long' variables. The returned hash_mem value must also meet this
+	 * assumption.
+	 *
+	 * We clamp the final value rather than throw an error because it should
+	 * be possible to set work_mem and hash_mem_multiplier independently.
+	 */
+	if (hash_mem < MAX_KILOBYTES)
+		return (int) hash_mem;
+
+	return MAX_KILOBYTES;
+}
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 9bb23fef1a6..5532b91a71d 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -89,9 +89,9 @@
 *  PHJ_BUILD_HASHING_INNER so we can skip loading.
 *
 * Initially we try to plan for a single-batch hash join using the combined
- * work_mem of all participants to create a large shared hash table. If that
+ * hash_mem of all participants to create a large shared hash table. If that
 * turns out either at planning or execution time to be impossible then we
- * fall back to regular work_mem sized hash tables.
+ * fall back to regular hash_mem sized hash tables.
 *
 * To avoid deadlocks, we never wait for any barrier unless it is known that
 * all other backends attached to it are actively executing the node or have
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 27ce4cc8069..fda4b2c6e87 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3525,7 +3525,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace,
 	 * Get hash table size that executor would use for inner relation.
 	 *
 	 * XXX for the moment, always assume that skew optimization will be
-	 * performed. As long as SKEW_WORK_MEM_PERCENT is small, it's not worth
+	 * performed. As long as SKEW_HASH_MEM_PERCENT is small, it's not worth
 	 * trying to determine that for sure.
 	 *
 	 * XXX at some point it might be interesting to try to account for skew
@@ -3534,7 +3534,7 @@ initial_cost_hashjoin(PlannerInfo *root, JoinCostWorkspace *workspace,
 	ExecChooseHashTableSize(inner_path_rows_total,
 							inner_path->pathtarget->width,
 							true,	/* useskew */
-							parallel_hash,	/* try_combined_work_mem */
+							parallel_hash,	/* try_combined_hash_mem */
 							outer_path->parallel_workers,
 							&space_allowed,
 							&numbuckets,
@@ -3597,6 +3597,7 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 	Cost		run_cost = workspace->run_cost;
 	int			numbuckets = workspace->numbuckets;
 	int			numbatches = workspace->numbatches;
+	int			hash_mem;
 	Cost		cpu_per_tuple;
 	QualCost	hash_qual_cost;
 	QualCost	qp_qual_cost;
@@ -3715,16 +3716,17 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 	}
 
 	/*
-	 * If the bucket holding the inner MCV would exceed work_mem, we don't
+	 * If the bucket holding the inner MCV would exceed hash_mem, we don't
 	 * want to hash unless there is really no other alternative, so apply
 	 * disable_cost. (The executor normally copes with excessive memory usage
 	 * by splitting batches, but obviously it cannot separate equal values
-	 * that way, so it will be unable to drive the batch size below work_mem
+	 * that way, so it will be unable to drive the batch size below hash_mem
 	 * when this is true.)
 	 */
+	hash_mem = get_hash_mem();
 	if (relation_byte_size(clamp_row_est(inner_path_rows * innermcvfreq),
 						   inner_path->pathtarget->width) >
-		(work_mem * 1024L))
+		(hash_mem * 1024L))
 		startup_cost += disable_cost;
 
 	/*
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 1345e522dcf..b40a112c25b 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -4196,16 +4196,17 @@ consider_groupingsets_paths(PlannerInfo *root,
 							double dNumGroups)
 {
 	Query	   *parse = root->parse;
+	int			hash_mem = get_hash_mem();
 
 	/*
 	 * If we're not being offered sorted input, then only consider plans that
 	 * can be done entirely by hashing.
 	 *
-	 * We can hash everything if it looks like it'll fit in work_mem. But if
+	 * We can hash everything if it looks like it'll fit in hash_mem. But if
 	 * the input is actually sorted despite not being advertised as such, we
 	 * prefer to make use of that in order to use less memory.
 	 *
-	 * If none of the grouping sets are sortable, then ignore the work_mem
+	 * If none of the grouping sets are sortable, then ignore the hash_mem
 	 * limit and generate a path anyway, since otherwise we'll just fail.
 	 */
 	if (!is_sorted)
@@ -4257,10 +4258,10 @@ consider_groupingsets_paths(PlannerInfo *root,
 
 		/*
 		 * gd->rollups is empty if we have only unsortable columns to work
-		 * with. Override work_mem in that case; otherwise, we'll rely on the
+		 * with. Override hash_mem in that case; otherwise, we'll rely on the
 		 * sorted-input case to generate usable mixed paths.
 		 */
-		if (hashsize > work_mem * 1024L && gd->rollups)
+		if (hashsize > hash_mem * 1024L && gd->rollups)
 			return;				/* nope, won't fit */
 
 		/*
@@ -4379,7 +4380,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 	{
 		List	   *rollups = NIL;
 		List	   *hash_sets = list_copy(gd->unsortable_sets);
-		double		availspace = (work_mem * 1024.0);
+		double		availspace = (hash_mem * 1024.0);
 		ListCell   *lc;
 
 		/*
@@ -4400,7 +4401,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 		/*
 		 * We treat this as a knapsack problem: the knapsack capacity
-		 * represents work_mem, the item weights are the estimated memory
+		 * represents hash_mem, the item weights are the estimated memory
 		 * usage of the hashtables needed to implement a single rollup,
 		 * and we really ought to use the cost saving as the item value;
 		 * however, currently the costs assigned to sort nodes don't
@@ -4441,7 +4442,7 @@ consider_groupingsets_paths(PlannerInfo *root,
 												rollup->numGroups);
 
 				/*
-				 * If sz is enormous, but work_mem (and hence scale) is
+				 * If sz is enormous, but hash_mem (and hence scale) is
 				 * small, avoid integer overflow here.
 				 */
 				k_weights[i] = (int) Min(floor(sz / scale),
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index b02fcb9bfe7..9a8f738c9d0 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -200,7 +200,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
 	 * XXX If an ANY subplan is uncorrelated, build_subplan may decide to hash
 	 * its output. In that case it would've been better to specify full
 	 * retrieval. At present, however, we can only check hashability after
-	 * we've made the subplan :-(. (Determining whether it'll fit in work_mem
+	 * we've made the subplan :-(. (Determining whether it'll fit in hash_mem
 	 * is the really hard part.) Therefore, we don't want to be too
 	 * optimistic about the percentage of tuples retrieved, for fear of
 	 * selecting a plan that's bad for the materialization case.
@@ -278,7 +278,7 @@ make_subplan(PlannerInfo *root, Query *orig_subquery,
 
 	plan = create_plan(subroot, best_path);
 
-	/* Now we can check if it'll fit in work_mem */
+	/* Now we can check if it'll fit in hash_mem */
 	/* XXX can we check this at the Path stage? */
 	if (subplan_is_hashable(plan))
 	{
@@ -716,16 +716,17 @@ static bool
 subplan_is_hashable(Plan *plan)
 {
 	double		subquery_size;
+	int			hash_mem = get_hash_mem();
 
 	/*
-	 * The estimated size of the subquery result must fit in work_mem. (Note:
+	 * The estimated size of the subquery result must fit in hash_mem. (Note:
 	 * we use heap tuple overhead here even though the tuples will actually be
 	 * stored as MinimalTuples; this provides some fudge factor for hashtable
 	 * overhead.)
 	 */
 	subquery_size = plan->plan_rows *
 		(MAXALIGN(plan->plan_width) + MAXALIGN(SizeofHeapTupleHeader));
-	if (subquery_size > work_mem * 1024L)
+	if (subquery_size > hash_mem * 1024L)
 		return false;
 
 	return true;
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 6588f83d5ec..2ebd4ea3320 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -1018,6 +1018,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
 					const char *construct)
 {
 	int			numGroupCols = list_length(groupClauses);
+	int			hash_mem = get_hash_mem();
 	bool		can_sort;
 	bool		can_hash;
 	Size		hashentrysize;
@@ -1049,15 +1050,17 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
 
 	/*
 	 * Don't do it if it doesn't look like the hashtable will fit into
-	 * work_mem.
+	 * hash_mem.
 	 */
 	hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
 
-	if (hashentrysize * dNumGroups > work_mem * 1024L)
+	if (hashentrysize * dNumGroups > hash_mem * 1024L)
 		return false;
 
 	/*
-	 * See if the estimated cost is no more than doing it the other way.
+	 * See if the estimated cost is no more than doing it the other way. We
+	 * deliberately give the hash case more memory when hash_mem exceeds
+	 * standard work mem (i.e. when hash_mem_multiplier exceeds 1.0).
 	 *
 	 * We need to consider input_plan + hashagg versus input_plan + sort +
 	 * group. Note that the actual result plan might involve a SetOp or
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 5110a6b8060..c1fc866cbf9 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1720,8 +1720,9 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 		 * planner.c).
 		 */
 		int			hashentrysize = subpath->pathtarget->width + 64;
+		int			hash_mem = get_hash_mem();
 
-		if (hashentrysize * pathnode->path.rows > work_mem * 1024L)
+		if (hashentrysize * pathnode->path.rows > hash_mem * 1024L)
 		{
 			/*
 			 * We should not try to hash. Hack the SpecialJoinInfo to
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index bb49e80d166..06cf16d9d71 100644
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -1450,7 +1450,9 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel)
 	 * enough to not use a multiple of work_mem, and one typically would not
 	 * have many large foreign-key validations happening concurrently. So
 	 * this seems to meet the criteria for being considered a "maintenance"
-	 * operation, and accordingly we use maintenance_work_mem.
+	 * operation, and accordingly we use maintenance_work_mem. However, we
+	 * must also set hash_mem_multiplier to 1, since it is surely not okay to
+	 * let that get applied to the maintenance_work_mem value.
 	 *
 	 * We use the equivalent of a function SET option to allow the setting to
 	 * persist for exactly the duration of the check query. guc.c also takes
@@ -1462,6 +1464,9 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel)
 	(void) set_config_option("work_mem", workmembuf,
 							 PGC_USERSET, PGC_S_SESSION,
 							 GUC_ACTION_SAVE, true, 0, false);
+	(void) set_config_option("hash_mem_multiplier", "1",
+							 PGC_USERSET, PGC_S_SESSION,
+							 GUC_ACTION_SAVE, true, 0, false);
 
 	if (SPI_connect() != SPI_OK_CONNECT)
 		elog(ERROR, "SPI_connect failed");
@@ -1553,7 +1558,7 @@ RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel)
 		elog(ERROR, "SPI_finish failed");
 
 	/*
-	 * Restore work_mem.
+	 * Restore work_mem and hash_mem_multiplier.
 	 */
 	AtEOXact_GUC(true, save_nestlevel);
 
@@ -1685,7 +1690,9 @@ RI_PartitionRemove_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel)
 	 * enough to not use a multiple of work_mem, and one typically would not
 	 * have many large foreign-key validations happening concurrently. So
 	 * this seems to meet the criteria for being considered a "maintenance"
-	 * operation, and accordingly we use maintenance_work_mem.
+	 * operation, and accordingly we use maintenance_work_mem. However, we
+	 * must also set hash_mem_multiplier to 1, since it is surely not okay to
+	 * let that get applied to the maintenance_work_mem value.
 	 *
 	 * We use the equivalent of a function SET option to allow the setting to
 	 * persist for exactly the duration of the check query. guc.c also takes
@@ -1697,6 +1704,9 @@ RI_PartitionRemove_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel)
 	(void) set_config_option("work_mem", workmembuf,
 							 PGC_USERSET, PGC_S_SESSION,
 							 GUC_ACTION_SAVE, true, 0, false);
+	(void) set_config_option("hash_mem_multiplier", "1",
+							 PGC_USERSET, PGC_S_SESSION,
+							 GUC_ACTION_SAVE, true, 0, false);
 
 	if (SPI_connect() != SPI_OK_CONNECT)
 		elog(ERROR, "SPI_connect failed");
@@ -1763,7 +1773,7 @@ RI_PartitionRemove_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel)
 		elog(ERROR, "SPI_finish failed");
 
 	/*
-	 * Restore work_mem.
+	 * Restore work_mem and hash_mem_multiplier.
 	 */
 	AtEOXact_GUC(true, save_nestlevel);
 }
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 497d7c38ae6..6ab82168398 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -119,6 +119,7 @@ int			IntervalStyle = INTSTYLE_POSTGRES;
 bool		enableFsync = true;
 bool		allowSystemTableMods = false;
 int			work_mem = 4096;
+double		hash_mem_multiplier = 1.0;
 int			maintenance_work_mem = 65536;
 int			max_parallel_maintenance_workers = 2;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index abfa95a2314..c20885e97b2 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3543,6 +3543,17 @@ static struct config_real ConfigureNamesReal[] =
 	},
 
 	{
+		{"hash_mem_multiplier", PGC_USERSET, RESOURCES_MEM,
+			gettext_noop("Multiple of work_mem to use for hash tables."),
+			NULL,
+			GUC_EXPLAIN
+		},
+		&hash_mem_multiplier,
+		1.0, 1.0, 1000.0,
+		NULL, NULL, NULL
+	},
+
+	{
 		{"bgwriter_lru_multiplier", PGC_SIGHUP, RESOURCES_BGWRITER,
 			gettext_noop("Multiple of the average buffer usage to free per round."),
 			NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 5a0b8e98217..aa30291ea39 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -130,6 +130,7 @@
 # Caution: it is not advisable to set max_prepared_transactions nonzero unless
 # you actively intend to use prepared transactions.
 #work_mem = 4MB				# min 64kB
+#hash_mem_multiplier = 1.0		# 1-1000.0 multiplier on hash table work_mem
 #maintenance_work_mem = 64MB		# min 1MB
 #autovacuum_work_mem = -1		# min 1MB, or -1 to use maintenance_work_mem
 #logical_decoding_work_mem = 64MB	# min 64kB
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index 79b634e8ed1..eb5daba36b0 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -88,7 +88,7 @@ typedef struct HashJoinTupleData
 * outer relation tuples with these hash values are matched against that
 * table instead of the main one. Thus, tuples with these hash values are
 * effectively handled as part of the first batch and will never go to disk.
- * The skew hashtable is limited to SKEW_WORK_MEM_PERCENT of the total memory
+ * The skew hashtable is limited to SKEW_HASH_MEM_PERCENT of the total memory
 * allowed for the join; while building the hashtables, we decrease the number
 * of MCVs being specially treated if needed to stay under this limit.
 *
@@ -107,7 +107,7 @@ typedef struct HashSkewBucket
 #define SKEW_BUCKET_OVERHEAD	MAXALIGN(sizeof(HashSkewBucket))
 #define INVALID_SKEW_BUCKET_NO	(-1)
-#define SKEW_WORK_MEM_PERCENT	2
+#define SKEW_HASH_MEM_PERCENT	2
 #define SKEW_MIN_OUTER_FRACTION	0.01
 
 /*
diff --git a/src/include/executor/nodeHash.h b/src/include/executor/nodeHash.h
index 64d2ce693ca..2db4e2f6726 100644
--- a/src/include/executor/nodeHash.h
+++ b/src/include/executor/nodeHash.h
@@ -61,7 +61,7 @@ extern bool ExecScanHashTableForUnmatched(HashJoinState *hjstate,
 extern void ExecHashTableReset(HashJoinTable hashtable);
 extern void ExecHashTableResetMatchFlags(HashJoinTable hashtable);
 extern void ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
-									bool try_combined_work_mem,
+									bool try_combined_hash_mem,
 									int parallel_workers,
 									size_t *space_allowed,
 									int *numbuckets,
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 18bc8a7b904..72e33523984 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -243,6 +243,7 @@ extern PGDLLIMPORT int IntervalStyle;
 extern bool enableFsync;
 extern PGDLLIMPORT bool allowSystemTableMods;
 extern PGDLLIMPORT int work_mem;
+extern PGDLLIMPORT double hash_mem_multiplier;
 extern PGDLLIMPORT int maintenance_work_mem;
 extern PGDLLIMPORT int max_parallel_maintenance_workers;
 
@@ -469,4 +470,7 @@ extern bool has_rolreplication(Oid roleid);
 extern bool BackupInProgress(void);
 extern void CancelBackup(void);
 
+/* in executor/nodeHash.c */
+extern int	get_hash_mem(void);
+
 #endif							/* MISCADMIN_H */
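
For readers skimming the patch, the core of the change is the new get_hash_mem() helper plus the mechanical conversion of hash-based call sites from work_mem to hash_mem. The standalone sketch below mirrors that arithmetic outside the backend so it can be compiled and run on its own; the GUC stand-in variables, the example multiplier value, and the simplified MAX_KILOBYTES definition are illustrative assumptions, not the backend's actual definitions (which live in globals.c and guc.h).

#include <stdio.h>
#include <limits.h>

/* Illustrative stand-ins for the GUC variables touched by this patch. */
static int work_mem = 4096;              /* KB, matching the guc.c default */
static double hash_mem_multiplier = 2.0; /* example setting, not the 1.0 default */

/* Simplified stand-in for guc.h's MAX_KILOBYTES (real value is platform-dependent). */
#define MAX_KILOBYTES (INT_MAX / 1024)

/* Mirrors the arithmetic of get_hash_mem() in nodeHash.c: scale work_mem by
 * hash_mem_multiplier and clamp the result to MAX_KILOBYTES. */
static int
example_hash_mem(void)
{
	double hash_mem = (double) work_mem * hash_mem_multiplier;

	if (hash_mem < MAX_KILOBYTES)
		return (int) hash_mem;
	return MAX_KILOBYTES;
}

int
main(void)
{
	/* Callers in the patch convert the KB value to a byte budget, e.g.
	 * hash_table_bytes = hash_mem * 1024L in ExecChooseHashTableSize(). */
	long hash_table_bytes = (long) example_hash_mem() * 1024L;

	printf("hash_mem = %d kB, hash table budget = %ld bytes\n",
		   example_hash_mem(), hash_table_bytes);
	return 0;
}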