185 files changed, 6651 insertions, 5433 deletions
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 01e1db7f856..4204088fa0d 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -68,7 +68,7 @@ typedef struct BrinShared
 	int			scantuplesortstates;
 
 	/* Query ID, for report in worker processes */
-	uint64		queryid;
+	int64		queryid;
 
 	/*
 	 * workersdonecv is used to monitor the progress of workers.  All parallel
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 46c1dce222d..50747c16396 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -1243,8 +1243,9 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
 		}
 		else
 		{
-			text	   *t;
+			const char *name;
 			const char *value;
+			text	   *t;
 			Size		len;
 
 			/*
@@ -1291,11 +1292,19 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
 			 * have just "name", assume "name=true" is meant.  Note: the
 			 * namespace is not output.
 			 */
+			name = def->defname;
 			if (def->arg != NULL)
 				value = defGetString(def);
 			else
 				value = "true";
 
+			/* Insist that name not contain "=", else "a=b=c" is ambiguous */
+			if (strchr(name, '=') != NULL)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("invalid option name \"%s\": must not contain \"=\"",
+								name)));
+
 			/*
 			 * This is not a great place for this test, but there's no other
 			 * convenient place to filter the option out. As WITH (oids =
@@ -1303,7 +1312,7 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
 			 * amount of ugly.
 			 */
 			if (acceptOidsOff && def->defnamespace == NULL &&
-				strcmp(def->defname, "oids") == 0)
+				strcmp(name, "oids") == 0)
 			{
 				if (defGetBoolean(def))
 					ereport(ERROR,
@@ -1313,11 +1322,11 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
 				continue;
 			}
 
-			len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value);
+			len = VARHDRSZ + strlen(name) + 1 + strlen(value);
 			/* +1 leaves room for sprintf's trailing null */
 			t = (text *) palloc(len + 1);
 			SET_VARSIZE(t, len);
-			sprintf(VARDATA(t), "%s=%s", def->defname, value);
+			sprintf(VARDATA(t), "%s=%s", name, value);
 
 			astate = accumArrayResult(astate, PointerGetDatum(t),
 									  false, TEXTOID,
diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c
index 21f2f4af97e..926f1e4008a 100644
--- a/src/backend/access/common/toast_compression.c
+++ b/src/backend/access/common/toast_compression.c
@@ -25,11 +25,11 @@
 /* GUC */
 int			default_toast_compression = TOAST_PGLZ_COMPRESSION;
 
-#define NO_LZ4_SUPPORT() \
+#define NO_COMPRESSION_SUPPORT(method) \
 	ereport(ERROR, \
 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
-			 errmsg("compression method lz4 not supported"), \
-			 errdetail("This functionality requires the server to be built with lz4 support.")))
+			 errmsg("compression method %s not supported", method), \
+			 errdetail("This functionality requires the server to be built with %s support.", method)))
 
 /*
  * Compress a varlena using PGLZ.
@@ -139,7 +139,7 @@ struct varlena *
 lz4_compress_datum(const struct varlena *value)
 {
 #ifndef USE_LZ4
-	NO_LZ4_SUPPORT();
+	NO_COMPRESSION_SUPPORT("lz4");
 	return NULL;				/* keep compiler quiet */
 #else
 	int32		valsize;
@@ -182,7 +182,7 @@ struct varlena *
 lz4_decompress_datum(const struct varlena *value)
 {
 #ifndef USE_LZ4
-	NO_LZ4_SUPPORT();
+	NO_COMPRESSION_SUPPORT("lz4");
 	return NULL;				/* keep compiler quiet */
 #else
 	int32		rawsize;
@@ -215,7 +215,7 @@ struct varlena *
 lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
 {
 #ifndef USE_LZ4
-	NO_LZ4_SUPPORT();
+	NO_COMPRESSION_SUPPORT("lz4");
 	return NULL;				/* keep compiler quiet */
 #else
 	int32		rawsize;
@@ -289,7 +289,7 @@ CompressionNameToMethod(const char *compression)
 	else if (strcmp(compression, "lz4") == 0)
 	{
 #ifndef USE_LZ4
-		NO_LZ4_SUPPORT();
+		NO_COMPRESSION_SUPPORT("lz4");
 #endif
 		return TOAST_LZ4_COMPRESSION;
 	}
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index ffd0c78f905..020d00cd01c 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -142,11 +142,18 @@ void
 verify_compact_attribute(TupleDesc tupdesc, int attnum)
 {
 #ifdef USE_ASSERT_CHECKING
-	CompactAttribute *cattr = &tupdesc->compact_attrs[attnum];
+	CompactAttribute cattr;
 	Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum);
 	CompactAttribute tmp;
 
 	/*
+	 * Make a temp copy of the TupleDesc's CompactAttribute.  This may be a
+	 * shared TupleDesc and the attcacheoff might get changed by another
+	 * backend.
+	 */
+	memcpy(&cattr, &tupdesc->compact_attrs[attnum], sizeof(CompactAttribute));
+
+	/*
 	 * Populate the temporary CompactAttribute from the corresponding
 	 * Form_pg_attribute
 	 */
@@ -156,11 +163,11 @@ verify_compact_attribute(TupleDesc tupdesc, int attnum)
 	 * Make the attcacheoff match since it's been reset to -1 by
 	 * populate_compact_attribute_internal.  Same with attnullability.
 	 */
-	tmp.attcacheoff = cattr->attcacheoff;
-	tmp.attnullability = cattr->attnullability;
+	tmp.attcacheoff = cattr.attcacheoff;
+	tmp.attnullability = cattr.attnullability;
 
 	/* Check the freshly populated CompactAttribute matches the TupleDesc's */
-	Assert(memcmp(&tmp, cattr, sizeof(CompactAttribute)) == 0);
+	Assert(memcmp(&tmp, &cattr, sizeof(CompactAttribute)) == 0);
 #endif
 }
 
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index a6b701943d3..c0aa7d0222f 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -1058,11 +1058,11 @@ gistGetFakeLSN(Relation rel)
 }
 
 /*
- * This is a stratnum support function for GiST opclasses that use the
- * RT*StrategyNumber constants.
+ * This is a stratnum translation support function for GiST opclasses that use
+ * the RT*StrategyNumber constants.
  */
 Datum
-gist_stratnum_common(PG_FUNCTION_ARGS)
+gist_translate_cmptype_common(PG_FUNCTION_ARGS)
 {
 	CompareType cmptype = PG_GETARG_INT32(0);
 
@@ -1090,9 +1090,9 @@ gist_stratnum_common(PG_FUNCTION_ARGS)
 /*
  * Returns the opclass's private stratnum used for the given compare type.
  *
- * Calls the opclass's GIST_STRATNUM_PROC support function, if any,
- * and returns the result.
- * Returns InvalidStrategy if the function is not defined.
+ * Calls the opclass's GIST_TRANSLATE_CMPTYPE_PROC support function, if any,
+ * and returns the result.  Returns InvalidStrategy if the function is not
+ * defined.
  */
 StrategyNumber
 gisttranslatecmptype(CompareType cmptype, Oid opfamily)
@@ -1101,7 +1101,7 @@ gisttranslatecmptype(CompareType cmptype, Oid opfamily)
 	Datum		result;
 
 	/* Check whether the function is provided. */
-	funcid = get_opfamily_proc(opfamily, ANYOID, ANYOID, GIST_STRATNUM_PROC);
+	funcid = get_opfamily_proc(opfamily, ANYOID, ANYOID, GIST_TRANSLATE_CMPTYPE_PROC);
 	if (!OidIsValid(funcid))
 		return InvalidStrategy;
 
diff --git a/src/backend/access/gist/gistvalidate.c b/src/backend/access/gist/gistvalidate.c
index 2a49e6d20f0..2ed6f74fce9 100644
--- a/src/backend/access/gist/gistvalidate.c
+++ b/src/backend/access/gist/gistvalidate.c
@@ -138,7 +138,7 @@ gistvalidate(Oid opclassoid)
 				ok = check_amproc_signature(procform->amproc, VOIDOID, true,
 											1, 1, INTERNALOID);
 				break;
-			case GIST_STRATNUM_PROC:
+			case GIST_TRANSLATE_CMPTYPE_PROC:
 				ok = check_amproc_signature(procform->amproc, INT2OID, true,
 											1, 1, INT4OID) &&
 					procform->amproclefttype == ANYOID &&
@@ -265,7 +265,7 @@ gistvalidate(Oid opclassoid)
 		if (i == GIST_DISTANCE_PROC || i == GIST_FETCH_PROC ||
 			i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC ||
 			i == GIST_OPTIONS_PROC || i == GIST_SORTSUPPORT_PROC ||
-			i == GIST_STRATNUM_PROC)
+			i == GIST_TRANSLATE_CMPTYPE_PROC)
 			continue;			/* optional methods */
 		ereport(INFO,
 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
@@ -336,7 +336,7 @@ gistadjustmembers(Oid opfamilyoid,
 			case GIST_FETCH_PROC:
 			case GIST_OPTIONS_PROC:
 			case GIST_SORTSUPPORT_PROC:
-			case GIST_STRATNUM_PROC:
+			case GIST_TRANSLATE_CMPTYPE_PROC:
 				/* Optional, so force it to be a soft family dependency */
 				op->ref_is_hard = false;
 				op->ref_is_family = true;
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 9ec8cda1c68..0dcd6ee817e 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -213,6 +213,27 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] =
 #define TUPLOCK_from_mxstatus(status) \
 			(MultiXactStatusLock[(status)])
 
+/*
+ * Check that we have a valid snapshot if we might need TOAST access.
+ */
+static inline void
+AssertHasSnapshotForToast(Relation rel)
+{
+#ifdef USE_ASSERT_CHECKING
+
+	/* bootstrap mode in particular breaks this rule */
+	if (!IsNormalProcessingMode())
+		return;
+
+	/* if the relation doesn't have a TOAST table, we are good */
+	if (!OidIsValid(rel->rd_rel->reltoastrelid))
+		return;
+
+	Assert(HaveRegisteredOrActiveSnapshot());
+
+#endif							/* USE_ASSERT_CHECKING */
+}
+
 /* ----------------------------------------------------------------
  *						 heap support routines
  * ----------------------------------------------------------------
@@ -2066,6 +2087,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 	Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
 		   RelationGetNumberOfAttributes(relation));
 
+	AssertHasSnapshotForToast(relation);
+
 	/*
 	 * Fill in tuple header fields and toast the tuple if necessary.
 	 *
@@ -2343,6 +2366,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 	/* currently not needed (thus unsupported) for heap_multi_insert() */
 	Assert(!(options & HEAP_INSERT_NO_LOGICAL));
 
+	AssertHasSnapshotForToast(relation);
+
 	needwal = RelationNeedsWAL(relation);
 	saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
 												   HEAP_DEFAULT_FILLFACTOR);
@@ -2765,6 +2790,8 @@ heap_delete(Relation relation, ItemPointer tid,
 
 	Assert(ItemPointerIsValid(tid));
 
+	AssertHasSnapshotForToast(relation);
+
 	/*
 	 * Forbid this during a parallel operation, lest it allocate a combo CID.
 	 * Other workers might need that combo CID for visibility checks, and we
@@ -3260,6 +3287,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	Assert(HeapTupleHeaderGetNatts(newtup->t_data) <=
 		   RelationGetNumberOfAttributes(relation));
 
+	AssertHasSnapshotForToast(relation);
+
 	/*
 	 * Forbid this during a parallel operation, lest it allocate a combo CID.
 	 * Other workers might need that combo CID for visibility checks, and we
@@ -4953,7 +4982,7 @@ l3:
 					case LockWaitError:
 						if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
 														status, infomask, relation,
-														NULL, log_lock_failure))
+														NULL, log_lock_failures))
 							ereport(ERROR,
 									(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
 									 errmsg("could not obtain lock on row in relation \"%s\"",
@@ -4991,7 +5020,7 @@ l3:
 						}
 						break;
 					case LockWaitError:
-						if (!ConditionalXactLockTableWait(xwait, log_lock_failure))
+						if (!ConditionalXactLockTableWait(xwait, log_lock_failures))
 							ereport(ERROR,
 									(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
 									 errmsg("could not obtain lock on row in relation \"%s\"",
@@ -5256,7 +5285,7 @@ heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode,
 			break;
 
 		case LockWaitError:
-			if (!ConditionalLockTupleTuplock(relation, tid, mode, log_lock_failure))
+			if (!ConditionalLockTupleTuplock(relation, tid, mode, log_lock_failures))
 				ereport(ERROR,
 						(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
 						 errmsg("could not obtain lock on row in relation \"%s\"",
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index ac082fefa77..cb4bc35c93e 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -464,7 +464,7 @@ tuple_lock_retry:
 									return TM_WouldBlock;
 								break;
 							case LockWaitError:
-								if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failure))
+								if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failures))
 									ereport(ERROR,
 											(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
 											 errmsg("could not obtain lock on row in relation \"%s\"",
diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index 30f4c2d3c67..eb4bd3d6ae3 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -438,6 +438,9 @@ heap_xlog_insert(XLogReaderState *record)
 	ItemPointerSetBlockNumber(&target_tid, blkno);
 	ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
 
+	/* No freezing in the heap_insert() code path */
+	Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
+
 	/*
 	 * The visibility map may need to be fixed even if the heap page is
 	 * already up-to-date.
@@ -508,10 +511,6 @@ heap_xlog_insert(XLogReaderState *record)
 		if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
 			PageClearAllVisible(page);
 
-		/* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
-		if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
-			PageSetAllVisible(page);
-
 		MarkBufferDirty(buffer);
 	}
 	if (BufferIsValid(buffer))
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index f28326bad09..14036c27e87 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -423,7 +423,7 @@ typedef struct LVSavedErrInfo
 /* non-export function prototypes */
 static void lazy_scan_heap(LVRelState *vacrel);
 static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
-										 VacuumParams *params);
+										 const VacuumParams params);
 static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
 											void *callback_private_data,
 											void *per_buffer_data);
@@ -431,7 +431,7 @@ static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
 static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
 								   BlockNumber blkno, Page page,
 								   bool sharelock, Buffer vmbuffer);
-static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
+static int	lazy_scan_prune(LVRelState *vacrel, Buffer buf,
 							BlockNumber blkno, Page page,
 							Buffer vmbuffer, bool all_visible_according_to_vm,
 							bool *has_lpdead_items, bool *vm_page_frozen);
@@ -485,7 +485,7 @@ static void restore_vacuum_error_info(LVRelState *vacrel,
  * vacuum options or for relfrozenxid/relminmxid advancement.
  */
 static void
-heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
+heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
 {
 	uint32		randseed;
 	BlockNumber allvisible;
@@ -504,7 +504,7 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
 	vacrel->eager_scan_remaining_successes = 0;
 
 	/* If eager scanning is explicitly disabled, just return. */
-	if (params->max_eager_freeze_failure_rate == 0)
+	if (params.max_eager_freeze_failure_rate == 0)
 		return;
 
 	/*
@@ -581,11 +581,11 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
 
 	vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
 
-	Assert(params->max_eager_freeze_failure_rate > 0 &&
-		   params->max_eager_freeze_failure_rate <= 1);
+	Assert(params.max_eager_freeze_failure_rate > 0 &&
+		   params.max_eager_freeze_failure_rate <= 1);
 
 	vacrel->eager_scan_max_fails_per_region =
-		params->max_eager_freeze_failure_rate *
+		params.max_eager_freeze_failure_rate *
 		EAGER_SCAN_REGION_SIZE;
 
 	/*
@@ -612,7 +612,7 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
  *		and locked the relation.
  */
 void
-heap_vacuum_rel(Relation rel, VacuumParams *params,
+heap_vacuum_rel(Relation rel, const VacuumParams params,
 				BufferAccessStrategy bstrategy)
 {
 	LVRelState *vacrel;
@@ -634,9 +634,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	ErrorContextCallback errcallback;
 	char	  **indnames = NULL;
 
-	verbose = (params->options & VACOPT_VERBOSE) != 0;
+	verbose = (params.options & VACOPT_VERBOSE) != 0;
 	instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
-							  params->log_min_duration >= 0));
+							  params.log_min_duration >= 0));
 	if (instrument)
 	{
 		pg_rusage_init(&ru0);
@@ -699,9 +699,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	 * The truncate param allows user to avoid attempting relation truncation,
 	 * though it can't force truncation to happen.
 	 */
-	Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
-	Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
-		   params->truncate != VACOPTVALUE_AUTO);
+	Assert(params.index_cleanup != VACOPTVALUE_UNSPECIFIED);
+	Assert(params.truncate != VACOPTVALUE_UNSPECIFIED &&
+		   params.truncate != VACOPTVALUE_AUTO);
 
 	/*
 	 * While VacuumFailSafeActive is reset to false before calling this, we
@@ -711,14 +711,14 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	vacrel->consider_bypass_optimization = true;
 	vacrel->do_index_vacuuming = true;
 	vacrel->do_index_cleanup = true;
-	vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
-	if (params->index_cleanup == VACOPTVALUE_DISABLED)
+	vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
+	if (params.index_cleanup == VACOPTVALUE_DISABLED)
 	{
 		/* Force disable index vacuuming up-front */
 		vacrel->do_index_vacuuming = false;
 		vacrel->do_index_cleanup = false;
 	}
-	else if (params->index_cleanup == VACOPTVALUE_ENABLED)
+	else if (params.index_cleanup == VACOPTVALUE_ENABLED)
 	{
 		/* Force index vacuuming.  Note that failsafe can still bypass. */
 		vacrel->consider_bypass_optimization = false;
@@ -726,7 +726,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	else
 	{
 		/* Default/auto, make all decisions dynamically */
-		Assert(params->index_cleanup == VACOPTVALUE_AUTO);
+		Assert(params.index_cleanup == VACOPTVALUE_AUTO);
 	}
 
 	/* Initialize page counters explicitly (be tidy) */
@@ -757,7 +757,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	vacrel->vm_new_visible_pages = 0;
 	vacrel->vm_new_visible_frozen_pages = 0;
 	vacrel->vm_new_frozen_pages = 0;
-	vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
 
 	/*
 	 * Get cutoffs that determine which deleted tuples are considered DEAD,
@@ -776,7 +775,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	 * to increase the number of dead tuples it can prune away.)
 	 */
 	vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
+	vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
 	vacrel->vistest = GlobalVisTestFor(rel);
+
 	/* Initialize state used to track oldest extant XID/MXID */
 	vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
 	vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
@@ -788,7 +789,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	 */
 	vacrel->skippedallvis = false;
 	skipwithvm = true;
-	if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
+	if (params.options & VACOPT_DISABLE_PAGE_SKIPPING)
 	{
 		/*
 		 * Force aggressive mode, and disable skipping blocks using the
@@ -829,7 +830,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	 * is already dangerously old.)
 	 */
 	lazy_check_wraparound_failsafe(vacrel);
-	dead_items_alloc(vacrel, params->nworkers);
+	dead_items_alloc(vacrel, params.nworkers);
 
 	/*
 	 * Call lazy_scan_heap to perform all required heap pruning, index
@@ -946,9 +947,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	{
 		TimestampTz endtime = GetCurrentTimestamp();
 
-		if (verbose || params->log_min_duration == 0 ||
+		if (verbose || params.log_min_duration == 0 ||
 			TimestampDifferenceExceeds(starttime, endtime,
-									   params->log_min_duration))
+									   params.log_min_duration))
 		{
 			long		secs_dur;
 			int			usecs_dur;
@@ -983,10 +984,10 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 				 * Aggressiveness already reported earlier, in dedicated
 				 * VACUUM VERBOSE ereport
 				 */
-				Assert(!params->is_wraparound);
+				Assert(!params.is_wraparound);
 				msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
 			}
-			else if (params->is_wraparound)
+			else if (params.is_wraparound)
 			{
 				/*
 				 * While it's possible for a VACUUM to be both is_wraparound
@@ -1244,6 +1245,7 @@ lazy_scan_heap(LVRelState *vacrel)
 		Buffer		buf;
 		Page		page;
 		uint8		blk_info = 0;
+		int			ndeleted = 0;
 		bool		has_lpdead_items;
 		void	   *per_buffer_data = NULL;
 		bool		vm_page_frozen = false;
@@ -1386,10 +1388,10 @@ lazy_scan_heap(LVRelState *vacrel)
 		 * line pointers previously marked LP_DEAD.
 		 */
 		if (got_cleanup_lock)
-			lazy_scan_prune(vacrel, buf, blkno, page,
-							vmbuffer,
-							blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM,
-							&has_lpdead_items, &vm_page_frozen);
+			ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
+									   vmbuffer,
+									   blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM,
+									   &has_lpdead_items, &vm_page_frozen);
 
 		/*
 		 * Count an eagerly scanned page as a failure or a success.
@@ -1413,12 +1415,26 @@ lazy_scan_heap(LVRelState *vacrel)
 
 			if (vm_page_frozen)
 			{
-				Assert(vacrel->eager_scan_remaining_successes > 0);
-				vacrel->eager_scan_remaining_successes--;
+				if (vacrel->eager_scan_remaining_successes > 0)
+					vacrel->eager_scan_remaining_successes--;
 
 				if (vacrel->eager_scan_remaining_successes == 0)
 				{
 					/*
+					 * Report only once that we disabled eager scanning. We
+					 * may eagerly read ahead blocks in excess of the success
+					 * or failure caps before attempting to freeze them, so we
+					 * could reach here even after disabling additional eager
+					 * scanning.
+					 */
+					if (vacrel->eager_scan_max_fails_per_region > 0)
+						ereport(vacrel->verbose ? INFO : DEBUG2,
+								(errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
+										orig_eager_scan_success_limit,
+										vacrel->dbname, vacrel->relnamespace,
+										vacrel->relname)));
+
+					/*
 					 * If we hit our success cap, permanently disable eager
 					 * scanning by setting the other eager scan management
 					 * fields to their disabled values.
@@ -1426,19 +1442,10 @@ lazy_scan_heap(LVRelState *vacrel)
 					vacrel->eager_scan_remaining_fails = 0;
 					vacrel->next_eager_scan_region_start = InvalidBlockNumber;
 					vacrel->eager_scan_max_fails_per_region = 0;
-
-					ereport(vacrel->verbose ? INFO : DEBUG2,
-							(errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of \"%s.%s.%s\"",
-									orig_eager_scan_success_limit,
-									vacrel->dbname, vacrel->relnamespace,
-									vacrel->relname)));
 				}
 			}
-			else
-			{
-				Assert(vacrel->eager_scan_remaining_fails > 0);
+			else if (vacrel->eager_scan_remaining_fails > 0)
 				vacrel->eager_scan_remaining_fails--;
-			}
 		}
 
 		/*
@@ -1475,7 +1482,7 @@ lazy_scan_heap(LVRelState *vacrel)
 			 * table has indexes. There will only be newly-freed space if we
 			 * held the cleanup lock and lazy_scan_prune() was called.
 			 */
-			if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items &&
+			if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
 				blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
 			{
 				FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
@@ -1866,8 +1873,6 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
 		 */
 		if (!PageIsAllVisible(page))
 		{
-			uint8		old_vmbits;
-
 			START_CRIT_SECTION();
 
 			/* mark buffer dirty before writing a WAL record */
@@ -1887,24 +1892,16 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
 				log_newpage_buffer(buf, true);
 
 			PageSetAllVisible(page);
-			old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
-										   InvalidXLogRecPtr,
-										   vmbuffer, InvalidTransactionId,
-										   VISIBILITYMAP_ALL_VISIBLE |
-										   VISIBILITYMAP_ALL_FROZEN);
+			visibilitymap_set(vacrel->rel, blkno, buf,
+							  InvalidXLogRecPtr,
+							  vmbuffer, InvalidTransactionId,
+							  VISIBILITYMAP_ALL_VISIBLE |
+							  VISIBILITYMAP_ALL_FROZEN);
 			END_CRIT_SECTION();
 
-			/*
-			 * If the page wasn't already set all-visible and/or all-frozen in
-			 * the VM, count it as newly set for logging.
-			 */
-			if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
-			{
-				vacrel->vm_new_visible_pages++;
-				vacrel->vm_new_visible_frozen_pages++;
-			}
-			else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0)
-				vacrel->vm_new_frozen_pages++;
+			/* Count the newly all-frozen pages for logging */
+			vacrel->vm_new_visible_pages++;
+			vacrel->vm_new_visible_frozen_pages++;
 		}
 
 		freespace = PageGetHeapFreeSpace(page);
@@ -1940,8 +1937,10 @@ cmpOffsetNumbers(const void *a, const void *b)
  * *vm_page_frozen is set to true if the page is newly set all-frozen in the
  * VM. The caller currently only uses this for determining whether an eagerly
  * scanned page was successfully set all-frozen.
+ *
+ * Returns the number of tuples deleted from the page during HOT pruning.
  */
-static void
+static int
 lazy_scan_prune(LVRelState *vacrel,
 				Buffer buf,
 				BlockNumber blkno,
@@ -2212,6 +2211,8 @@ lazy_scan_prune(LVRelState *vacrel,
 			*vm_page_frozen = true;
 		}
 	}
+
+	return presult.ndeleted;
 }
 
 /*
@@ -2909,7 +2910,6 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 	if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
 								 &all_frozen))
 	{
-		uint8		old_vmbits;
 		uint8		flags = VISIBILITYMAP_ALL_VISIBLE;
 
 		if (all_frozen)
@@ -2919,25 +2919,15 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 		}
 
 		PageSetAllVisible(page);
-		old_vmbits = visibilitymap_set(vacrel->rel, blkno, buffer,
-									   InvalidXLogRecPtr,
-									   vmbuffer, visibility_cutoff_xid,
-									   flags);
-
-		/*
-		 * If the page wasn't already set all-visible and/or all-frozen in the
-		 * VM, count it as newly set for logging.
-		 */
-		if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
-		{
-			vacrel->vm_new_visible_pages++;
-			if (all_frozen)
-				vacrel->vm_new_visible_frozen_pages++;
-		}
+		visibilitymap_set(vacrel->rel, blkno, buffer,
+						  InvalidXLogRecPtr,
+						  vmbuffer, visibility_cutoff_xid,
+						  flags);
 
-		else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
-				 all_frozen)
-			vacrel->vm_new_frozen_pages++;
+		/* Count the newly set VM page for logging */
+		vacrel->vm_new_visible_pages++;
+		if (all_frozen)
+			vacrel->vm_new_visible_frozen_pages++;
 	}
 
 	/* Revert to the previous phase information for error traceback */
diff --git a/src/backend/access/nbtree/nbtpreprocesskeys.c b/src/backend/access/nbtree/nbtpreprocesskeys.c
index a136e4bbfdf..21c519cd108 100644
--- a/src/backend/access/nbtree/nbtpreprocesskeys.c
+++ b/src/backend/access/nbtree/nbtpreprocesskeys.c
@@ -16,6 +16,7 @@
 #include "postgres.h"
 
 #include "access/nbtree.h"
+#include "common/int.h"
 #include "lib/qunique.h"
 #include "utils/array.h"
 #include "utils/lsyscache.h"
@@ -56,6 +57,8 @@ static void _bt_skiparray_strat_decrement(IndexScanDesc scan, ScanKey arraysk,
 										  BTArrayKeyInfo *array);
 static void _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk,
 										  BTArrayKeyInfo *array);
+static void _bt_unmark_keys(IndexScanDesc scan, int *keyDataMap);
+static int	_bt_reorder_array_cmp(const void *a, const void *b);
 static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys);
 static void _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap);
 static int	_bt_num_array_keys(IndexScanDesc scan, Oid *skip_eq_ops_out,
@@ -96,7 +99,7 @@ static int	_bt_compare_array_elements(const void *a, const void *b, void *arg);
  * incomplete sets of cross-type operators, we may fail to detect redundant
  * or contradictory keys, but we can survive that.)
  *
- * The output keys must be sorted by index attribute.  Presently we expect
+ * Required output keys are sorted by index attribute.  Presently we expect
  * (but verify) that the input keys are already so sorted --- this is done
  * by match_clauses_to_index() in indxpath.c.  Some reordering of the keys
  * within each attribute may be done as a byproduct of the processing here.
@@ -127,29 +130,36 @@ static int	_bt_compare_array_elements(const void *a, const void *b, void *arg);
  * This has the potential to be much more efficient than a full index scan
  * (though it behaves like a full scan when there's many distinct "x" values).
  *
- * If possible, redundant keys are eliminated: we keep only the tightest
+ * Typically, redundant keys are eliminated: we keep only the tightest
  * >/>= bound and the tightest </<= bound, and if there's an = key then
  * that's the only one returned.  (So, we return either a single = key,
  * or one or two boundary-condition keys for each attr.)  However, if we
  * cannot compare two keys for lack of a suitable cross-type operator,
- * we cannot eliminate either.  If there are two such keys of the same
- * operator strategy, the second one is just pushed into the output array
- * without further processing here.  We may also emit both >/>= or both
- * </<= keys if we can't compare them.  The logic about required keys still
- * works if we don't eliminate redundant keys.
- *
- * Note that one reason we need direction-sensitive required-key flags is
- * precisely that we may not be able to eliminate redundant keys.  Suppose
- * we have "x > 4::int AND x > 10::bigint", and we are unable to determine
- * which key is more restrictive for lack of a suitable cross-type operator.
- * _bt_first will arbitrarily pick one of the keys to do the initial
- * positioning with.  If it picks x > 4, then the x > 10 condition will fail
- * until we reach index entries > 10; but we can't stop the scan just because
- * x > 10 is failing.  On the other hand, if we are scanning backwards, then
- * failure of either key is indeed enough to stop the scan.  (In general, when
- * inequality keys are present, the initial-positioning code only promises to
- * position before the first possible match, not exactly at the first match,
- * for a forward scan; or after the last match for a backward scan.)
+ * we cannot eliminate either key.
+ *
+ * When not all redundant keys could be eliminated, we'll output a key array
+ * that can more or less be treated as if it had no redundant keys.  Suppose
+ * we have "x > 4::int AND x > 10::bigint AND x < 70", and we are unable to
+ * determine which > key is more restrictive for lack of a suitable cross-type
+ * operator.  We'll arbitrarily pick one of the > keys; the other > key won't
+ * be marked required.  Obviously, the scan will be less efficient if we
+ * choose x > 4 over x > 10 -- but it can still largely proceed as if there
+ * was only a single > condition.  "x > 10" will be placed at the end of the
+ * so->keyData[] output array.  It'll always be evaluated last, after the keys
+ * that could be marked required in the usual way (after "x > 4 AND x < 70").
+ * This can sometimes result in so->keyData[] keys that aren't even in index
+ * attribute order (if the qual involves multiple attributes).  The scan's
+ * required keys will still be in attribute order, though, so it can't matter.
+ *
+ * This scheme ensures that _bt_first always uses the same set of keys at the
+ * start of a forwards scan as those _bt_checkkeys uses to determine when to
+ * end a similar backwards scan (and vice-versa).  _bt_advance_array_keys
+ * depends on this: it expects to be able to reliably predict what the next
+ * _bt_first call will do by testing whether _bt_checkkeys' routines report
+ * that the final tuple on the page is past the end of matches for the scan's
+ * keys with the scan direction flipped.  If it is (if continuescan=false),
+ * then it follows that calling _bt_first will, at a minimum, relocate the
+ * scan to the very next leaf page (in the current scan direction).
  *
  * As a byproduct of this work, we can detect contradictory quals such
  * as "x = 1 AND x > 2".  If we see that, we return so->qual_ok = false,
@@ -188,7 +198,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
 	int			numberOfEqualCols;
 	ScanKey		inkeys;
 	BTScanKeyPreproc xform[BTMaxStrategyNumber];
-	bool		test_result;
+	bool		test_result,
+				redundant_key_kept = false;
 	AttrNumber	attno;
 	ScanKey		arrayKeyData;
 	int		   *keyDataMap = NULL;
@@ -388,7 +399,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
 						xform[j].inkey = NULL;
 						xform[j].inkeyi = -1;
 					}
-					/* else, cannot determine redundancy, keep both keys */
+					else
+						redundant_key_kept = true;
 				}
 				/* track number of attrs for which we have "=" keys */
 				numberOfEqualCols++;
@@ -409,6 +421,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
 					else
 						xform[BTLessStrategyNumber - 1].inkey = NULL;
 				}
+				else
+					redundant_key_kept = true;
 			}
 
 			/* try to keep only one of >, >= */
@@ -426,6 +440,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
 					else
 						xform[BTGreaterStrategyNumber - 1].inkey = NULL;
 				}
+				else
+					redundant_key_kept = true;
 			}
 
 			/*
@@ -466,25 +482,6 @@ _bt_preprocess_keys(IndexScanDesc scan)
 		/* check strategy this key's operator corresponds to */
 		j = inkey->sk_strategy - 1;
 
-		/* if row comparison, push it directly to the output array */
-		if (inkey->sk_flags & SK_ROW_HEADER)
-		{
-			ScanKey		outkey = &so->keyData[new_numberOfKeys++];
-
-			memcpy(outkey, inkey, sizeof(ScanKeyData));
-			if (arrayKeyData)
-				keyDataMap[new_numberOfKeys - 1] = i;
-			if (numberOfEqualCols == attno - 1)
-				_bt_mark_scankey_required(outkey);
-
-			/*
-			 * We don't support RowCompare using equality; such a qual would
-			 * mess up the numberOfEqualCols tracking.
-			 */
-			Assert(j != (BTEqualStrategyNumber - 1));
-			continue;
-		}
-
 		if (inkey->sk_strategy == BTEqualStrategyNumber &&
 			(inkey->sk_flags & SK_SEARCHARRAY))
 		{
@@ -593,9 +590,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
 				 * the new scan key.
 				 *
 				 * Note: We do things this way around so that our arrays are
-				 * always in the same order as their corresponding scan keys,
-				 * even with incomplete opfamilies.  _bt_advance_array_keys
-				 * depends on this.
+				 * always in the same order as their corresponding scan keys.
+				 * _bt_preprocess_array_keys_final expects this.
 				 */
 				ScanKey		outkey = &so->keyData[new_numberOfKeys++];
 
@@ -607,6 +603,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
 				xform[j].inkey = inkey;
 				xform[j].inkeyi = i;
 				xform[j].arrayidx = arrayidx;
+				redundant_key_kept = true;
 			}
 		}
 	}
@@ -622,6 +619,15 @@ _bt_preprocess_keys(IndexScanDesc scan)
 	if (arrayKeyData)
 		_bt_preprocess_array_keys_final(scan, keyDataMap);
 
+	/*
+	 * If there are remaining redundant inequality keys, we must make sure
+	 * that each index attribute has no more than one required >/>= key, and
+	 * no more than one required </<= key.  Attributes that have one or more
+	 * required = keys now must keep only one required key (the first = key).
+	 */
+	if (unlikely(redundant_key_kept) && so->qual_ok)
+		_bt_unmark_keys(scan, keyDataMap);
+
 	/* Could pfree arrayKeyData/keyDataMap now, but not worth the cycles */
 }
 
@@ -746,9 +752,12 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
  *
  * Depending on the operator type, the key may be required for both scan
  * directions or just one.  Also, if the key is a row comparison header,
- * we have to mark its first subsidiary ScanKey as required.  (Subsequent
- * subsidiary ScanKeys are normally for lower-order columns, and thus
- * cannot be required, since they're after the first non-equality scankey.)
+ * we have to mark the appropriate subsidiary ScanKeys as required.  In such
+ * cases, the first subsidiary key is required, but subsequent ones are
+ * required only as long as they correspond to successive index columns and
+ * match the leading column as to sort direction.  Otherwise the row
+ * comparison ordering is different from the index ordering and so we can't
+ * stop the scan on the basis of those lower-order columns.
  *
  * Note: when we set required-key flag bits in a subsidiary scankey, we are
  * scribbling on a data structure belonging to the index AM's caller, not on
@@ -786,12 +795,25 @@ _bt_mark_scankey_required(ScanKey skey)
 	if (skey->sk_flags & SK_ROW_HEADER)
 	{
 		ScanKey		subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
+		AttrNumber	attno = skey->sk_attno;
 
 		/* First subkey should be same column/operator as the header */
-		Assert(subkey->sk_flags & SK_ROW_MEMBER);
-		Assert(subkey->sk_attno == skey->sk_attno);
+		Assert(subkey->sk_attno == attno);
 		Assert(subkey->sk_strategy == skey->sk_strategy);
-		subkey->sk_flags |= addflags;
+
+		for (;;)
+		{
+			Assert(subkey->sk_flags & SK_ROW_MEMBER);
+			if (subkey->sk_attno != attno)
+				break;			/* non-adjacent key, so not required */
+			if (subkey->sk_strategy != skey->sk_strategy)
+				break;			/* wrong direction, so not required */
+			subkey->sk_flags |= addflags;
+			if (subkey->sk_flags & SK_ROW_END)
+				break;
+			subkey++;
+			attno++;
+		}
 	}
 }
 
@@ -847,8 +869,7 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
 				cmp_op;
 	StrategyNumber strat;
 
-	Assert(!((leftarg->sk_flags | rightarg->sk_flags) &
-			 (SK_ROW_HEADER | SK_ROW_MEMBER)));
+	Assert(!((leftarg->sk_flags | rightarg->sk_flags) & SK_ROW_MEMBER));
 
 	/*
 	 * First, deal with cases where one or both args are NULL.  This should
@@ -925,6 +946,16 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
 	}
 
 	/*
+	 * We don't yet know how to determine redundancy when it involves a row
+	 * compare key (barring simple cases involving IS NULL/IS NOT NULL)
+	 */
+	if ((leftarg->sk_flags | rightarg->sk_flags) & SK_ROW_HEADER)
+	{
+		Assert(!((leftarg->sk_flags | rightarg->sk_flags) & SK_BT_SKIP));
+		return false;
+	}
+
+	/*
 	 * If either leftarg or rightarg are equality-type array scankeys, we need
 	 * specialized handling (since by now we know that IS NULL wasn't used)
 	 */
@@ -1468,6 +1499,283 @@ _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk,
 }
 
 /*
+ *	_bt_unmark_keys() -- make superfluous required keys nonrequired after all
+ *
+ * When _bt_preprocess_keys fails to eliminate one or more redundant keys, it
+ * calls here to make sure that no index attribute has more than one > or >=
+ * key marked required, and no more than one required < or <= key.  Attributes
+ * with = keys will always get one = key as their required key.  All other
+ * keys that were initially marked required get "unmarked" here.  That way,
+ * _bt_first and _bt_checkkeys will reliably agree on which keys to use to
+ * start and/or to end the scan.
+ *
+ * We also relocate keys that become/started out nonrequired to the end of
+ * so->keyData[].  That way, _bt_first and _bt_checkkeys cannot fail to reach
+ * a required key due to some earlier nonrequired key getting in the way.
+ *
+ * Only call here when _bt_compare_scankey_args returned false at least once
+ * (otherwise, calling here will just waste cycles).
+ */
+static void
+_bt_unmark_keys(IndexScanDesc scan, int *keyDataMap)
+{
+	BTScanOpaque so = (BTScanOpaque) scan->opaque;
+	AttrNumber	attno;
+	bool	   *unmarkikey;
+	int			nunmark,
+				nunmarked,
+				nkept,
+				firsti;
+	ScanKey		keepKeys,
+				unmarkKeys;
+	FmgrInfo   *keepOrderProcs = NULL,
+			   *unmarkOrderProcs = NULL;
+	bool		haveReqEquals,
+				haveReqForward,
+				haveReqBackward;
+
+	/*
+	 * Do an initial pass over so->keyData[] that determines which keys to
+	 * keep as required.  We expect so->keyData[] to still be in attribute
+	 * order when we're called (though we don't expect any particular order
+	 * among each attribute's keys).
+	 *
+	 * When both equality and inequality keys remain on a single attribute, we
+	 * *must* make sure that exactly one of the equalities remains required.
+	 * Any requiredness markings that we might leave on later keys/attributes
+	 * are predicated on there being required = keys on all prior columns.
+	 */
+	unmarkikey = palloc0(so->numberOfKeys * sizeof(bool));
+	nunmark = 0;
+
+	/* Set things up for first key's attribute */
+	attno = so->keyData[0].sk_attno;
+	firsti = 0;
+	haveReqEquals = false;
+	haveReqForward = false;
+	haveReqBackward = false;
+	for (int i = 0; i < so->numberOfKeys; i++)
+	{
+		ScanKey		origkey = &so->keyData[i];
+
+		if (origkey->sk_attno != attno)
+		{
+			/* Reset for next attribute */
+			attno = origkey->sk_attno;
+			firsti = i;
+
+			haveReqEquals = false;
+			haveReqForward = false;
+			haveReqBackward = false;
+		}
+
+		/* Equalities get priority over inequalities */
+		if (haveReqEquals)
+		{
+			/*
+			 * We already found the first "=" key for this attribute.  We've
+			 * already decided that all its other keys will be unmarked.
+			 */
+			Assert(!(origkey->sk_flags & SK_SEARCHNULL));
+			unmarkikey[i] = true;
+			nunmark++;
+			continue;
+		}
+		else if ((origkey->sk_flags & SK_BT_REQFWD) &&
+				 (origkey->sk_flags & SK_BT_REQBKWD))
+		{
+			/*
+			 * Found the first "=" key for attno.  All other attno keys will
+			 * be unmarked.
+			 */
+			Assert(origkey->sk_strategy == BTEqualStrategyNumber);
+
+			haveReqEquals = true;
+			for (int j = firsti; j < i; j++)
+			{
+				/* Unmark any prior inequality keys on attno after all */
+				if (!unmarkikey[j])
+				{
+					unmarkikey[j] = true;
+					nunmark++;
+				}
+			}
+			continue;
+		}
+
+		/* Deal with inequalities next */
+		if ((origkey->sk_flags & SK_BT_REQFWD) && !haveReqForward)
+		{
+			haveReqForward = true;
+			continue;
+		}
+		else if ((origkey->sk_flags & SK_BT_REQBKWD) && !haveReqBackward)
+		{
+			haveReqBackward = true;
+			continue;
+		}
+
+		/*
+		 * We have either a redundant inequality key that will be unmarked, or
+		 * we have a key that wasn't marked required in the first place
+		 */
+		unmarkikey[i] = true;
+		nunmark++;
+	}
+
+	/* Should only be called when _bt_compare_scankey_args reported failure */
+	Assert(nunmark > 0);
+
+	/*
+	 * Next, allocate temp arrays: one for required keys that'll remain
+	 * required, the other for all remaining keys
+	 */
+	unmarkKeys = palloc(nunmark * sizeof(ScanKeyData));
+	keepKeys = palloc((so->numberOfKeys - nunmark) * sizeof(ScanKeyData));
+	nunmarked = 0;
+	nkept = 0;
+	if (so->numArrayKeys)
+	{
+		unmarkOrderProcs = palloc(nunmark * sizeof(FmgrInfo));
+		keepOrderProcs = palloc((so->numberOfKeys - nunmark) * sizeof(FmgrInfo));
+	}
+
+	/*
+	 * Next, copy the contents of so->keyData[] into the appropriate temp
+	 * array.
+	 *
+	 * Scans with = array keys need us to maintain invariants around the order
+	 * of so->orderProcs[] and so->arrayKeys[] relative to so->keyData[].  See
+	 * _bt_preprocess_array_keys_final for a full explanation.
+	 */
+	for (int i = 0; i < so->numberOfKeys; i++)
+	{
+		ScanKey		origkey = &so->keyData[i];
+		ScanKey		unmark;
+
+		if (!unmarkikey[i])
+		{
+			/*
+			 * Key gets to keep its original requiredness markings.
+			 *
+			 * Key keeps its original position unless we're going to unmark an
+			 * earlier key (in which case this key shifts to a lower offset).
+			 */
+			memcpy(keepKeys + nkept, origkey, sizeof(ScanKeyData));
+
+			if (so->numArrayKeys)
+			{
+				keyDataMap[i] = nkept;
+				memcpy(keepOrderProcs + nkept, &so->orderProcs[i],
+					   sizeof(FmgrInfo));
+			}
+
+			nkept++;
+			continue;
+		}
+
+		/*
+		 * Key will be unmarked as needed, and moved to the end of the array,
+		 * next to other keys that will become (or always were) nonrequired
+		 */
+		unmark = unmarkKeys + nunmarked;
+		memcpy(unmark, origkey, sizeof(ScanKeyData));
+
+		if (so->numArrayKeys)
+		{
+			keyDataMap[i] = (so->numberOfKeys - nunmark) + nunmarked;
+			memcpy(&unmarkOrderProcs[nunmarked], &so->orderProcs[i],
+				   sizeof(FmgrInfo));
+		}
+
+		/*
+		 * Preprocessing only generates skip arrays when it knows that they'll
+		 * be the only required = key on the attr.  We'll never unmark them.
+		 */
+		Assert(!(unmark->sk_flags & SK_BT_SKIP));
+
+		/*
+		 * Also shouldn't have to unmark an IS NULL or an IS NOT NULL key.
+		 * They aren't cross-type, so an incomplete opfamily can't matter.
+		 */
+		Assert(!(unmark->sk_flags & SK_ISNULL) ||
+			   !(unmark->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)));
+
+		/* Clear requiredness flags on redundant key (and on any subkeys) */
+		unmark->sk_flags &= ~(SK_BT_REQFWD | SK_BT_REQBKWD);
+		if (unmark->sk_flags & SK_ROW_HEADER)
+		{
+			ScanKey		subkey = (ScanKey) DatumGetPointer(unmark->sk_argument);
+
+			Assert(subkey->sk_strategy == unmark->sk_strategy);
+			for (;;)
+			{
+				Assert(subkey->sk_flags & SK_ROW_MEMBER);
+				subkey->sk_flags &= ~(SK_BT_REQFWD | SK_BT_REQBKWD);
+				if (subkey->sk_flags & SK_ROW_END)
+					break;
+				subkey++;
+			}
+		}
+
+		nunmarked++;
+	}
+
+	/* Copy both temp arrays back into so->keyData[] to reorder */
+	Assert(nkept == so->numberOfKeys - nunmark);
+	Assert(nunmarked == nunmark);
+	memcpy(so->keyData, keepKeys, sizeof(ScanKeyData) * nkept);
+	memcpy(so->keyData + nkept, unmarkKeys, sizeof(ScanKeyData) * nunmarked);
+
+	/* Done with temp arrays */
+	pfree(unmarkikey);
+	pfree(keepKeys);
+	pfree(unmarkKeys);
+
+	/*
+	 * Now copy so->orderProcs[] temp entries needed by scans with = array
+	 * keys back (just like with the so->keyData[] temp arrays)
+	 */
+	if (so->numArrayKeys)
+	{
+		memcpy(so->orderProcs, keepOrderProcs, sizeof(FmgrInfo) * nkept);
+		memcpy(so->orderProcs + nkept, unmarkOrderProcs,
+			   sizeof(FmgrInfo) * nunmarked);
+
+		/* Also fix-up array->scan_key references */
+		for (int arridx = 0; arridx < so->numArrayKeys; arridx++)
+		{
+			BTArrayKeyInfo *array = &so->arrayKeys[arridx];
+
+			array->scan_key = keyDataMap[array->scan_key];
+		}
+
+		/*
+		 * Sort so->arrayKeys[] based on its new BTArrayKeyInfo.scan_key
+		 * offsets, so that its order matches so->keyData[] order as expected
+		 */
+		qsort(so->arrayKeys, so->numArrayKeys, sizeof(BTArrayKeyInfo),
+			  _bt_reorder_array_cmp);
+
+		/* Done with temp arrays */
+		pfree(unmarkOrderProcs);
+		pfree(keepOrderProcs);
+	}
+}
+
+/*
+ * qsort comparator for reordering so->arrayKeys[] BTArrayKeyInfo entries
+ */
+static int
+_bt_reorder_array_cmp(const void *a, const void *b)
+{
+	BTArrayKeyInfo *arraya = (BTArrayKeyInfo *) a;
+	BTArrayKeyInfo *arrayb = (BTArrayKeyInfo *) b;
+
+	return pg_cmp_s32(arraya->scan_key, arrayb->scan_key);
+}
+
+/*
  *	_bt_preprocess_array_keys() -- Preprocess SK_SEARCHARRAY scan keys
  *
  * If there are any SK_SEARCHARRAY scan keys, deconstruct the array(s) and
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 765659887af..fdff960c130 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -228,6 +228,8 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
 	BTScanOpaque so = (BTScanOpaque) scan->opaque;
 	bool		res;
 
+	Assert(scan->heapRelation != NULL);
+
 	/* btree indexes are never lossy */
 	scan->xs_recheck = false;
 
@@ -289,6 +291,8 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 	int64		ntids = 0;
 	ItemPointer heapTid;
 
+	Assert(scan->heapRelation == NULL);
+
 	/* Each loop iteration performs another primitive index scan */
 	do
 	{
@@ -393,6 +397,34 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
 		BTScanPosInvalidate(so->currPos);
 	}
 
+	/*
+	 * We prefer to eagerly drop leaf page pins before btgettuple returns.
+	 * This avoids making VACUUM wait to acquire a cleanup lock on the page.
+	 *
+	 * We cannot safely drop leaf page pins during index-only scans due to a
+	 * race condition involving VACUUM setting pages all-visible in the VM.
+	 * It's also unsafe for plain index scans that use a non-MVCC snapshot.
+	 *
+	 * When we drop pins eagerly, the mechanism that marks so->killedItems[]
+	 * index tuples LP_DEAD has to deal with concurrent TID recycling races.
+	 * The scheme used to detect unsafe TID recycling won't work when scanning
+	 * unlogged relations (since it involves saving an affected page's LSN).
+	 * Opt out of eager pin dropping during unlogged relation scans for now
+	 * (this is preferable to opting out of kill_prior_tuple LP_DEAD setting).
+	 *
+	 * Also opt out of dropping leaf page pins eagerly during bitmap scans.
+	 * Pins cannot be held for more than an instant during bitmap scans either
+	 * way, so we might as well avoid wasting cycles on acquiring page LSNs.
+	 *
+	 * See nbtree/README section on making concurrent TID recycling safe.
+	 *
+	 * Note: so->dropPin should never change across rescans.
+	 */
+	so->dropPin = (!scan->xs_want_itup &&
+				   IsMVCCSnapshot(scan->xs_snapshot) &&
+				   RelationNeedsWAL(scan->indexRelation) &&
+				   scan->heapRelation != NULL);
+
 	so->markItemIndex = -1;
 	so->needPrimScan = false;
 	so->scanBehind = false;
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index fe9a3886913..d69798795b4 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -25,7 +25,7 @@
 #include "utils/rel.h"
 
 
-static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp);
+static inline void _bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so);
 static Buffer _bt_moveright(Relation rel, Relation heaprel, BTScanInsert key,
 							Buffer buf, bool forupdate, BTStack stack,
 							int access);
@@ -57,24 +57,29 @@ static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
 /*
  *	_bt_drop_lock_and_maybe_pin()
  *
- * Unlock the buffer; and if it is safe to release the pin, do that, too.
- * This will prevent vacuum from stalling in a blocked state trying to read a
- * page when a cursor is sitting on it.
- *
- * See nbtree/README section on making concurrent TID recycling safe.
+ * Unlock so->currPos.buf.  If so->dropPin is set, drop the pin, too.
+ * Dropping the pin prevents VACUUM from blocking on acquiring a cleanup lock.
  */
-static void
-_bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
+static inline void
+_bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so)
 {
-	_bt_unlockbuf(scan->indexRelation, sp->buf);
-
-	if (IsMVCCSnapshot(scan->xs_snapshot) &&
-		RelationNeedsWAL(scan->indexRelation) &&
-		!scan->xs_want_itup)
+	if (!so->dropPin)
 	{
-		ReleaseBuffer(sp->buf);
-		sp->buf = InvalidBuffer;
+		/* Just drop the lock (not the pin) */
+		_bt_unlockbuf(rel, so->currPos.buf);
+		return;
 	}
+
+	/*
+	 * Drop both the lock and the pin.
+	 *
+	 * Have to set so->currPos.lsn so that _bt_killitems has a way to detect
+	 * when concurrent heap TID recycling by VACUUM might have taken place.
+	 */
+	Assert(RelationNeedsWAL(rel));
+	so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf);
+	_bt_relbuf(rel, so->currPos.buf);
+	so->currPos.buf = InvalidBuffer;
 }
 
 /*
@@ -866,8 +871,8 @@ _bt_compare(Relation rel,
  *		if backwards scan, the last item) in the tree that satisfies the
  *		qualifications in the scan key.  On success exit, data about the
  *		matching tuple(s) on the page has been loaded into so->currPos.  We'll
- *		drop all locks and hold onto a pin on page's buffer, except when
- *		_bt_drop_lock_and_maybe_pin dropped the pin to avoid blocking VACUUM.
+ *		drop all locks and hold onto a pin on page's buffer, except during
+ *		so->dropPin scans, when we drop both the lock and the pin.
  *		_bt_returnitem sets the next item to return to scan on success exit.
  *
  * If there are no matching items in the index, we return false, with no
@@ -887,9 +892,9 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 	OffsetNumber offnum;
 	BTScanInsertData inskey;
 	ScanKey		startKeys[INDEX_MAX_KEYS];
-	ScanKeyData notnullkeys[INDEX_MAX_KEYS];
+	ScanKeyData notnullkey;
 	int			keysz = 0;
-	StrategyNumber strat_total;
+	StrategyNumber strat_total = InvalidStrategy;
 	BlockNumber blkno = InvalidBlockNumber,
 				lastcurrblkno;
 
@@ -955,46 +960,51 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 
 	/*----------
 	 * Examine the scan keys to discover where we need to start the scan.
+	 * The selected scan keys (at most one per index column) are remembered by
+	 * storing their addresses into the local startKeys[] array.  The final
+	 * startKeys[] entry's strategy is set in strat_total. (Actually, there
+	 * are a couple of cases where we force a less/more restrictive strategy.)
 	 *
-	 * We want to identify the keys that can be used as starting boundaries;
-	 * these are =, >, or >= keys for a forward scan or =, <, <= keys for
-	 * a backwards scan.  We can use keys for multiple attributes so long as
-	 * the prior attributes had only =, >= (resp. =, <=) keys.  Once we accept
-	 * a > or < boundary or find an attribute with no boundary (which can be
-	 * thought of as the same as "> -infinity"), we can't use keys for any
-	 * attributes to its right, because it would break our simplistic notion
-	 * of what initial positioning strategy to use.
+	 * We must use the key that was marked required (in the direction opposite
+	 * our own scan's) during preprocessing.  Each index attribute can only
+	 * have one such required key.  In general, the keys that we use to find
+	 * an initial position when scanning forwards are the same keys that end
+	 * the scan on the leaf level when scanning backwards (and vice-versa).
 	 *
 	 * When the scan keys include cross-type operators, _bt_preprocess_keys
-	 * may not be able to eliminate redundant keys; in such cases we will
-	 * arbitrarily pick a usable one for each attribute.  This is correct
-	 * but possibly not optimal behavior.  (For example, with keys like
-	 * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when
-	 * x=5 would be more efficient.)  Since the situation only arises given
-	 * a poorly-worded query plus an incomplete opfamily, live with it.
+	 * may not be able to eliminate redundant keys; in such cases it will
+	 * arbitrarily pick a usable key for each attribute (and scan direction),
+	 * ensuring that there is no more than one key required in each direction.
+	 * We stop considering further keys once we reach the first nonrequired
+	 * key (which must come after all required keys), so this can't affect us.
+	 *
+	 * The required keys that we use as starting boundaries have to be =, >,
+	 * or >= keys for a forward scan or =, <, <= keys for a backwards scan.
+	 * We can use keys for multiple attributes so long as the prior attributes
+	 * had only =, >= (resp. =, <=) keys.  These rules are very similar to the
+	 * rules that preprocessing used to determine which keys to mark required.
+	 * We cannot always use every required key as a positioning key, though.
+	 * Skip arrays necessitate independently applying our own rules here.
+	 * Skip arrays are always considered = array keys, but we'll
+	 * nevertheless treat them as inequalities at certain points of the scan.
+	 * When that happens, it _might_ have implications for the number of
+	 * required keys that we can safely use for initial positioning purposes.
 	 *
-	 * When both equality and inequality keys appear for a single attribute
-	 * (again, only possible when cross-type operators appear), we *must*
-	 * select one of the equality keys for the starting point, because
-	 * _bt_checkkeys() will stop the scan as soon as an equality qual fails.
-	 * For example, if we have keys like "x >= 4 AND x = 10" and we elect to
-	 * start at x=4, we will fail and stop before reaching x=10.  If multiple
-	 * equality quals survive preprocessing, however, it doesn't matter which
-	 * one we use --- by definition, they are either redundant or
-	 * contradictory.
+	 * For example, a forward scan with a skip array on its leading attribute
+	 * (with no low_compare/high_compare) will have at least two required scan
+	 * keys, but we won't use any of them as boundary keys during the scan's
+	 * initial call here.  Our positioning key during the first call here can
+	 * be thought of as representing "> -infinity".  Similarly, if such a skip
+	 * array's low_compare is "a > 'foo'", then we position using "a > 'foo'"
+	 * during the scan's initial call here; a lower-order key such as "b = 42"
+	 * can't be used until the "a" array advances beyond MINVAL/low_compare.
 	 *
-	 * In practice we rarely see any "attribute boundary key gaps" here.
-	 * Preprocessing can usually backfill skip array keys for any attributes
-	 * that were omitted from the original scan->keyData[] input keys.  All
-	 * array keys are always considered = keys, but we'll sometimes need to
-	 * treat the current key value as if we were using an inequality strategy.
-	 * This happens with range skip arrays, which store inequality keys in the
-	 * array's low_compare/high_compare fields (used to find the first/last
-	 * set of matches, when = key will lack a usable sk_argument value).
-	 * These are always preferred over any redundant "standard" inequality
-	 * keys on the same column (per the usual rule about preferring = keys).
-	 * Note also that any column with an = skip array key can never have an
-	 * additional, contradictory = key.
+	 * On the other hand, if such a skip array's low_compare was "a >= 'foo'",
+	 * then we _can_ use "a >= 'foo' AND b = 42" during the initial call here.
+	 * A subsequent call here might have us use "a = 'fop' AND b = 42".  Note
+	 * that we treat = and >= as equivalent when scanning forwards (just as we
+	 * treat = and <= as equivalent when scanning backwards).  We effectively
+	 * do the same thing (though with a distinct "a" element/value) each time.
 	 *
 	 * All keys (with the exception of SK_SEARCHNULL keys and SK_BT_SKIP
 	 * array keys whose array is "null_elem=true") imply a NOT NULL qualifier.
@@ -1006,41 +1016,38 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 	 * traversing a lot of null entries at the start of the scan.
 	 *
 	 * In this loop, row-comparison keys are treated the same as keys on their
-	 * first (leftmost) columns.  We'll add on lower-order columns of the row
-	 * comparison below, if possible.
+	 * first (leftmost) columns.  Below, we'll add all lower-order columns of
+	 * the row comparison that were marked required during preprocessing.
 	 *
-	 * The selected scan keys (at most one per index column) are remembered by
-	 * storing their addresses into the local startKeys[] array.
-	 *
-	 * _bt_checkkeys/_bt_advance_array_keys decide whether and when to start
-	 * the next primitive index scan (for scans with array keys) based in part
-	 * on an understanding of how it'll enable us to reposition the scan.
-	 * They're directly aware of how we'll sometimes cons up an explicit
-	 * SK_SEARCHNOTNULL key.  They'll even end primitive scans by applying a
-	 * symmetric "deduce NOT NULL" rule of their own.  This allows top-level
-	 * scans to skip large groups of NULLs through repeated deductions about
-	 * key strictness (for a required inequality key) and whether NULLs in the
-	 * key's index column are stored last or first (relative to non-NULLs).
+	 * _bt_advance_array_keys needs to know exactly how we'll reposition the
+	 * scan (should it opt to schedule another primitive index scan).  It is
+	 * critical that primscans only be scheduled when they'll definitely make
+	 * some useful progress.  _bt_advance_array_keys does this by calling
+	 * _bt_checkkeys routines that report whether a tuple is past the end of
+	 * matches for the scan's keys (given the scan's current array elements).
+	 * If the page's final tuple is "after the end of matches" for a scan that
+	 * uses the *opposite* scan direction, then it must follow that it's also
+	 * "before the start of matches" for the actual current scan direction.
+	 * It is therefore essential that all of our initial positioning rules are
+	 * symmetric with _bt_checkkeys's corresponding continuescan=false rule.
 	 * If you update anything here, _bt_checkkeys/_bt_advance_array_keys might
 	 * need to be kept in sync.
 	 *----------
 	 */
-	strat_total = BTEqualStrategyNumber;
 	if (so->numberOfKeys > 0)
 	{
 		AttrNumber	curattr;
-		ScanKey		chosen;
+		ScanKey		bkey;
 		ScanKey		impliesNN;
 		ScanKey		cur;
 
 		/*
-		 * chosen is the so-far-chosen key for the current attribute, if any.
-		 * We don't cast the decision in stone until we reach keys for the
-		 * next attribute.
+		 * bkey will be set to the key that preprocessing left behind as the
+		 * boundary key for this attribute, in this scan direction (if any)
 		 */
 		cur = so->keyData;
 		curattr = 1;
-		chosen = NULL;
+		bkey = NULL;
 		/* Also remember any scankey that implies a NOT NULL constraint */
 		impliesNN = NULL;
 
@@ -1053,23 +1060,29 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 		{
 			if (i >= so->numberOfKeys || cur->sk_attno != curattr)
 			{
+				/* Done looking for the curattr boundary key */
+				Assert(bkey == NULL ||
+					   (bkey->sk_attno == curattr &&
+						(bkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))));
+				Assert(impliesNN == NULL ||
+					   (impliesNN->sk_attno == curattr &&
+						(impliesNN->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))));
+
 				/*
-				 * Done looking at keys for curattr.
-				 *
 				 * If this is a scan key for a skip array whose current
 				 * element is MINVAL, choose low_compare (when scanning
 				 * backwards it'll be MAXVAL, and we'll choose high_compare).
 				 *
-				 * Note: if the array's low_compare key makes 'chosen' NULL,
+				 * Note: if the array's low_compare key makes 'bkey' NULL,
 				 * then we behave as if the array's first element is -inf,
 				 * except when !array->null_elem implies a usable NOT NULL
 				 * constraint.
 				 */
-				if (chosen != NULL &&
-					(chosen->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)))
+				if (bkey != NULL &&
+					(bkey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)))
 				{
-					int			ikey = chosen - so->keyData;
-					ScanKey		skipequalitykey = chosen;
+					int			ikey = bkey - so->keyData;
+					ScanKey		skipequalitykey = bkey;
 					BTArrayKeyInfo *array = NULL;
 
 					for (int arridx = 0; arridx < so->numArrayKeys; arridx++)
@@ -1082,42 +1095,41 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 					if (ScanDirectionIsForward(dir))
 					{
 						Assert(!(skipequalitykey->sk_flags & SK_BT_MAXVAL));
-						chosen = array->low_compare;
+						bkey = array->low_compare;
 					}
 					else
 					{
 						Assert(!(skipequalitykey->sk_flags & SK_BT_MINVAL));
-						chosen = array->high_compare;
+						bkey = array->high_compare;
 					}
 
-					Assert(chosen == NULL ||
-						   chosen->sk_attno == skipequalitykey->sk_attno);
+					Assert(bkey == NULL ||
+						   bkey->sk_attno == skipequalitykey->sk_attno);
 
 					if (!array->null_elem)
 						impliesNN = skipequalitykey;
 					else
-						Assert(chosen == NULL && impliesNN == NULL);
+						Assert(bkey == NULL && impliesNN == NULL);
 				}
 
 				/*
 				 * If we didn't find a usable boundary key, see if we can
 				 * deduce a NOT NULL key
 				 */
-				if (chosen == NULL && impliesNN != NULL &&
+				if (bkey == NULL && impliesNN != NULL &&
 					((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
 					 ScanDirectionIsForward(dir) :
 					 ScanDirectionIsBackward(dir)))
 				{
-					/* Yes, so build the key in notnullkeys[keysz] */
-					chosen = &notnullkeys[keysz];
-					ScanKeyEntryInitialize(chosen,
+					/* Final startKeys[] entry will be deduced NOT NULL key */
+					bkey = &notnullkey;
+					ScanKeyEntryInitialize(bkey,
 										   (SK_SEARCHNOTNULL | SK_ISNULL |
 											(impliesNN->sk_flags &
 											 (SK_BT_DESC | SK_BT_NULLS_FIRST))),
 										   curattr,
-										   ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
-											BTGreaterStrategyNumber :
-											BTLessStrategyNumber),
+										   ScanDirectionIsForward(dir) ?
+										   BTGreaterStrategyNumber : BTLessStrategyNumber,
 										   InvalidOid,
 										   InvalidOid,
 										   InvalidOid,
@@ -1125,12 +1137,12 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 				}
 
 				/*
-				 * If we still didn't find a usable boundary key, quit; else
-				 * save the boundary key pointer in startKeys.
+				 * If preprocessing didn't leave a usable boundary key, quit;
+				 * else save the boundary key pointer in startKeys[]
 				 */
-				if (chosen == NULL)
+				if (bkey == NULL)
 					break;
-				startKeys[keysz++] = chosen;
+				startKeys[keysz++] = bkey;
 
 				/*
 				 * We can only consider adding more boundary keys when the one
@@ -1138,7 +1150,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 				 * (during backwards scans we can only do so when the key that
 				 * we just added to startKeys[] uses the = or <= strategy)
 				 */
-				strat_total = chosen->sk_strategy;
+				strat_total = bkey->sk_strategy;
 				if (strat_total == BTGreaterStrategyNumber ||
 					strat_total == BTLessStrategyNumber)
 					break;
@@ -1149,19 +1161,19 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 				 * make strat_total > or < (and stop adding boundary keys).
 				 * This can only happen with opclasses that lack skip support.
 				 */
-				if (chosen->sk_flags & (SK_BT_NEXT | SK_BT_PRIOR))
+				if (bkey->sk_flags & (SK_BT_NEXT | SK_BT_PRIOR))
 				{
-					Assert(chosen->sk_flags & SK_BT_SKIP);
+					Assert(bkey->sk_flags & SK_BT_SKIP);
 					Assert(strat_total == BTEqualStrategyNumber);
 
 					if (ScanDirectionIsForward(dir))
 					{
-						Assert(!(chosen->sk_flags & SK_BT_PRIOR));
+						Assert(!(bkey->sk_flags & SK_BT_PRIOR));
 						strat_total = BTGreaterStrategyNumber;
 					}
 					else
 					{
-						Assert(!(chosen->sk_flags & SK_BT_NEXT));
+						Assert(!(bkey->sk_flags & SK_BT_NEXT));
 						strat_total = BTLessStrategyNumber;
 					}
 
@@ -1175,24 +1187,30 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 
 				/*
 				 * Done if that was the last scan key output by preprocessing.
-				 * Also done if there is a gap index attribute that lacks a
-				 * usable key (only possible when preprocessing was unable to
-				 * generate a skip array key to "fill in the gap").
+				 * Also done if we've now examined all keys marked required.
 				 */
 				if (i >= so->numberOfKeys ||
-					cur->sk_attno != curattr + 1)
+					!(cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
 					break;
 
 				/*
 				 * Reset for next attr.
 				 */
+				Assert(cur->sk_attno == curattr + 1);
 				curattr = cur->sk_attno;
-				chosen = NULL;
+				bkey = NULL;
 				impliesNN = NULL;
 			}
 
 			/*
-			 * Can we use this key as a starting boundary for this attr?
+			 * If we've located the starting boundary key for curattr, we have
+			 * no interest in curattr's other required key
+			 */
+			if (bkey != NULL)
+				continue;
+
+			/*
+			 * Is this key the starting boundary key for curattr?
 			 *
 			 * If not, does it imply a NOT NULL constraint?  (Because
 			 * SK_SEARCHNULL keys are always assigned BTEqualStrategyNumber,
@@ -1202,27 +1220,20 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 			{
 				case BTLessStrategyNumber:
 				case BTLessEqualStrategyNumber:
-					if (chosen == NULL)
-					{
-						if (ScanDirectionIsBackward(dir))
-							chosen = cur;
-						else
-							impliesNN = cur;
-					}
+					if (ScanDirectionIsBackward(dir))
+						bkey = cur;
+					else if (impliesNN == NULL)
+						impliesNN = cur;
 					break;
 				case BTEqualStrategyNumber:
-					/* override any non-equality choice */
-					chosen = cur;
+					bkey = cur;
 					break;
 				case BTGreaterEqualStrategyNumber:
 				case BTGreaterStrategyNumber:
-					if (chosen == NULL)
-					{
-						if (ScanDirectionIsForward(dir))
-							chosen = cur;
-						else
-							impliesNN = cur;
-					}
+					if (ScanDirectionIsForward(dir))
+						bkey = cur;
+					else if (impliesNN == NULL)
+						impliesNN = cur;
 					break;
 			}
 		}
@@ -1248,16 +1259,18 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 	Assert(keysz <= INDEX_MAX_KEYS);
 	for (int i = 0; i < keysz; i++)
 	{
-		ScanKey		cur = startKeys[i];
+		ScanKey		bkey = startKeys[i];
 
-		Assert(cur->sk_attno == i + 1);
+		Assert(bkey->sk_attno == i + 1);
 
-		if (cur->sk_flags & SK_ROW_HEADER)
+		if (bkey->sk_flags & SK_ROW_HEADER)
 		{
 			/*
 			 * Row comparison header: look to the first row member instead
 			 */
-			ScanKey		subkey = (ScanKey) DatumGetPointer(cur->sk_argument);
+			ScanKey		subkey = (ScanKey) DatumGetPointer(bkey->sk_argument);
+			bool		loosen_strat = false,
+						tighten_strat = false;
 
 			/*
 			 * Cannot be a NULL in the first row member: _bt_preprocess_keys
@@ -1265,122 +1278,160 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 			 * ever getting this far
 			 */
 			Assert(subkey->sk_flags & SK_ROW_MEMBER);
-			Assert(subkey->sk_attno == cur->sk_attno);
+			Assert(subkey->sk_attno == bkey->sk_attno);
 			Assert(!(subkey->sk_flags & SK_ISNULL));
 
 			/*
+			 * This is either a > or >= key (during backwards scans it is
+			 * either < or <=) that was marked required during preprocessing.
+			 * Later so->keyData[] keys can't have been marked required, so
+			 * our row compare header key must be the final startKeys[] entry.
+			 */
+			Assert(subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD));
+			Assert(i == keysz - 1);
+
+			/*
 			 * The member scankeys are already in insertion format (ie, they
 			 * have sk_func = 3-way-comparison function)
 			 */
 			memcpy(inskey.scankeys + i, subkey, sizeof(ScanKeyData));
 
 			/*
-			 * If the row comparison is the last positioning key we accepted,
-			 * try to add additional keys from the lower-order row members.
-			 * (If we accepted independent conditions on additional index
-			 * columns, we use those instead --- doesn't seem worth trying to
-			 * determine which is more restrictive.)  Note that this is OK
-			 * even if the row comparison is of ">" or "<" type, because the
-			 * condition applied to all but the last row member is effectively
-			 * ">=" or "<=", and so the extra keys don't break the positioning
-			 * scheme.  But, by the same token, if we aren't able to use all
-			 * the row members, then the part of the row comparison that we
-			 * did use has to be treated as just a ">=" or "<=" condition, and
-			 * so we'd better adjust strat_total accordingly.
+			 * Now look to later row compare members.
+			 *
+			 * If there's an "index attribute gap" between two row compare
+			 * members, the second member won't have been marked required, and
+			 * so can't be used as a starting boundary key here.  The part of
+			 * the row comparison that we do still use has to be treated as a
+			 * ">=" or "<=" condition.  For example, a qual "(a, c) > (1, 42)"
+			 * with an omitted intervening index attribute "b" will use an
+			 * insertion scan key "a >= 1".  Even the first "a = 1" tuple on
+			 * the leaf level might satisfy the row compare qual.
+			 *
+			 * We're able to use a _more_ restrictive strategy when we reach a
+			 * NULL row compare member, since it is always unsatisfiable.
+			 * For example, a qual "(a, b, c) >= (1, NULL, 77)" will use an
+			 * insertion scan key "a > 1".  All tuples where "a = 1" cannot
+			 * possibly satisfy the row compare qual, so this is safe.
 			 */
-			if (i == keysz - 1)
+			Assert(!(subkey->sk_flags & SK_ROW_END));
+			for (;;)
 			{
-				bool		used_all_subkeys = false;
+				subkey++;
+				Assert(subkey->sk_flags & SK_ROW_MEMBER);
 
-				Assert(!(subkey->sk_flags & SK_ROW_END));
-				for (;;)
+				if (subkey->sk_flags & SK_ISNULL)
 				{
-					subkey++;
-					Assert(subkey->sk_flags & SK_ROW_MEMBER);
-					if (subkey->sk_attno != keysz + 1)
-						break;	/* out-of-sequence, can't use it */
-					if (subkey->sk_strategy != cur->sk_strategy)
-						break;	/* wrong direction, can't use it */
-					if (subkey->sk_flags & SK_ISNULL)
-						break;	/* can't use null keys */
-					Assert(keysz < INDEX_MAX_KEYS);
-					memcpy(inskey.scankeys + keysz, subkey,
-						   sizeof(ScanKeyData));
-					keysz++;
-					if (subkey->sk_flags & SK_ROW_END)
-					{
-						used_all_subkeys = true;
-						break;
-					}
+					/*
+					 * NULL member key, can only use earlier keys.
+					 *
+					 * We deliberately avoid checking if this key is marked
+					 * required.  All earlier keys are required, and this key
+					 * is unsatisfiable either way, so we can't miss anything.
+					 */
+					tighten_strat = true;
+					break;
 				}
-				if (!used_all_subkeys)
+
+				if (!(subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
 				{
-					switch (strat_total)
-					{
-						case BTLessStrategyNumber:
-							strat_total = BTLessEqualStrategyNumber;
-							break;
-						case BTGreaterStrategyNumber:
-							strat_total = BTGreaterEqualStrategyNumber;
-							break;
-					}
+					/* nonrequired member key, can only use earlier keys */
+					loosen_strat = true;
+					break;
 				}
-				break;			/* done with outer loop */
+
+				Assert(subkey->sk_attno == keysz + 1);
+				Assert(subkey->sk_strategy == bkey->sk_strategy);
+				Assert(keysz < INDEX_MAX_KEYS);
+
+				memcpy(inskey.scankeys + keysz, subkey,
+					   sizeof(ScanKeyData));
+				keysz++;
+				if (subkey->sk_flags & SK_ROW_END)
+					break;
 			}
-		}
-		else
-		{
-			/*
-			 * Ordinary comparison key.  Transform the search-style scan key
-			 * to an insertion scan key by replacing the sk_func with the
-			 * appropriate btree comparison function.
-			 *
-			 * If scankey operator is not a cross-type comparison, we can use
-			 * the cached comparison function; otherwise gotta look it up in
-			 * the catalogs.  (That can't lead to infinite recursion, since no
-			 * indexscan initiated by syscache lookup will use cross-data-type
-			 * operators.)
-			 *
-			 * We support the convention that sk_subtype == InvalidOid means
-			 * the opclass input type; this is a hack to simplify life for
-			 * ScanKeyInit().
-			 */
-			if (cur->sk_subtype == rel->rd_opcintype[i] ||
-				cur->sk_subtype == InvalidOid)
+			Assert(!(loosen_strat && tighten_strat));
+			if (loosen_strat)
 			{
-				FmgrInfo   *procinfo;
-
-				procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC);
-				ScanKeyEntryInitializeWithInfo(inskey.scankeys + i,
-											   cur->sk_flags,
-											   cur->sk_attno,
-											   InvalidStrategy,
-											   cur->sk_subtype,
-											   cur->sk_collation,
-											   procinfo,
-											   cur->sk_argument);
+				/* Use less restrictive strategy (and fewer member keys) */
+				switch (strat_total)
+				{
+					case BTLessStrategyNumber:
+						strat_total = BTLessEqualStrategyNumber;
+						break;
+					case BTGreaterStrategyNumber:
+						strat_total = BTGreaterEqualStrategyNumber;
+						break;
+				}
 			}
-			else
+			if (tighten_strat)
 			{
-				RegProcedure cmp_proc;
-
-				cmp_proc = get_opfamily_proc(rel->rd_opfamily[i],
-											 rel->rd_opcintype[i],
-											 cur->sk_subtype,
-											 BTORDER_PROC);
-				if (!RegProcedureIsValid(cmp_proc))
-					elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
-						 BTORDER_PROC, rel->rd_opcintype[i], cur->sk_subtype,
-						 cur->sk_attno, RelationGetRelationName(rel));
-				ScanKeyEntryInitialize(inskey.scankeys + i,
-									   cur->sk_flags,
-									   cur->sk_attno,
-									   InvalidStrategy,
-									   cur->sk_subtype,
-									   cur->sk_collation,
-									   cmp_proc,
-									   cur->sk_argument);
+				/* Use more restrictive strategy (and fewer member keys) */
+				switch (strat_total)
+				{
+					case BTLessEqualStrategyNumber:
+						strat_total = BTLessStrategyNumber;
+						break;
+					case BTGreaterEqualStrategyNumber:
+						strat_total = BTGreaterStrategyNumber;
+						break;
+				}
 			}
+
+			/* done adding to inskey (row comparison keys always come last) */
+			break;
+		}
+
+		/*
+		 * Ordinary comparison key/search-style key.
+		 *
+		 * Transform the search-style scan key to an insertion scan key by
+		 * replacing the sk_func with the appropriate btree 3-way-comparison
+		 * function.
+		 *
+		 * If scankey operator is not a cross-type comparison, we can use the
+		 * cached comparison function; otherwise gotta look it up in the
+		 * catalogs.  (That can't lead to infinite recursion, since no
+		 * indexscan initiated by syscache lookup will use cross-data-type
+		 * operators.)
+		 *
+		 * We support the convention that sk_subtype == InvalidOid means the
+		 * opclass input type; this hack simplifies life for ScanKeyInit().
+		 */
+		if (bkey->sk_subtype == rel->rd_opcintype[i] ||
+			bkey->sk_subtype == InvalidOid)
+		{
+			FmgrInfo   *procinfo;
+
+			procinfo = index_getprocinfo(rel, bkey->sk_attno, BTORDER_PROC);
+			ScanKeyEntryInitializeWithInfo(inskey.scankeys + i,
+										   bkey->sk_flags,
+										   bkey->sk_attno,
+										   InvalidStrategy,
+										   bkey->sk_subtype,
+										   bkey->sk_collation,
+										   procinfo,
+										   bkey->sk_argument);
+		}
+		else
+		{
+			RegProcedure cmp_proc;
+
+			cmp_proc = get_opfamily_proc(rel->rd_opfamily[i],
+										 rel->rd_opcintype[i],
+										 bkey->sk_subtype, BTORDER_PROC);
+			if (!RegProcedureIsValid(cmp_proc))
+				elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
+					 BTORDER_PROC, rel->rd_opcintype[i], bkey->sk_subtype,
+					 bkey->sk_attno, RelationGetRelationName(rel));
+			ScanKeyEntryInitialize(inskey.scankeys + i,
+								   bkey->sk_flags,
+								   bkey->sk_attno,
+								   InvalidStrategy,
+								   bkey->sk_subtype,
+								   bkey->sk_collation,
+								   cmp_proc,
+								   bkey->sk_argument);
 		}
 	}
 
@@ -1469,6 +1520,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 
 	if (!BufferIsValid(so->currPos.buf))
 	{
+		Assert(!so->needPrimScan);
+
 		/*
 		 * We only get here if the index is completely empty. Lock relation
 		 * because nothing finer to lock exists.  Without a buffer lock, it's
@@ -1487,7 +1540,6 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
 
 		if (!BufferIsValid(so->currPos.buf))
 		{
-			Assert(!so->needPrimScan);
 			_bt_parallel_done(scan);
 			return false;
 		}
@@ -1610,7 +1662,13 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
 	so->currPos.currPage = BufferGetBlockNumber(so->currPos.buf);
 	so->currPos.prevPage = opaque->btpo_prev;
 	so->currPos.nextPage = opaque->btpo_next;
+	/* delay setting so->currPos.lsn until _bt_drop_lock_and_maybe_pin */
+	so->currPos.dir = dir;
+	so->currPos.nextTupleOffset = 0;
 
+	/* either moreRight or moreLeft should be set now (may be unset later) */
+	Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight :
+		   so->currPos.moreLeft);
 	Assert(!P_IGNORE(opaque));
 	Assert(BTScanPosIsPinned(so->currPos));
 	Assert(!so->needPrimScan);
@@ -1626,14 +1684,6 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
 								 so->currPos.currPage);
 	}
 
-	/* initialize remaining currPos fields related to current page */
-	so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf);
-	so->currPos.dir = dir;
-	so->currPos.nextTupleOffset = 0;
-	/* either moreLeft or moreRight should be set now (may be unset later) */
-	Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight :
-		   so->currPos.moreLeft);
-
 	PredicateLockPage(rel, so->currPos.currPage, scan->xs_snapshot);
 
 	/* initialize local variables */
@@ -2107,10 +2157,9 @@ _bt_returnitem(IndexScanDesc scan, BTScanOpaque so)
  *
  * Wrapper on _bt_readnextpage that performs final steps for the current page.
  *
- * On entry, if so->currPos.buf is valid the buffer is pinned but not locked.
- * If there's no pin held, it's because _bt_drop_lock_and_maybe_pin dropped
- * the pin eagerly earlier on.  The scan must have so->currPos.currPage set to
- * a valid block, in any case.
+ * On entry, so->currPos must be valid.  Its buffer will be pinned, though
+ * never locked.  (Actually, when so->dropPin is set there won't even be a
+ * pin held, though so->currPos.currPage must still be a valid block number.)
  */
 static bool
 _bt_steppage(IndexScanDesc scan, ScanDirection dir)
@@ -2251,12 +2300,14 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
 	 */
 	if (_bt_readpage(scan, dir, offnum, true))
 	{
+		Relation	rel = scan->indexRelation;
+
 		/*
 		 * _bt_readpage succeeded.  Drop the lock (and maybe the pin) on
 		 * so->currPos.buf in preparation for btgettuple returning tuples.
 		 */
 		Assert(BTScanPosIsPinned(so->currPos));
-		_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+		_bt_drop_lock_and_maybe_pin(rel, so);
 		return true;
 	}
 
@@ -2278,9 +2329,12 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
  * previously-saved right link or left link.  lastcurrblkno is the page that
  * was current at the point where the blkno link was saved, which we use to
  * reason about concurrent page splits/page deletions during backwards scans.
+ * In the common case where seized=false, blkno is either so->currPos.nextPage
+ * or so->currPos.prevPage, and lastcurrblkno is so->currPos.currPage.
  *
- * On entry, caller shouldn't hold any locks or pins on any page (we work
- * directly off of blkno and lastcurrblkno instead).  Parallel scan callers
+ * On entry, so->currPos shouldn't be locked by caller.  so->currPos.buf must
+ * be InvalidBuffer/unpinned as needed by caller (note that lastcurrblkno
+ * won't need to be read again in almost all cases).  Parallel scan callers
  * that seized the scan before calling here should pass seized=true; such a
  * caller's blkno and lastcurrblkno arguments come from the seized scan.
  * seized=false callers just pass us the blkno/lastcurrblkno taken from their
@@ -2294,11 +2348,11 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
  *
  * On success exit, so->currPos is updated to contain data from the next
  * interesting page, and we return true.  We hold a pin on the buffer on
- * success exit, except when _bt_drop_lock_and_maybe_pin decided it was safe
- * to eagerly drop the pin (to avoid blocking VACUUM).
+ * success exit (except during so->dropPin index scans, when we drop the pin
+ * eagerly to avoid blocking VACUUM).
  *
- * If there are no more matching records in the given direction, we drop all
- * locks and pins, invalidate so->currPos, and return false.
+ * If there are no more matching records in the given direction, we invalidate
+ * so->currPos (while ensuring it retains no locks or pins), and return false.
  *
  * We always release the scan for a parallel scan caller, regardless of
  * success or failure; we'll call _bt_parallel_release as soon as possible.
@@ -2413,7 +2467,7 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno,
 	 */
 	Assert(so->currPos.currPage == blkno);
 	Assert(BTScanPosIsPinned(so->currPos));
-	_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+	_bt_drop_lock_and_maybe_pin(rel, so);
 
 	return true;
 }
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 3794cc924ad..9d70e89c1f3 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -105,7 +105,7 @@ typedef struct BTShared
 	int			scantuplesortstates;
 
 	/* Query ID, for report in worker processes */
-	uint64		queryid;
+	int64		queryid;
 
 	/*
 	 * workersdonecv is used to monitor the progress of workers.  All parallel
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 1a15dfcb7d3..9aed207995f 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -44,7 +44,6 @@ static bool _bt_array_decrement(Relation rel, ScanKey skey, BTArrayKeyInfo *arra
 static bool _bt_array_increment(Relation rel, ScanKey skey, BTArrayKeyInfo *array);
 static bool _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir,
 											 bool *skip_array_set);
-static void _bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir);
 static bool _bt_tuple_before_array_skeys(IndexScanDesc scan, ScanDirection dir,
 										 IndexTuple tuple, TupleDesc tupdesc, int tupnatts,
 										 bool readpagetup, int sktrig, bool *scanBehind);
@@ -52,7 +51,6 @@ static bool _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
 								   IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
 								   int sktrig, bool sktrig_required);
 #ifdef USE_ASSERT_CHECKING
-static bool _bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir);
 static bool _bt_verify_keys_with_arraykeys(IndexScanDesc scan);
 #endif
 static bool _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir,
@@ -1035,73 +1033,6 @@ _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir,
 }
 
 /*
- * _bt_rewind_nonrequired_arrays() -- Rewind SAOP arrays not marked required
- *
- * Called when _bt_advance_array_keys decides to start a new primitive index
- * scan on the basis of the current scan position being before the position
- * that _bt_first is capable of repositioning the scan to by applying an
- * inequality operator required in the opposite-to-scan direction only.
- *
- * Although equality strategy scan keys (for both arrays and non-arrays alike)
- * are either marked required in both directions or in neither direction,
- * there is a sense in which non-required arrays behave like required arrays.
- * With a qual such as "WHERE a IN (100, 200) AND b >= 3 AND c IN (5, 6, 7)",
- * the scan key on "c" is non-required, but nevertheless enables positioning
- * the scan at the first tuple >= "(100, 3, 5)" on the leaf level during the
- * first descent of the tree by _bt_first.  Later on, there could also be a
- * second descent, that places the scan right before tuples >= "(200, 3, 5)".
- * _bt_first must never be allowed to build an insertion scan key whose "c"
- * entry is set to a value other than 5, the "c" array's first element/value.
- * (Actually, it's the first in the current scan direction.  This example uses
- * a forward scan.)
- *
- * Calling here resets the array scan key elements for the scan's non-required
- * arrays.  This is strictly necessary for correctness in a subset of cases
- * involving "required in opposite direction"-triggered primitive index scans.
- * Not all callers are at risk of _bt_first using a non-required array like
- * this, but advancement always resets the arrays when another primitive scan
- * is scheduled, just to keep things simple.  Array advancement even makes
- * sure to reset non-required arrays during scans that have no inequalities.
- * (Advancement still won't call here when there are no inequalities, though
- * that's just because it's all handled indirectly instead.)
- *
- * Note: _bt_verify_arrays_bt_first is called by an assertion to enforce that
- * everybody got this right.
- *
- * Note: In practice almost all SAOP arrays are marked required during
- * preprocessing (if necessary by generating skip arrays).  It is hardly ever
- * truly necessary to call here, but consistently doing so is simpler.
- */
-static void
-_bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir)
-{
-	Relation	rel = scan->indexRelation;
-	BTScanOpaque so = (BTScanOpaque) scan->opaque;
-	int			arrayidx = 0;
-
-	for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
-	{
-		ScanKey		cur = so->keyData + ikey;
-		BTArrayKeyInfo *array = NULL;
-
-		if (!(cur->sk_flags & SK_SEARCHARRAY) ||
-			cur->sk_strategy != BTEqualStrategyNumber)
-			continue;
-
-		array = &so->arrayKeys[arrayidx++];
-		Assert(array->scan_key == ikey);
-
-		if ((cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
-			continue;
-
-		Assert(array->num_elems != -1); /* No non-required skip arrays */
-
-		_bt_array_set_low_or_high(rel, cur, array,
-								  ScanDirectionIsForward(dir));
-	}
-}
-
-/*
  * _bt_tuple_before_array_skeys() -- too early to advance required arrays?
  *
  * We always compare the tuple using the current array keys (which we assume
@@ -1380,8 +1311,6 @@ _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir)
 	 */
 	if (so->needPrimScan)
 	{
-		Assert(_bt_verify_arrays_bt_first(scan, dir));
-
 		/*
 		 * Flag was set -- must call _bt_first again, which will reset the
 		 * scan's needPrimScan flag
@@ -2007,14 +1936,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
 	 */
 	else if (has_required_opposite_direction_only && pstate->finaltup &&
 			 unlikely(!_bt_oppodir_checkkeys(scan, dir, pstate->finaltup)))
-	{
-		/*
-		 * Make sure that any SAOP arrays that were not marked required by
-		 * preprocessing are reset to their first element for this direction
-		 */
-		_bt_rewind_nonrequired_arrays(scan, dir);
 		goto new_prim_scan;
-	}
 
 continue_scan:
 
@@ -2045,8 +1967,6 @@ continue_scan:
 		 */
 		so->oppositeDirCheck = has_required_opposite_direction_only;
 
-		_bt_rewind_nonrequired_arrays(scan, dir);
-
 		/*
 		 * skip by setting "look ahead" mechanism's offnum for forwards scans
 		 * (backwards scans check scanBehind flag directly instead)
@@ -2143,48 +2063,6 @@ end_toplevel_scan:
 
 #ifdef USE_ASSERT_CHECKING
 /*
- * Verify that the scan's qual state matches what we expect at the point that
- * _bt_start_prim_scan is about to start a just-scheduled new primitive scan.
- *
- * We enforce a rule against non-required array scan keys: they must start out
- * with whatever element is the first for the scan's current scan direction.
- * See _bt_rewind_nonrequired_arrays comments for an explanation.
- */
-static bool
-_bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir)
-{
-	BTScanOpaque so = (BTScanOpaque) scan->opaque;
-	int			arrayidx = 0;
-
-	for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
-	{
-		ScanKey		cur = so->keyData + ikey;
-		BTArrayKeyInfo *array = NULL;
-		int			first_elem_dir;
-
-		if (!(cur->sk_flags & SK_SEARCHARRAY) ||
-			cur->sk_strategy != BTEqualStrategyNumber)
-			continue;
-
-		array = &so->arrayKeys[arrayidx++];
-
-		if (((cur->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) ||
-			((cur->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)))
-			continue;
-
-		if (ScanDirectionIsForward(dir))
-			first_elem_dir = 0;
-		else
-			first_elem_dir = array->num_elems - 1;
-
-		if (array->cur_elem != first_elem_dir)
-			return false;
-	}
-
-	return _bt_verify_keys_with_arraykeys(scan);
-}
-
-/*
  * Verify that the scan's "so->keyData[]" scan keys are in agreement with
  * its array key state
  */
@@ -2194,6 +2072,7 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan)
 	BTScanOpaque so = (BTScanOpaque) scan->opaque;
 	int			last_sk_attno = InvalidAttrNumber,
 				arrayidx = 0;
+	bool		nonrequiredseen = false;
 
 	if (!so->qual_ok)
 		return false;
@@ -2217,8 +2096,16 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan)
 		if (array->num_elems != -1 &&
 			cur->sk_argument != array->elem_values[array->cur_elem])
 			return false;
-		if (last_sk_attno > cur->sk_attno)
-			return false;
+		if (cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))
+		{
+			if (last_sk_attno > cur->sk_attno)
+				return false;
+			if (nonrequiredseen)
+				return false;
+		}
+		else
+			nonrequiredseen = true;
+
 		last_sk_attno = cur->sk_attno;
 	}
 
@@ -2551,37 +2438,12 @@ _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate)
 		if (!(key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
 		{
 			/* Scan key isn't marked required (corner case) */
-			Assert(!(key->sk_flags & SK_ROW_HEADER));
 			break;				/* unsafe */
 		}
 		if (key->sk_flags & SK_ROW_HEADER)
 		{
-			/*
-			 * RowCompare inequality.
-			 *
-			 * Only the first subkey from a RowCompare can ever be marked
-			 * required (that happens when the row header is marked required).
-			 * There is no simple, general way for us to transitively deduce
-			 * whether or not every tuple on the page satisfies a RowCompare
-			 * key based only on firsttup and lasttup -- so we just give up.
-			 */
-			if (!start_past_saop_eq && !so->skipScan)
-				break;			/* unsafe to go further */
-
-			/*
-			 * We have to be even more careful with RowCompares that come
-			 * after an array: we assume it's unsafe to even bypass the array.
-			 * Calling _bt_start_array_keys to recover the scan's arrays
-			 * following use of forcenonrequired mode isn't compatible with
-			 * _bt_check_rowcompare's continuescan=false behavior with NULL
-			 * row compare members.  _bt_advance_array_keys must not make a
-			 * decision on the basis of a key not being satisfied in the
-			 * opposite-to-scan direction until the scan reaches a leaf page
-			 * where the same key begins to be satisfied in scan direction.
-			 * The _bt_first !used_all_subkeys behavior makes this limitation
-			 * hard to work around some other way.
-			 */
-			return;				/* completely unsafe to set pstate.startikey */
+			/* RowCompare inequalities currently aren't supported */
+			break;				/* "unsafe" */
 		}
 		if (key->sk_strategy != BTEqualStrategyNumber)
 		{
@@ -3078,6 +2940,31 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 
 		Assert(subkey->sk_flags & SK_ROW_MEMBER);
 
+		/* When a NULL row member is compared, the row never matches */
+		if (subkey->sk_flags & SK_ISNULL)
+		{
+			/*
+			 * Unlike the simple-scankey case, this isn't a disallowed case
+			 * (except when it's the first row element that has the NULL arg).
+			 * But it can never match.  If all the earlier row comparison
+			 * columns are required for the scan direction, we can stop the
+			 * scan, because there can't be another tuple that will succeed.
+			 */
+			Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument));
+			subkey--;
+			if (forcenonrequired)
+			{
+				/* treating scan's keys as non-required */
+			}
+			else if ((subkey->sk_flags & SK_BT_REQFWD) &&
+					 ScanDirectionIsForward(dir))
+				*continuescan = false;
+			else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
+					 ScanDirectionIsBackward(dir))
+				*continuescan = false;
+			return false;
+		}
+
 		if (subkey->sk_attno > tupnatts)
 		{
 			/*
@@ -3087,11 +2974,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 			 * attribute passes the qual.
 			 */
 			Assert(BTreeTupleIsPivot(tuple));
-			cmpresult = 0;
-			if (subkey->sk_flags & SK_ROW_END)
-				break;
-			subkey++;
-			continue;
+			return true;
 		}
 
 		datum = index_getattr(tuple,
@@ -3101,6 +2984,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 
 		if (isNull)
 		{
+			int			reqflags;
+
 			if (forcenonrequired)
 			{
 				/* treating scan's keys as non-required */
@@ -3111,15 +2996,35 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 				 * Since NULLs are sorted before non-NULLs, we know we have
 				 * reached the lower limit of the range of values for this
 				 * index attr.  On a backward scan, we can stop if this qual
-				 * is one of the "must match" subset.  We can stop regardless
-				 * of whether the qual is > or <, so long as it's required,
-				 * because it's not possible for any future tuples to pass. On
-				 * a forward scan, however, we must keep going, because we may
-				 * have initially positioned to the start of the index.
-				 * (_bt_advance_array_keys also relies on this behavior during
-				 * forward scans.)
+				 * is one of the "must match" subset.  However, on a forward
+				 * scan, we must keep going, because we may have initially
+				 * positioned to the start of the index.
+				 *
+				 * All required NULLS FIRST > row members can use NULL tuple
+				 * values to end backwards scans, just like with other values.
+				 * A qual "WHERE (a, b, c) > (9, 42, 'foo')" can terminate a
+				 * backwards scan upon reaching the index's rightmost "a = 9"
+				 * tuple whose "b" column contains a NULL (if not sooner).
+				 * Since "b" is NULLS FIRST, we can treat its NULLs as "<" 42.
 				 */
-				if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
+				reqflags = SK_BT_REQBKWD;
+
+				/*
+				 * When a most significant required NULLS FIRST < row compare
+				 * member sees NULL tuple values during a backwards scan, it
+				 * signals the end of matches for the whole row compare/scan.
+				 * A qual "WHERE (a, b, c) < (9, 42, 'foo')" will terminate a
+				 * backwards scan upon reaching the rightmost tuple whose "a"
+				 * column has a NULL.  The "a" NULL value is "<" 9, and yet
+				 * our < row compare will still end the scan.  (This isn't
+				 * safe with later/lower-order row members.  Notice that it
+				 * can only happen with an "a" NULL some time after the scan
+				 * completely stops needing to use its "b" and "c" members.)
+				 */
+				if (subkey == (ScanKey) DatumGetPointer(skey->sk_argument))
+					reqflags |= SK_BT_REQFWD;	/* safe, first row member */
+
+				if ((subkey->sk_flags & reqflags) &&
 					ScanDirectionIsBackward(dir))
 					*continuescan = false;
 			}
@@ -3129,15 +3034,35 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 				 * Since NULLs are sorted after non-NULLs, we know we have
 				 * reached the upper limit of the range of values for this
 				 * index attr.  On a forward scan, we can stop if this qual is
-				 * one of the "must match" subset.  We can stop regardless of
-				 * whether the qual is > or <, so long as it's required,
-				 * because it's not possible for any future tuples to pass. On
-				 * a backward scan, however, we must keep going, because we
-				 * may have initially positioned to the end of the index.
-				 * (_bt_advance_array_keys also relies on this behavior during
-				 * backward scans.)
+				 * one of the "must match" subset.  However, on a backward
+				 * scan, we must keep going, because we may have initially
+				 * positioned to the end of the index.
+				 *
+				 * All required NULLS LAST < row members can use NULL tuple
+				 * values to end forwards scans, just like with other values.
+				 * A qual "WHERE (a, b, c) < (9, 42, 'foo')" can terminate a
+				 * forwards scan upon reaching the index's leftmost "a = 9"
+				 * tuple whose "b" column contains a NULL (if not sooner).
+				 * Since "b" is NULLS LAST, we can treat its NULLs as ">" 42.
+				 */
+				reqflags = SK_BT_REQFWD;
+
+				/*
+				 * When a most significant required NULLS LAST > row compare
+				 * member sees NULL tuple values during a forwards scan, it
+				 * signals the end of matches for the whole row compare/scan.
+				 * A qual "WHERE (a, b, c) > (9, 42, 'foo')" will terminate a
+				 * forwards scan upon reaching the leftmost tuple whose "a"
+				 * column has a NULL.  The "a" NULL value is ">" 9, and yet
+				 * our > row compare will end the scan.  (This isn't safe with
+				 * later/lower-order row members.  Notice that it can only
+				 * happen with an "a" NULL some time after the scan completely
+				 * stops needing to use its "b" and "c" members.)
 				 */
-				if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
+				if (subkey == (ScanKey) DatumGetPointer(skey->sk_argument))
+					reqflags |= SK_BT_REQBKWD;	/* safe, first row member */
+
+				if ((subkey->sk_flags & reqflags) &&
 					ScanDirectionIsForward(dir))
 					*continuescan = false;
 			}
@@ -3148,30 +3073,6 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 			return false;
 		}
 
-		if (subkey->sk_flags & SK_ISNULL)
-		{
-			/*
-			 * Unlike the simple-scankey case, this isn't a disallowed case
-			 * (except when it's the first row element that has the NULL arg).
-			 * But it can never match.  If all the earlier row comparison
-			 * columns are required for the scan direction, we can stop the
-			 * scan, because there can't be another tuple that will succeed.
-			 */
-			Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument));
-			subkey--;
-			if (forcenonrequired)
-			{
-				/* treating scan's keys as non-required */
-			}
-			else if ((subkey->sk_flags & SK_BT_REQFWD) &&
-					 ScanDirectionIsForward(dir))
-				*continuescan = false;
-			else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
-					 ScanDirectionIsBackward(dir))
-				*continuescan = false;
-			return false;
-		}
-
 		/* Perform the test --- three-way comparison not bool operator */
 		cmpresult = DatumGetInt32(FunctionCall2Coll(&subkey->sk_func,
 													subkey->sk_collation,
@@ -3330,87 +3231,85 @@ _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
  * current page and killed tuples thereon (generally, this should only be
  * called if so->numKilled > 0).
  *
- * The caller does not have a lock on the page and may or may not have the
- * page pinned in a buffer.  Note that read-lock is sufficient for setting
- * LP_DEAD status (which is only a hint).
- *
- * We match items by heap TID before assuming they are the right ones to
- * delete.  We cope with cases where items have moved right due to insertions.
- * If an item has moved off the current page due to a split, we'll fail to
- * find it and do nothing (this is not an error case --- we assume the item
- * will eventually get marked in a future indexscan).
+ * Caller should not have a lock on the so->currPos page, but must hold a
+ * buffer pin when !so->dropPin.  When we return, the page still won't be
+ * locked, and the scan holds whatever pins it held before calling here.
  *
- * Note that if we hold a pin on the target page continuously from initially
- * reading the items until applying this function, VACUUM cannot have deleted
- * any items from the page, and so there is no need to search left from the
- * recorded offset.  (This observation also guarantees that the item is still
- * the right one to delete, which might otherwise be questionable since heap
- * TIDs can get recycled.)	This holds true even if the page has been modified
- * by inserts and page splits, so there is no need to consult the LSN.
+ * We match items by heap TID before assuming they are the right ones to set
+ * LP_DEAD.  If the scan is one that holds a buffer pin on the target page
+ * continuously from initially reading the items until applying this function
+ * (if it is a !so->dropPin scan), VACUUM cannot have deleted any items on the
+ * page, so the page's TIDs can't have been recycled by now.  There's no risk
+ * that we'll confuse a new index tuple that happens to use a recycled TID
+ * with a now-removed tuple with the same TID (that used to be on this same
+ * page).  We can't rely on that during scans that drop buffer pins eagerly
+ * (so->dropPin scans), though, so we must condition setting LP_DEAD bits on
+ * the page LSN having not changed since back when _bt_readpage saw the page.
+ * We totally give up on setting LP_DEAD bits when the page LSN changed.
  *
- * If the pin was released after reading the page, then we re-read it.  If it
- * has been modified since we read it (as determined by the LSN), we dare not
- * flag any entries because it is possible that the old entry was vacuumed
- * away and the TID was re-used by a completely different heap tuple.
+ * We give up much less often during !so->dropPin scans, but it still happens.
+ * We cope with cases where items have moved right due to insertions.  If an
+ * item has moved off the current page due to a split, we'll fail to find it
+ * and just give up on it.
  */
 void
 _bt_killitems(IndexScanDesc scan)
 {
+	Relation	rel = scan->indexRelation;
 	BTScanOpaque so = (BTScanOpaque) scan->opaque;
 	Page		page;
 	BTPageOpaque opaque;
 	OffsetNumber minoff;
 	OffsetNumber maxoff;
-	int			i;
 	int			numKilled = so->numKilled;
 	bool		killedsomething = false;
-	bool		droppedpin PG_USED_FOR_ASSERTS_ONLY;
+	Buffer		buf;
 
+	Assert(numKilled > 0);
 	Assert(BTScanPosIsValid(so->currPos));
+	Assert(scan->heapRelation != NULL); /* can't be a bitmap index scan */
 
-	/*
-	 * Always reset the scan state, so we don't look for same items on other
-	 * pages.
-	 */
+	/* Always invalidate so->killedItems[] before leaving so->currPos */
 	so->numKilled = 0;
 
-	if (BTScanPosIsPinned(so->currPos))
+	if (!so->dropPin)
 	{
 		/*
 		 * We have held the pin on this page since we read the index tuples,
 		 * so all we need to do is lock it.  The pin will have prevented
-		 * re-use of any TID on the page, so there is no need to check the
-		 * LSN.
+		 * concurrent VACUUMs from recycling any of the TIDs on the page.
 		 */
-		droppedpin = false;
-		_bt_lockbuf(scan->indexRelation, so->currPos.buf, BT_READ);
-
-		page = BufferGetPage(so->currPos.buf);
+		Assert(BTScanPosIsPinned(so->currPos));
+		buf = so->currPos.buf;
+		_bt_lockbuf(rel, buf, BT_READ);
 	}
 	else
 	{
-		Buffer		buf;
+		XLogRecPtr	latestlsn;
 
-		droppedpin = true;
-		/* Attempt to re-read the buffer, getting pin and lock. */
-		buf = _bt_getbuf(scan->indexRelation, so->currPos.currPage, BT_READ);
+		Assert(!BTScanPosIsPinned(so->currPos));
+		Assert(RelationNeedsWAL(rel));
+		buf = _bt_getbuf(rel, so->currPos.currPage, BT_READ);
 
-		page = BufferGetPage(buf);
-		if (BufferGetLSNAtomic(buf) == so->currPos.lsn)
-			so->currPos.buf = buf;
-		else
+		latestlsn = BufferGetLSNAtomic(buf);
+		Assert(!XLogRecPtrIsInvalid(so->currPos.lsn));
+		Assert(so->currPos.lsn <= latestlsn);
+		if (so->currPos.lsn != latestlsn)
 		{
-			/* Modified while not pinned means hinting is not safe. */
-			_bt_relbuf(scan->indexRelation, buf);
+			/* Modified, give up on hinting */
+			_bt_relbuf(rel, buf);
 			return;
 		}
+
+		/* Unmodified, hinting is safe */
 	}
 
+	page = BufferGetPage(buf);
 	opaque = BTPageGetOpaque(page);
 	minoff = P_FIRSTDATAKEY(opaque);
 	maxoff = PageGetMaxOffsetNumber(page);
 
-	for (i = 0; i < numKilled; i++)
+	for (int i = 0; i < numKilled; i++)
 	{
 		int			itemIndex = so->killedItems[i];
 		BTScanPosItem *kitem = &so->currPos.items[itemIndex];
@@ -3442,7 +3341,7 @@ _bt_killitems(IndexScanDesc scan)
 				 * correctness.
 				 *
 				 * Note that the page may have been modified in almost any way
-				 * since we first read it (in the !droppedpin case), so it's
+				 * since we first read it (in the !so->dropPin case), so it's
 				 * possible that this posting list tuple wasn't a posting list
 				 * tuple when we first encountered its heap TIDs.
 				 */
@@ -3458,7 +3357,7 @@ _bt_killitems(IndexScanDesc scan)
 					 * though only in the common case where the page can't
 					 * have been concurrently modified
 					 */
-					Assert(kitem->indexOffset == offnum || !droppedpin);
+					Assert(kitem->indexOffset == offnum || !so->dropPin);
 
 					/*
 					 * Read-ahead to later kitems here.
@@ -3522,10 +3421,13 @@ _bt_killitems(IndexScanDesc scan)
 	if (killedsomething)
 	{
 		opaque->btpo_flags |= BTP_HAS_GARBAGE;
-		MarkBufferDirtyHint(so->currPos.buf, true);
+		MarkBufferDirtyHint(buf, true);
 	}
 
-	_bt_unlockbuf(scan->indexRelation, so->currPos.buf);
+	if (!so->dropPin)
+		_bt_unlockbuf(rel, buf);
+	else
+		_bt_relbuf(rel, buf);
 }
 
 
diff --git a/src/backend/access/rmgrdesc/replorigindesc.c b/src/backend/access/rmgrdesc/replorigindesc.c
index 5dd74233996..35e3af2903e 100644
--- a/src/backend/access/rmgrdesc/replorigindesc.c
+++ b/src/backend/access/rmgrdesc/replorigindesc.c
@@ -29,7 +29,7 @@ replorigin_desc(StringInfo buf, XLogReaderState *record)
 
 				xlrec = (xl_replorigin_set *) rec;
 
-				appendStringInfo(buf, "set %u; lsn %X/%X; force: %d",
+				appendStringInfo(buf, "set %u; lsn %X/%08X; force: %d",
 								 xlrec->node_id,
 								 LSN_FORMAT_ARGS(xlrec->remote_lsn),
 								 xlrec->force);
diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c
index 305598e2865..f0f696855b9 100644
--- a/src/backend/access/rmgrdesc/xactdesc.c
+++ b/src/backend/access/rmgrdesc/xactdesc.c
@@ -359,7 +359,7 @@ xact_desc_commit(StringInfo buf, uint8 info, xl_xact_commit *xlrec, RepOriginId
 
 	if (parsed.xinfo & XACT_XINFO_HAS_ORIGIN)
 	{
-		appendStringInfo(buf, "; origin: node %u, lsn %X/%X, at %s",
+		appendStringInfo(buf, "; origin: node %u, lsn %X/%08X, at %s",
 						 origin_id,
 						 LSN_FORMAT_ARGS(parsed.origin_lsn),
 						 timestamptz_to_str(parsed.origin_timestamp));
@@ -384,7 +384,7 @@ xact_desc_abort(StringInfo buf, uint8 info, xl_xact_abort *xlrec, RepOriginId or
 
 	if (parsed.xinfo & XACT_XINFO_HAS_ORIGIN)
 	{
-		appendStringInfo(buf, "; origin: node %u, lsn %X/%X, at %s",
+		appendStringInfo(buf, "; origin: node %u, lsn %X/%08X, at %s",
 						 origin_id,
 						 LSN_FORMAT_ARGS(parsed.origin_lsn),
 						 timestamptz_to_str(parsed.origin_timestamp));
@@ -418,7 +418,7 @@ xact_desc_prepare(StringInfo buf, uint8 info, xl_xact_prepare *xlrec, RepOriginI
 	 * way as PrepareRedoAdd().
 	 */
 	if (origin_id != InvalidRepOriginId)
-		appendStringInfo(buf, "; origin: node %u, lsn %X/%X, at %s",
+		appendStringInfo(buf, "; origin: node %u, lsn %X/%08X, at %s",
 						 origin_id,
 						 LSN_FORMAT_ARGS(parsed.origin_lsn),
 						 timestamptz_to_str(parsed.origin_timestamp));
diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c
index 58040f28656..cd6c2a2f650 100644
--- a/src/backend/access/rmgrdesc/xlogdesc.c
+++ b/src/backend/access/rmgrdesc/xlogdesc.c
@@ -65,7 +65,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
 	{
 		CheckPoint *checkpoint = (CheckPoint *) rec;
 
-		appendStringInfo(buf, "redo %X/%X; "
+		appendStringInfo(buf, "redo %X/%08X; "
 						 "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %u; "
 						 "oldest xid %u in DB %u; oldest multi %u in DB %u; "
 						 "oldest/newest commit timestamp xid: %u/%u; "
@@ -111,7 +111,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
 		XLogRecPtr	startpoint;
 
 		memcpy(&startpoint, rec, sizeof(XLogRecPtr));
-		appendStringInfo(buf, "%X/%X", LSN_FORMAT_ARGS(startpoint));
+		appendStringInfo(buf, "%X/%08X", LSN_FORMAT_ARGS(startpoint));
 	}
 	else if (info == XLOG_PARAMETER_CHANGE)
 	{
@@ -156,7 +156,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
 		xl_overwrite_contrecord xlrec;
 
 		memcpy(&xlrec, rec, sizeof(xl_overwrite_contrecord));
-		appendStringInfo(buf, "lsn %X/%X; time %s",
+		appendStringInfo(buf, "lsn %X/%08X; time %s",
 						 LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
 						 timestamptz_to_str(xlrec.overwrite_time));
 	}
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 48f10bec91e..e80fbe109cf 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -110,9 +110,7 @@ static SlruCtlData XactCtlData;
 #define XactCtl (&XactCtlData)
 
 
-static int	ZeroCLOGPage(int64 pageno, bool writeXlog);
 static bool CLOGPagePrecedes(int64 page1, int64 page2);
-static void WriteZeroPageXlogRec(int64 pageno);
 static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXact,
 								 Oid oldestXactDb);
 static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
@@ -832,41 +830,8 @@ check_transaction_buffers(int *newval, void **extra, GucSource source)
 void
 BootStrapCLOG(void)
 {
-	int			slotno;
-	LWLock	   *lock = SimpleLruGetBankLock(XactCtl, 0);
-
-	LWLockAcquire(lock, LW_EXCLUSIVE);
-
-	/* Create and zero the first page of the commit log */
-	slotno = ZeroCLOGPage(0, false);
-
-	/* Make sure it's written out */
-	SimpleLruWritePage(XactCtl, slotno);
-	Assert(!XactCtl->shared->page_dirty[slotno]);
-
-	LWLockRelease(lock);
-}
-
-/*
- * Initialize (or reinitialize) a page of CLOG to zeroes.
- * If writeXlog is true, also emit an XLOG record saying we did this.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroCLOGPage(int64 pageno, bool writeXlog)
-{
-	int			slotno;
-
-	slotno = SimpleLruZeroPage(XactCtl, pageno);
-
-	if (writeXlog)
-		WriteZeroPageXlogRec(pageno);
-
-	return slotno;
+	/* Zero the initial page and flush it to disk */
+	SimpleLruZeroAndWritePage(XactCtl, 0);
 }
 
 /*
@@ -974,8 +939,9 @@ ExtendCLOG(TransactionId newestXact)
 
 	LWLockAcquire(lock, LW_EXCLUSIVE);
 
-	/* Zero the page and make an XLOG entry about it */
-	ZeroCLOGPage(pageno, true);
+	/* Zero the page and make a WAL entry about it */
+	SimpleLruZeroPage(XactCtl, pageno);
+	XLogSimpleInsertInt64(RM_CLOG_ID, CLOG_ZEROPAGE, pageno);
 
 	LWLockRelease(lock);
 }
@@ -1068,17 +1034,6 @@ CLOGPagePrecedes(int64 page1, int64 page2)
 
 
 /*
- * Write a ZEROPAGE xlog record
- */
-static void
-WriteZeroPageXlogRec(int64 pageno)
-{
-	XLogBeginInsert();
-	XLogRegisterData(&pageno, sizeof(pageno));
-	(void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
-}
-
-/*
  * Write a TRUNCATE xlog record
  *
  * We must flush the xlog record to disk before returning --- see notes
@@ -1114,19 +1069,9 @@ clog_redo(XLogReaderState *record)
 	if (info == CLOG_ZEROPAGE)
 	{
 		int64		pageno;
-		int			slotno;
-		LWLock	   *lock;
 
 		memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
-		lock = SimpleLruGetBankLock(XactCtl, pageno);
-		LWLockAcquire(lock, LW_EXCLUSIVE);
-
-		slotno = ZeroCLOGPage(pageno, false);
-		SimpleLruWritePage(XactCtl, slotno);
-		Assert(!XactCtl->shared->page_dirty[slotno]);
-
-		LWLockRelease(lock);
+		SimpleLruZeroAndWritePage(XactCtl, pageno);
 	}
 	else if (info == CLOG_TRUNCATE)
 	{
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 113fae1437a..370b38e048b 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -114,11 +114,9 @@ static void SetXidCommitTsInPage(TransactionId xid, int nsubxids,
 static void TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts,
 									 RepOriginId nodeid, int slotno);
 static void error_commit_ts_disabled(void);
-static int	ZeroCommitTsPage(int64 pageno, bool writeXlog);
 static bool CommitTsPagePrecedes(int64 page1, int64 page2);
 static void ActivateCommitTs(void);
 static void DeactivateCommitTs(void);
-static void WriteZeroPageXlogRec(int64 pageno);
 static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXid);
 
 /*
@@ -603,28 +601,6 @@ BootStrapCommitTs(void)
 }
 
 /*
- * Initialize (or reinitialize) a page of CommitTs to zeroes.
- * If writeXlog is true, also emit an XLOG record saying we did this.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroCommitTsPage(int64 pageno, bool writeXlog)
-{
-	int			slotno;
-
-	slotno = SimpleLruZeroPage(CommitTsCtl, pageno);
-
-	if (writeXlog)
-		WriteZeroPageXlogRec(pageno);
-
-	return slotno;
-}
-
-/*
  * This must be called ONCE during postmaster or standalone-backend startup,
  * after StartupXLOG has initialized TransamVariables->nextXid.
  */
@@ -707,6 +683,13 @@ ActivateCommitTs(void)
 	TransactionId xid;
 	int64		pageno;
 
+	/*
+	 * Commit timestamps must not be registered during bootstrap, so skip
+	 * activation entirely in bootstrap processing mode.
+	 */
+	if (IsBootstrapProcessingMode())
+		return;
+
 	/* If we've done this already, there's nothing to do */
 	LWLockAcquire(CommitTsLock, LW_EXCLUSIVE);
 	if (commitTsShared->commitTsActive)
@@ -747,16 +730,7 @@ ActivateCommitTs(void)
 
 	/* Create the current segment file, if necessary */
 	if (!SimpleLruDoesPhysicalPageExist(CommitTsCtl, pageno))
-	{
-		LWLock	   *lock = SimpleLruGetBankLock(CommitTsCtl, pageno);
-		int			slotno;
-
-		LWLockAcquire(lock, LW_EXCLUSIVE);
-		slotno = ZeroCommitTsPage(pageno, false);
-		SimpleLruWritePage(CommitTsCtl, slotno);
-		Assert(!CommitTsCtl->shared->page_dirty[slotno]);
-		LWLockRelease(lock);
-	}
+		SimpleLruZeroAndWritePage(CommitTsCtl, pageno);
 
 	/* Change the activation status in shared memory. */
 	LWLockAcquire(CommitTsLock, LW_EXCLUSIVE);
@@ -867,8 +841,12 @@ ExtendCommitTs(TransactionId newestXact)
 
 	LWLockAcquire(lock, LW_EXCLUSIVE);
 
-	/* Zero the page and make an XLOG entry about it */
-	ZeroCommitTsPage(pageno, !InRecovery);
+	/* Zero the page ... */
+	SimpleLruZeroPage(CommitTsCtl, pageno);
+
+	/* and make a WAL entry about that, unless we're in REDO */
+	if (!InRecovery)
+		XLogSimpleInsertInt64(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE, pageno);
 
 	LWLockRelease(lock);
 }
@@ -983,17 +961,6 @@ CommitTsPagePrecedes(int64 page1, int64 page2)
 
 
 /*
- * Write a ZEROPAGE xlog record
- */
-static void
-WriteZeroPageXlogRec(int64 pageno)
-{
-	XLogBeginInsert();
-	XLogRegisterData(&pageno, sizeof(pageno));
-	(void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE);
-}
-
-/*
  * Write a TRUNCATE xlog record
  */
 static void
@@ -1023,19 +990,9 @@ commit_ts_redo(XLogReaderState *record)
 	if (info == COMMIT_TS_ZEROPAGE)
 	{
 		int64		pageno;
-		int			slotno;
-		LWLock	   *lock;
 
 		memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
-		lock = SimpleLruGetBankLock(CommitTsCtl, pageno);
-		LWLockAcquire(lock, LW_EXCLUSIVE);
-
-		slotno = ZeroCommitTsPage(pageno, false);
-		SimpleLruWritePage(CommitTsCtl, slotno);
-		Assert(!CommitTsCtl->shared->page_dirty[slotno]);
-
-		LWLockRelease(lock);
+		SimpleLruZeroAndWritePage(CommitTsCtl, pageno);
 	}
 	else if (info == COMMIT_TS_TRUNCATE)
 	{
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 3c06ac45532..3cb09c3d598 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -401,8 +401,6 @@ static void mXactCachePut(MultiXactId multi, int nmembers,
 static char *mxstatus_to_string(MultiXactStatus status);
 
 /* management of SLRU infrastructure */
-static int	ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog);
-static int	ZeroMultiXactMemberPage(int64 pageno, bool writeXlog);
 static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2);
 static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2);
 static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
@@ -413,7 +411,6 @@ static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary,
 									 MultiXactOffset start, uint32 distance);
 static bool SetOffsetVacuumLimit(bool is_startup);
 static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result);
-static void WriteMZeroPageXlogRec(int64 pageno, uint8 info);
 static void WriteMTruncateXlogRec(Oid oldestMultiDB,
 								  MultiXactId startTruncOff,
 								  MultiXactId endTruncOff,
@@ -1847,7 +1844,7 @@ AtPrepare_MultiXact(void)
  *		Clean up after successful PREPARE TRANSACTION
  */
 void
-PostPrepare_MultiXact(TransactionId xid)
+PostPrepare_MultiXact(FullTransactionId fxid)
 {
 	MultiXactId myOldestMember;
 
@@ -1858,7 +1855,7 @@ PostPrepare_MultiXact(TransactionId xid)
 	myOldestMember = OldestMemberMXactId[MyProcNumber];
 	if (MultiXactIdIsValid(myOldestMember))
 	{
-		ProcNumber	dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, false);
+		ProcNumber	dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false);
 
 		/*
 		 * Even though storing MultiXactId is atomic, acquire lock to make
@@ -1896,10 +1893,10 @@ PostPrepare_MultiXact(TransactionId xid)
  *		Recover the state of a prepared transaction at startup
  */
 void
-multixact_twophase_recover(TransactionId xid, uint16 info,
+multixact_twophase_recover(FullTransactionId fxid, uint16 info,
 						   void *recdata, uint32 len)
 {
-	ProcNumber	dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, false);
+	ProcNumber	dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false);
 	MultiXactId oldestMember;
 
 	/*
@@ -1917,10 +1914,10 @@ multixact_twophase_recover(TransactionId xid, uint16 info,
  *		Similar to AtEOXact_MultiXact but for COMMIT PREPARED
  */
 void
-multixact_twophase_postcommit(TransactionId xid, uint16 info,
+multixact_twophase_postcommit(FullTransactionId fxid, uint16 info,
 							  void *recdata, uint32 len)
 {
-	ProcNumber	dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, true);
+	ProcNumber	dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, true);
 
 	Assert(len == sizeof(MultiXactId));
 
@@ -1932,10 +1929,10 @@ multixact_twophase_postcommit(TransactionId xid, uint16 info,
  *		This is actually just the same as the COMMIT case.
  */
 void
-multixact_twophase_postabort(TransactionId xid, uint16 info,
+multixact_twophase_postabort(FullTransactionId fxid, uint16 info,
 							 void *recdata, uint32 len)
 {
-	multixact_twophase_postcommit(xid, info, recdata, len);
+	multixact_twophase_postcommit(fxid, info, recdata, len);
 }
 
 /*
@@ -2033,70 +2030,9 @@ check_multixact_member_buffers(int *newval, void **extra, GucSource source)
 void
 BootStrapMultiXact(void)
 {
-	int			slotno;
-	LWLock	   *lock;
-
-	lock = SimpleLruGetBankLock(MultiXactOffsetCtl, 0);
-	LWLockAcquire(lock, LW_EXCLUSIVE);
-
-	/* Create and zero the first page of the offsets log */
-	slotno = ZeroMultiXactOffsetPage(0, false);
-
-	/* Make sure it's written out */
-	SimpleLruWritePage(MultiXactOffsetCtl, slotno);
-	Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
-
-	LWLockRelease(lock);
-
-	lock = SimpleLruGetBankLock(MultiXactMemberCtl, 0);
-	LWLockAcquire(lock, LW_EXCLUSIVE);
-
-	/* Create and zero the first page of the members log */
-	slotno = ZeroMultiXactMemberPage(0, false);
-
-	/* Make sure it's written out */
-	SimpleLruWritePage(MultiXactMemberCtl, slotno);
-	Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
-
-	LWLockRelease(lock);
-}
-
-/*
- * Initialize (or reinitialize) a page of MultiXactOffset to zeroes.
- * If writeXlog is true, also emit an XLOG record saying we did this.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog)
-{
-	int			slotno;
-
-	slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
-
-	if (writeXlog)
-		WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_OFF_PAGE);
-
-	return slotno;
-}
-
-/*
- * Ditto, for MultiXactMember
- */
-static int
-ZeroMultiXactMemberPage(int64 pageno, bool writeXlog)
-{
-	int			slotno;
-
-	slotno = SimpleLruZeroPage(MultiXactMemberCtl, pageno);
-
-	if (writeXlog)
-		WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_MEM_PAGE);
-
-	return slotno;
+	/* Zero the initial pages and flush them to disk */
+	SimpleLruZeroAndWritePage(MultiXactOffsetCtl, 0);
+	SimpleLruZeroAndWritePage(MultiXactMemberCtl, 0);
 }
 
 /*
@@ -2134,7 +2070,7 @@ MaybeExtendOffsetSlru(void)
 		 * with creating a new segment file even if the page we're writing is
 		 * not the first in it, so this is enough.
 		 */
-		slotno = ZeroMultiXactOffsetPage(pageno, false);
+		slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
 		SimpleLruWritePage(MultiXactOffsetCtl, slotno);
 	}
 
@@ -2568,8 +2504,10 @@ ExtendMultiXactOffset(MultiXactId multi)
 
 	LWLockAcquire(lock, LW_EXCLUSIVE);
 
-	/* Zero the page and make an XLOG entry about it */
-	ZeroMultiXactOffsetPage(pageno, true);
+	/* Zero the page and make a WAL entry about it */
+	SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
+	XLogSimpleInsertInt64(RM_MULTIXACT_ID, XLOG_MULTIXACT_ZERO_OFF_PAGE,
+						  pageno);
 
 	LWLockRelease(lock);
 }
@@ -2611,8 +2549,10 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
 
 			LWLockAcquire(lock, LW_EXCLUSIVE);
 
-			/* Zero the page and make an XLOG entry about it */
-			ZeroMultiXactMemberPage(pageno, true);
+			/* Zero the page and make a WAL entry about it */
+			SimpleLruZeroPage(MultiXactMemberCtl, pageno);
+			XLogSimpleInsertInt64(RM_MULTIXACT_ID,
+								  XLOG_MULTIXACT_ZERO_MEM_PAGE, pageno);
 
 			LWLockRelease(lock);
 		}
@@ -3348,18 +3288,6 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
 }
 
 /*
- * Write an xlog record reflecting the zeroing of either a MEMBERs or
- * OFFSETs page (info shows which)
- */
-static void
-WriteMZeroPageXlogRec(int64 pageno, uint8 info)
-{
-	XLogBeginInsert();
-	XLogRegisterData(&pageno, sizeof(pageno));
-	(void) XLogInsert(RM_MULTIXACT_ID, info);
-}
-
-/*
  * Write a TRUNCATE xlog record
  *
  * We must flush the xlog record to disk before returning --- see notes in
@@ -3401,36 +3329,16 @@ multixact_redo(XLogReaderState *record)
 	if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
 	{
 		int64		pageno;
-		int			slotno;
-		LWLock	   *lock;
 
 		memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
-		lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno);
-		LWLockAcquire(lock, LW_EXCLUSIVE);
-
-		slotno = ZeroMultiXactOffsetPage(pageno, false);
-		SimpleLruWritePage(MultiXactOffsetCtl, slotno);
-		Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]);
-
-		LWLockRelease(lock);
+		SimpleLruZeroAndWritePage(MultiXactOffsetCtl, pageno);
 	}
 	else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
 	{
 		int64		pageno;
-		int			slotno;
-		LWLock	   *lock;
 
 		memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
-
-		lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno);
-		LWLockAcquire(lock, LW_EXCLUSIVE);
-
-		slotno = ZeroMultiXactMemberPage(pageno, false);
-		SimpleLruWritePage(MultiXactMemberCtl, slotno);
-		Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]);
-
-		LWLockRelease(lock);
+		SimpleLruZeroAndWritePage(MultiXactMemberCtl, pageno);
 	}
 	else if (info == XLOG_MULTIXACT_CREATE_ID)
 	{
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index fe56286d9a9..10ec259f382 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -434,6 +434,31 @@ SimpleLruZeroLSNs(SlruCtl ctl, int slotno)
 }
 
 /*
+ * This is a convenience wrapper for the common case of zeroing a page and
+ * immediately flushing it to disk.
+ *
+ * Control lock is acquired and released here.
+ */
+void
+SimpleLruZeroAndWritePage(SlruCtl ctl, int64 pageno)
+{
+	int			slotno;
+	LWLock	   *lock;
+
+	lock = SimpleLruGetBankLock(ctl, pageno);
+	LWLockAcquire(lock, LW_EXCLUSIVE);
+
+	/* Create and zero the page */
+	slotno = SimpleLruZeroPage(ctl, pageno);
+
+	/* Make sure it's written out */
+	SimpleLruWritePage(ctl, slotno);
+	Assert(!ctl->shared->page_dirty[slotno]);
+
+	LWLockRelease(lock);
+}
+
+/*
  * Wait for any active I/O on a page slot to finish.  (This does not
  * guarantee that new I/O hasn't been started before we return, though.
  * In fact the slot might not even contain the same page anymore.)
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 15153618fad..09aace9e09f 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -74,7 +74,6 @@ static SlruCtlData SubTransCtlData;
 #define SubTransCtl  (&SubTransCtlData)
 
 
-static int	ZeroSUBTRANSPage(int64 pageno);
 static bool SubTransPagePrecedes(int64 page1, int64 page2);
 
 
@@ -269,33 +268,8 @@ check_subtrans_buffers(int *newval, void **extra, GucSource source)
 void
 BootStrapSUBTRANS(void)
 {
-	int			slotno;
-	LWLock	   *lock = SimpleLruGetBankLock(SubTransCtl, 0);
-
-	LWLockAcquire(lock, LW_EXCLUSIVE);
-
-	/* Create and zero the first page of the subtrans log */
-	slotno = ZeroSUBTRANSPage(0);
-
-	/* Make sure it's written out */
-	SimpleLruWritePage(SubTransCtl, slotno);
-	Assert(!SubTransCtl->shared->page_dirty[slotno]);
-
-	LWLockRelease(lock);
-}
-
-/*
- * Initialize (or reinitialize) a page of SUBTRANS to zeroes.
- *
- * The page is not actually written, just set up in shared memory.
- * The slot number of the new page is returned.
- *
- * Control lock must be held at entry, and will be held at exit.
- */
-static int
-ZeroSUBTRANSPage(int64 pageno)
-{
-	return SimpleLruZeroPage(SubTransCtl, pageno);
+	/* Zero the initial page and flush it to disk */
+	SimpleLruZeroAndWritePage(SubTransCtl, 0);
 }
 
 /*
@@ -335,7 +309,7 @@ StartupSUBTRANS(TransactionId oldestActiveXID)
 			prevlock = lock;
 		}
 
-		(void) ZeroSUBTRANSPage(startPage);
+		(void) SimpleLruZeroPage(SubTransCtl, startPage);
 		if (startPage == endPage)
 			break;
 
@@ -395,7 +369,7 @@ ExtendSUBTRANS(TransactionId newestXact)
 	LWLockAcquire(lock, LW_EXCLUSIVE);
 
 	/* Zero the page */
-	ZeroSUBTRANSPage(pageno);
+	SimpleLruZeroPage(SubTransCtl, pageno);
 
 	LWLockRelease(lock);
 }
diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c
index a27f27cc037..186eb91f609 100644
--- a/src/backend/access/transam/timeline.c
+++ b/src/backend/access/transam/timeline.c
@@ -154,7 +154,7 @@ readTimeLineHistory(TimeLineID targetTLI)
 		if (*ptr == '\0' || *ptr == '#')
 			continue;
 
-		nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo);
+		nfields = sscanf(fline, "%u\t%X/%08X", &tli, &switchpoint_hi, &switchpoint_lo);
 
 		if (nfields < 1)
 		{
@@ -399,7 +399,7 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
 	 * parent file failed to end with one.
 	 */
 	snprintf(buffer, sizeof(buffer),
-			 "%s%u\t%X/%X\t%s\n",
+			 "%s%u\t%X/%08X\t%s\n",
 			 (srcfd < 0) ? "" : "\n",
 			 parentTLI,
 			 LSN_FORMAT_ARGS(switchpoint),
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 73a80559194..85cbe397cb2 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -159,7 +159,7 @@ typedef struct GlobalTransactionData
 	 */
 	XLogRecPtr	prepare_start_lsn;	/* XLOG offset of prepare record start */
 	XLogRecPtr	prepare_end_lsn;	/* XLOG offset of prepare record end */
-	TransactionId xid;			/* The GXACT id */
+	FullTransactionId fxid;		/* The GXACT full xid */
 
 	Oid			owner;			/* ID of user that executed the xact */
 	ProcNumber	locking_backend;	/* backend currently working on the xact */
@@ -197,6 +197,7 @@ static GlobalTransaction MyLockedGxact = NULL;
 
 static bool twophaseExitRegistered = false;
 
+static void PrepareRedoRemoveFull(FullTransactionId fxid, bool giveWarning);
 static void RecordTransactionCommitPrepared(TransactionId xid,
 											int nchildren,
 											TransactionId *children,
@@ -216,19 +217,19 @@ static void RecordTransactionAbortPrepared(TransactionId xid,
 										   int nstats,
 										   xl_xact_stats_item *stats,
 										   const char *gid);
-static void ProcessRecords(char *bufptr, TransactionId xid,
+static void ProcessRecords(char *bufptr, FullTransactionId fxid,
 						   const TwoPhaseCallback callbacks[]);
 static void RemoveGXact(GlobalTransaction gxact);
 
 static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len);
-static char *ProcessTwoPhaseBuffer(TransactionId xid,
+static char *ProcessTwoPhaseBuffer(FullTransactionId fxid,
 								   XLogRecPtr prepare_start_lsn,
 								   bool fromdisk, bool setParent, bool setNextXid);
-static void MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid,
+static void MarkAsPreparingGuts(GlobalTransaction gxact, FullTransactionId fxid,
 								const char *gid, TimestampTz prepared_at, Oid owner,
 								Oid databaseid);
-static void RemoveTwoPhaseFile(TransactionId xid, bool giveWarning);
-static void RecreateTwoPhaseFile(TransactionId xid, void *content, int len);
+static void RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning);
+static void RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len);
 
 /*
  * Initialization of shared memory
@@ -356,7 +357,7 @@ PostPrepare_Twophase(void)
  *		Reserve the GID for the given transaction.
  */
 GlobalTransaction
-MarkAsPreparing(TransactionId xid, const char *gid,
+MarkAsPreparing(FullTransactionId fxid, const char *gid,
 				TimestampTz prepared_at, Oid owner, Oid databaseid)
 {
 	GlobalTransaction gxact;
@@ -407,7 +408,7 @@ MarkAsPreparing(TransactionId xid, const char *gid,
 	gxact = TwoPhaseState->freeGXacts;
 	TwoPhaseState->freeGXacts = gxact->next;
 
-	MarkAsPreparingGuts(gxact, xid, gid, prepared_at, owner, databaseid);
+	MarkAsPreparingGuts(gxact, fxid, gid, prepared_at, owner, databaseid);
 
 	gxact->ondisk = false;
 
@@ -430,11 +431,13 @@ MarkAsPreparing(TransactionId xid, const char *gid,
  * Note: This function should be called with appropriate locks held.
  */
 static void
-MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
-					TimestampTz prepared_at, Oid owner, Oid databaseid)
+MarkAsPreparingGuts(GlobalTransaction gxact, FullTransactionId fxid,
+					const char *gid, TimestampTz prepared_at, Oid owner,
+					Oid databaseid)
 {
 	PGPROC	   *proc;
 	int			i;
+	TransactionId xid = XidFromFullTransactionId(fxid);
 
 	Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE));
 
@@ -479,7 +482,7 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid,
 	proc->subxidStatus.count = 0;
 
 	gxact->prepared_at = prepared_at;
-	gxact->xid = xid;
+	gxact->fxid = fxid;
 	gxact->owner = owner;
 	gxact->locking_backend = MyProcNumber;
 	gxact->valid = false;
@@ -797,12 +800,12 @@ pg_prepared_xact(PG_FUNCTION_ARGS)
  * caller had better hold it.
  */
 static GlobalTransaction
-TwoPhaseGetGXact(TransactionId xid, bool lock_held)
+TwoPhaseGetGXact(FullTransactionId fxid, bool lock_held)
 {
 	GlobalTransaction result = NULL;
 	int			i;
 
-	static TransactionId cached_xid = InvalidTransactionId;
+	static FullTransactionId cached_fxid = {InvalidTransactionId};
 	static GlobalTransaction cached_gxact = NULL;
 
 	Assert(!lock_held || LWLockHeldByMe(TwoPhaseStateLock));
@@ -811,7 +814,7 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held)
 	 * During a recovery, COMMIT PREPARED, or ABORT PREPARED, we'll be called
 	 * repeatedly for the same XID.  We can save work with a simple cache.
 	 */
-	if (xid == cached_xid)
+	if (FullTransactionIdEquals(fxid, cached_fxid))
 		return cached_gxact;
 
 	if (!lock_held)
@@ -821,7 +824,7 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held)
 	{
 		GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
 
-		if (gxact->xid == xid)
+		if (FullTransactionIdEquals(gxact->fxid, fxid))
 		{
 			result = gxact;
 			break;
@@ -832,9 +835,10 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held)
 		LWLockRelease(TwoPhaseStateLock);
 
 	if (result == NULL)			/* should not happen */
-		elog(ERROR, "failed to find GlobalTransaction for xid %u", xid);
+		elog(ERROR, "failed to find GlobalTransaction for xid %u",
+			 XidFromFullTransactionId(fxid));
 
-	cached_xid = xid;
+	cached_fxid = fxid;
 	cached_gxact = result;
 
 	return result;
@@ -881,7 +885,7 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid,
 				*have_more = true;
 				break;
 			}
-			result = gxact->xid;
+			result = XidFromFullTransactionId(gxact->fxid);
 		}
 	}
 
@@ -892,7 +896,7 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid,
 
 /*
  * TwoPhaseGetDummyProcNumber
- *		Get the dummy proc number for prepared transaction specified by XID
+ *		Get the dummy proc number for a prepared transaction
  *
  * Dummy proc numbers are similar to proc numbers of real backends.  They
  * start at MaxBackends, and are unique across all currently active real
@@ -900,24 +904,24 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid,
  * TwoPhaseStateLock will not be taken, so the caller had better hold it.
  */
 ProcNumber
-TwoPhaseGetDummyProcNumber(TransactionId xid, bool lock_held)
+TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held)
 {
-	GlobalTransaction gxact = TwoPhaseGetGXact(xid, lock_held);
+	GlobalTransaction gxact = TwoPhaseGetGXact(fxid, lock_held);
 
 	return gxact->pgprocno;
 }
 
 /*
  * TwoPhaseGetDummyProc
- *		Get the PGPROC that represents a prepared transaction specified by XID
+ *		Get the PGPROC that represents a prepared transaction
  *
  * If lock_held is set to true, TwoPhaseStateLock will not be taken, so the
  * caller had better hold it.
  */
 PGPROC *
-TwoPhaseGetDummyProc(TransactionId xid, bool lock_held)
+TwoPhaseGetDummyProc(FullTransactionId fxid, bool lock_held)
 {
-	GlobalTransaction gxact = TwoPhaseGetGXact(xid, lock_held);
+	GlobalTransaction gxact = TwoPhaseGetGXact(fxid, lock_held);
 
 	return GetPGProcByNumber(gxact->pgprocno);
 }
@@ -942,10 +946,8 @@ AdjustToFullTransactionId(TransactionId xid)
 }
 
 static inline int
-TwoPhaseFilePath(char *path, TransactionId xid)
+TwoPhaseFilePath(char *path, FullTransactionId fxid)
 {
-	FullTransactionId fxid = AdjustToFullTransactionId(xid);
-
 	return snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X%08X",
 					EpochFromFullTransactionId(fxid),
 					XidFromFullTransactionId(fxid));
@@ -1049,7 +1051,7 @@ void
 StartPrepare(GlobalTransaction gxact)
 {
 	PGPROC	   *proc = GetPGProcByNumber(gxact->pgprocno);
-	TransactionId xid = gxact->xid;
+	TransactionId xid = XidFromFullTransactionId(gxact->fxid);
 	TwoPhaseFileHeader hdr;
 	TransactionId *children;
 	RelFileLocator *commitrels;
@@ -1281,10 +1283,11 @@ RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info,
  * If it looks OK (has a valid magic number and CRC), return the palloc'd
  * contents of the file, issuing an error when finding corrupted data.  If
  * missing_ok is true, which indicates that missing files can be safely
- * ignored, then return NULL.  This state can be reached when doing recovery.
+ * ignored, then return NULL.  This state can be reached when doing recovery
+ * after discarding two-phase files from frozen epochs.
  */
 static char *
-ReadTwoPhaseFile(TransactionId xid, bool missing_ok)
+ReadTwoPhaseFile(FullTransactionId fxid, bool missing_ok)
 {
 	char		path[MAXPGPATH];
 	char	   *buf;
@@ -1296,7 +1299,7 @@ ReadTwoPhaseFile(TransactionId xid, bool missing_ok)
 				file_crc;
 	int			r;
 
-	TwoPhaseFilePath(path, xid);
+	TwoPhaseFilePath(path, fxid);
 
 	fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
 	if (fd < 0)
@@ -1426,12 +1429,12 @@ XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len)
 		if (errormsg)
 			ereport(ERROR,
 					(errcode_for_file_access(),
-					 errmsg("could not read two-phase state from WAL at %X/%X: %s",
+					 errmsg("could not read two-phase state from WAL at %X/%08X: %s",
 							LSN_FORMAT_ARGS(lsn), errormsg)));
 		else
 			ereport(ERROR,
 					(errcode_for_file_access(),
-					 errmsg("could not read two-phase state from WAL at %X/%X",
+					 errmsg("could not read two-phase state from WAL at %X/%08X",
 							LSN_FORMAT_ARGS(lsn))));
 	}
 
@@ -1439,7 +1442,7 @@ XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len)
 		(XLogRecGetInfo(xlogreader) & XLOG_XACT_OPMASK) != XLOG_XACT_PREPARE)
 		ereport(ERROR,
 				(errcode_for_file_access(),
-				 errmsg("expected two-phase state data is not present in WAL at %X/%X",
+				 errmsg("expected two-phase state data is not present in WAL at %X/%08X",
 						LSN_FORMAT_ARGS(lsn))));
 
 	if (len != NULL)
@@ -1461,6 +1464,7 @@ StandbyTransactionIdIsPrepared(TransactionId xid)
 	char	   *buf;
 	TwoPhaseFileHeader *hdr;
 	bool		result;
+	FullTransactionId fxid;
 
 	Assert(TransactionIdIsValid(xid));
 
@@ -1468,7 +1472,8 @@ StandbyTransactionIdIsPrepared(TransactionId xid)
 		return false;			/* nothing to do */
 
 	/* Read and validate file */
-	buf = ReadTwoPhaseFile(xid, true);
+	fxid = AdjustToFullTransactionId(xid);
+	buf = ReadTwoPhaseFile(fxid, true);
 	if (buf == NULL)
 		return false;
 
@@ -1488,6 +1493,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
 {
 	GlobalTransaction gxact;
 	PGPROC	   *proc;
+	FullTransactionId fxid;
 	TransactionId xid;
 	bool		ondisk;
 	char	   *buf;
@@ -1509,7 +1515,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
 	 */
 	gxact = LockGXact(gid, GetUserId());
 	proc = GetPGProcByNumber(gxact->pgprocno);
-	xid = gxact->xid;
+	fxid = gxact->fxid;
+	xid = XidFromFullTransactionId(fxid);
 
 	/*
 	 * Read and validate 2PC state data. State data will typically be stored
@@ -1517,7 +1524,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
 	 * to disk if for some reason they have lived for a long time.
 	 */
 	if (gxact->ondisk)
-		buf = ReadTwoPhaseFile(xid, false);
+		buf = ReadTwoPhaseFile(fxid, false);
 	else
 		XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL);
 
@@ -1636,11 +1643,11 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
 
 	/* And now do the callbacks */
 	if (isCommit)
-		ProcessRecords(bufptr, xid, twophase_postcommit_callbacks);
+		ProcessRecords(bufptr, fxid, twophase_postcommit_callbacks);
 	else
-		ProcessRecords(bufptr, xid, twophase_postabort_callbacks);
+		ProcessRecords(bufptr, fxid, twophase_postabort_callbacks);
 
-	PredicateLockTwoPhaseFinish(xid, isCommit);
+	PredicateLockTwoPhaseFinish(fxid, isCommit);
 
 	/*
 	 * Read this value while holding the two-phase lock, as the on-disk 2PC
@@ -1664,7 +1671,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
 	 * And now we can clean up any files we may have left.
 	 */
 	if (ondisk)
-		RemoveTwoPhaseFile(xid, true);
+		RemoveTwoPhaseFile(fxid, true);
 
 	MyLockedGxact = NULL;
 
@@ -1677,7 +1684,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
  * Scan 2PC state data in memory and call the indicated callbacks for each 2PC record.
  */
 static void
-ProcessRecords(char *bufptr, TransactionId xid,
+ProcessRecords(char *bufptr, FullTransactionId fxid,
 			   const TwoPhaseCallback callbacks[])
 {
 	for (;;)
@@ -1691,24 +1698,28 @@ ProcessRecords(char *bufptr, TransactionId xid,
 		bufptr += MAXALIGN(sizeof(TwoPhaseRecordOnDisk));
 
 		if (callbacks[record->rmid] != NULL)
-			callbacks[record->rmid] (xid, record->info, bufptr, record->len);
+			callbacks[record->rmid] (fxid, record->info, bufptr, record->len);
 
 		bufptr += MAXALIGN(record->len);
 	}
 }
 
 /*
- * Remove the 2PC file for the specified XID.
+ * Remove the 2PC file.
  *
  * If giveWarning is false, do not complain about file-not-present;
  * this is an expected case during WAL replay.
+ *
+ * This routine is used in the early stages of recovery, where future and
+ * past orphaned files are checked, hence the FullTransactionId to build
+ * a complete file name fit for the removal.
  */
 static void
-RemoveTwoPhaseFile(TransactionId xid, bool giveWarning)
+RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning)
 {
 	char		path[MAXPGPATH];
 
-	TwoPhaseFilePath(path, xid);
+	TwoPhaseFilePath(path, fxid);
 	if (unlink(path))
 		if (errno != ENOENT || giveWarning)
 			ereport(WARNING,
@@ -1723,7 +1734,7 @@ RemoveTwoPhaseFile(TransactionId xid, bool giveWarning)
  * Note: content and len don't include CRC.
  */
 static void
-RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
+RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len)
 {
 	char		path[MAXPGPATH];
 	pg_crc32c	statefile_crc;
@@ -1734,7 +1745,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
 	COMP_CRC32C(statefile_crc, content, len);
 	FIN_CRC32C(statefile_crc);
 
-	TwoPhaseFilePath(path, xid);
+	TwoPhaseFilePath(path, fxid);
 
 	fd = OpenTransientFile(path,
 						   O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY);
@@ -1846,7 +1857,7 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon)
 			int			len;
 
 			XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, &len);
-			RecreateTwoPhaseFile(gxact->xid, buf, len);
+			RecreateTwoPhaseFile(gxact->fxid, buf, len);
 			gxact->ondisk = true;
 			gxact->prepare_start_lsn = InvalidXLogRecPtr;
 			gxact->prepare_end_lsn = InvalidXLogRecPtr;
@@ -1897,19 +1908,17 @@ restoreTwoPhaseData(void)
 		if (strlen(clde->d_name) == 16 &&
 			strspn(clde->d_name, "0123456789ABCDEF") == 16)
 		{
-			TransactionId xid;
 			FullTransactionId fxid;
 			char	   *buf;
 
 			fxid = FullTransactionIdFromU64(strtou64(clde->d_name, NULL, 16));
-			xid = XidFromFullTransactionId(fxid);
 
-			buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr,
+			buf = ProcessTwoPhaseBuffer(fxid, InvalidXLogRecPtr,
 										true, false, false);
 			if (buf == NULL)
 				continue;
 
-			PrepareRedoAdd(buf, InvalidXLogRecPtr,
+			PrepareRedoAdd(fxid, buf, InvalidXLogRecPtr,
 						   InvalidXLogRecPtr, InvalidRepOriginId);
 		}
 	}
@@ -1968,9 +1977,7 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
 
 		Assert(gxact->inredo);
 
-		xid = gxact->xid;
-
-		buf = ProcessTwoPhaseBuffer(xid,
+		buf = ProcessTwoPhaseBuffer(gxact->fxid,
 									gxact->prepare_start_lsn,
 									gxact->ondisk, false, true);
 
@@ -1981,6 +1988,7 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
 		 * OK, we think this file is valid.  Incorporate xid into the
 		 * running-minimum result.
 		 */
+		xid = XidFromFullTransactionId(gxact->fxid);
 		if (TransactionIdPrecedes(xid, result))
 			result = xid;
 
@@ -2036,15 +2044,12 @@ StandbyRecoverPreparedTransactions(void)
 	LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
 	for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
 	{
-		TransactionId xid;
 		char	   *buf;
 		GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
 
 		Assert(gxact->inredo);
 
-		xid = gxact->xid;
-
-		buf = ProcessTwoPhaseBuffer(xid,
+		buf = ProcessTwoPhaseBuffer(gxact->fxid,
 									gxact->prepare_start_lsn,
 									gxact->ondisk, true, false);
 		if (buf != NULL)
@@ -2077,16 +2082,14 @@ RecoverPreparedTransactions(void)
 	LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
 	for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
 	{
-		TransactionId xid;
 		char	   *buf;
 		GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
+		FullTransactionId fxid = gxact->fxid;
 		char	   *bufptr;
 		TwoPhaseFileHeader *hdr;
 		TransactionId *subxids;
 		const char *gid;
 
-		xid = gxact->xid;
-
 		/*
 		 * Reconstruct subtrans state for the transaction --- needed because
 		 * pg_subtrans is not preserved over a restart.  Note that we are
@@ -2096,17 +2099,20 @@ RecoverPreparedTransactions(void)
 		 * SubTransSetParent has been set before, if the prepared transaction
 		 * generated xid assignment records.
 		 */
-		buf = ProcessTwoPhaseBuffer(xid,
+		buf = ProcessTwoPhaseBuffer(gxact->fxid,
 									gxact->prepare_start_lsn,
 									gxact->ondisk, true, false);
 		if (buf == NULL)
 			continue;
 
 		ereport(LOG,
-				(errmsg("recovering prepared transaction %u from shared memory", xid)));
+				(errmsg("recovering prepared transaction %u of epoch %u from shared memory",
+						XidFromFullTransactionId(gxact->fxid),
+						EpochFromFullTransactionId(gxact->fxid))));
 
 		hdr = (TwoPhaseFileHeader *) buf;
-		Assert(TransactionIdEquals(hdr->xid, xid));
+		Assert(TransactionIdEquals(hdr->xid,
+								   XidFromFullTransactionId(gxact->fxid)));
 		bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
 		gid = (const char *) bufptr;
 		bufptr += MAXALIGN(hdr->gidlen);
@@ -2122,7 +2128,7 @@ RecoverPreparedTransactions(void)
 		 * Recreate its GXACT and dummy PGPROC. But, check whether it was
 		 * added in redo and already has a shmem entry for it.
 		 */
-		MarkAsPreparingGuts(gxact, xid, gid,
+		MarkAsPreparingGuts(gxact, gxact->fxid, gid,
 							hdr->prepared_at,
 							hdr->owner, hdr->database);
 
@@ -2137,7 +2143,7 @@ RecoverPreparedTransactions(void)
 		/*
 		 * Recover other state (notably locks) using resource managers.
 		 */
-		ProcessRecords(bufptr, xid, twophase_recover_callbacks);
+		ProcessRecords(bufptr, fxid, twophase_recover_callbacks);
 
 		/*
 		 * Release locks held by the standby process after we process each
@@ -2145,7 +2151,7 @@ RecoverPreparedTransactions(void)
 		 * additional locks at any one time.
 		 */
 		if (InHotStandby)
-			StandbyReleaseLockTree(xid, hdr->nsubxacts, subxids);
+			StandbyReleaseLockTree(hdr->xid, hdr->nsubxacts, subxids);
 
 		/*
 		 * We're done with recovering this transaction. Clear MyLockedGxact,
@@ -2164,7 +2170,7 @@ RecoverPreparedTransactions(void)
 /*
  * ProcessTwoPhaseBuffer
  *
- * Given a transaction id, read it either from disk or read it directly
+ * Given a FullTransactionId, read it either from disk or read it directly
  * via shmem xlog record pointer using the provided "prepare_start_lsn".
  *
  * If setParent is true, set up subtransaction parent linkages.
@@ -2173,13 +2179,12 @@ RecoverPreparedTransactions(void)
  * value scanned.
  */
 static char *
-ProcessTwoPhaseBuffer(TransactionId xid,
+ProcessTwoPhaseBuffer(FullTransactionId fxid,
 					  XLogRecPtr prepare_start_lsn,
 					  bool fromdisk,
 					  bool setParent, bool setNextXid)
 {
 	FullTransactionId nextXid = TransamVariables->nextXid;
-	TransactionId origNextXid = XidFromFullTransactionId(nextXid);
 	TransactionId *subxids;
 	char	   *buf;
 	TwoPhaseFileHeader *hdr;
@@ -2191,41 +2196,46 @@ ProcessTwoPhaseBuffer(TransactionId xid,
 		Assert(prepare_start_lsn != InvalidXLogRecPtr);
 
 	/* Already processed? */
-	if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
+	if (TransactionIdDidCommit(XidFromFullTransactionId(fxid)) ||
+		TransactionIdDidAbort(XidFromFullTransactionId(fxid)))
 	{
 		if (fromdisk)
 		{
 			ereport(WARNING,
-					(errmsg("removing stale two-phase state file for transaction %u",
-							xid)));
-			RemoveTwoPhaseFile(xid, true);
+					(errmsg("removing stale two-phase state file for transaction %u of epoch %u",
+							XidFromFullTransactionId(fxid),
+							EpochFromFullTransactionId(fxid))));
+			RemoveTwoPhaseFile(fxid, true);
 		}
 		else
 		{
 			ereport(WARNING,
-					(errmsg("removing stale two-phase state from memory for transaction %u",
-							xid)));
-			PrepareRedoRemove(xid, true);
+					(errmsg("removing stale two-phase state from memory for transaction %u of epoch %u",
+							XidFromFullTransactionId(fxid),
+							EpochFromFullTransactionId(fxid))));
+			PrepareRedoRemoveFull(fxid, true);
 		}
 		return NULL;
 	}
 
 	/* Reject XID if too new */
-	if (TransactionIdFollowsOrEquals(xid, origNextXid))
+	if (FullTransactionIdFollowsOrEquals(fxid, nextXid))
 	{
 		if (fromdisk)
 		{
 			ereport(WARNING,
-					(errmsg("removing future two-phase state file for transaction %u",
-							xid)));
-			RemoveTwoPhaseFile(xid, true);
+					(errmsg("removing future two-phase state file for transaction %u of epoch %u",
+							XidFromFullTransactionId(fxid),
+							EpochFromFullTransactionId(fxid))));
+			RemoveTwoPhaseFile(fxid, true);
 		}
 		else
 		{
 			ereport(WARNING,
-					(errmsg("removing future two-phase state from memory for transaction %u",
-							xid)));
-			PrepareRedoRemove(xid, true);
+					(errmsg("removing future two-phase state from memory for transaction %u of epoch %u",
+							XidFromFullTransactionId(fxid),
+							EpochFromFullTransactionId(fxid))));
+			PrepareRedoRemoveFull(fxid, true);
 		}
 		return NULL;
 	}
@@ -2233,7 +2243,7 @@ ProcessTwoPhaseBuffer(TransactionId xid,
 	if (fromdisk)
 	{
 		/* Read and validate file */
-		buf = ReadTwoPhaseFile(xid, false);
+		buf = ReadTwoPhaseFile(fxid, false);
 	}
 	else
 	{
@@ -2243,18 +2253,20 @@ ProcessTwoPhaseBuffer(TransactionId xid,
 
 	/* Deconstruct header */
 	hdr = (TwoPhaseFileHeader *) buf;
-	if (!TransactionIdEquals(hdr->xid, xid))
+	if (!TransactionIdEquals(hdr->xid, XidFromFullTransactionId(fxid)))
 	{
 		if (fromdisk)
 			ereport(ERROR,
 					(errcode(ERRCODE_DATA_CORRUPTED),
-					 errmsg("corrupted two-phase state file for transaction %u",
-							xid)));
+					 errmsg("corrupted two-phase state file for transaction %u of epoch %u",
+							XidFromFullTransactionId(fxid),
+							EpochFromFullTransactionId(fxid))));
 		else
 			ereport(ERROR,
 					(errcode(ERRCODE_DATA_CORRUPTED),
-					 errmsg("corrupted two-phase state in memory for transaction %u",
-							xid)));
+					 errmsg("corrupted two-phase state in memory for transaction %u of epoch %u",
+							XidFromFullTransactionId(fxid),
+							EpochFromFullTransactionId(fxid))));
 	}
 
 	/*
@@ -2268,14 +2280,14 @@ ProcessTwoPhaseBuffer(TransactionId xid,
 	{
 		TransactionId subxid = subxids[i];
 
-		Assert(TransactionIdFollows(subxid, xid));
+		Assert(TransactionIdFollows(subxid, XidFromFullTransactionId(fxid)));
 
 		/* update nextXid if needed */
 		if (setNextXid)
 			AdvanceNextFullTransactionIdPastXid(subxid);
 
 		if (setParent)
-			SubTransSetParent(subxid, xid);
+			SubTransSetParent(subxid, XidFromFullTransactionId(fxid));
 	}
 
 	return buf;
@@ -2466,8 +2478,9 @@ RecordTransactionAbortPrepared(TransactionId xid,
  * data, the entry is marked as located on disk.
  */
 void
-PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
-			   XLogRecPtr end_lsn, RepOriginId origin_id)
+PrepareRedoAdd(FullTransactionId fxid, char *buf,
+			   XLogRecPtr start_lsn, XLogRecPtr end_lsn,
+			   RepOriginId origin_id)
 {
 	TwoPhaseFileHeader *hdr = (TwoPhaseFileHeader *) buf;
 	char	   *bufptr;
@@ -2477,6 +2490,13 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
 	Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE));
 	Assert(RecoveryInProgress());
 
+	if (!FullTransactionIdIsValid(fxid))
+	{
+		Assert(InRecovery);
+		fxid = FullTransactionIdFromAllowableAt(TransamVariables->nextXid,
+												hdr->xid);
+	}
+
 	bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
 	gid = (const char *) bufptr;
 
@@ -2505,14 +2525,15 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
 	{
 		char		path[MAXPGPATH];
 
-		TwoPhaseFilePath(path, hdr->xid);
+		Assert(InRecovery);
+		TwoPhaseFilePath(path, fxid);
 
 		if (access(path, F_OK) == 0)
 		{
 			ereport(reachedConsistency ? ERROR : WARNING,
 					(errmsg("could not recover two-phase state file for transaction %u",
 							hdr->xid),
-					 errdetail("Two-phase state file has been found in WAL record %X/%X, but this transaction has already been restored from disk.",
+					 errdetail("Two-phase state file has been found in WAL record %X/%08X, but this transaction has already been restored from disk.",
 							   LSN_FORMAT_ARGS(start_lsn))));
 			return;
 		}
@@ -2536,7 +2557,7 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
 	gxact->prepared_at = hdr->prepared_at;
 	gxact->prepare_start_lsn = start_lsn;
 	gxact->prepare_end_lsn = end_lsn;
-	gxact->xid = hdr->xid;
+	gxact->fxid = fxid;
 	gxact->owner = hdr->owner;
 	gxact->locking_backend = INVALID_PROC_NUMBER;
 	gxact->valid = false;
@@ -2555,11 +2576,13 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
 						   false /* backward */ , false /* WAL */ );
 	}
 
-	elog(DEBUG2, "added 2PC data in shared memory for transaction %u", gxact->xid);
+	elog(DEBUG2, "added 2PC data in shared memory for transaction %u of epoch %u",
+		 XidFromFullTransactionId(gxact->fxid),
+		 EpochFromFullTransactionId(gxact->fxid));
 }
 
 /*
- * PrepareRedoRemove
+ * PrepareRedoRemoveFull
  *
  * Remove the corresponding gxact entry from TwoPhaseState. Also remove
  * the 2PC file if a prepared transaction was saved via an earlier checkpoint.
@@ -2567,8 +2590,8 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn,
  * Caller must hold TwoPhaseStateLock in exclusive mode, because TwoPhaseState
  * is updated.
  */
-void
-PrepareRedoRemove(TransactionId xid, bool giveWarning)
+static void
+PrepareRedoRemoveFull(FullTransactionId fxid, bool giveWarning)
 {
 	GlobalTransaction gxact = NULL;
 	int			i;
@@ -2581,7 +2604,7 @@ PrepareRedoRemove(TransactionId xid, bool giveWarning)
 	{
 		gxact = TwoPhaseState->prepXacts[i];
 
-		if (gxact->xid == xid)
+		if (FullTransactionIdEquals(gxact->fxid, fxid))
 		{
 			Assert(gxact->inredo);
 			found = true;
@@ -2598,13 +2621,29 @@ PrepareRedoRemove(TransactionId xid, bool giveWarning)
 	/*
 	 * And now we can clean up any files we may have left.
 	 */
-	elog(DEBUG2, "removing 2PC data for transaction %u", xid);
+	elog(DEBUG2, "removing 2PC data for transaction %u of epoch %u ",
+		 XidFromFullTransactionId(fxid),
+		 EpochFromFullTransactionId(fxid));
+
 	if (gxact->ondisk)
-		RemoveTwoPhaseFile(xid, giveWarning);
+		RemoveTwoPhaseFile(fxid, giveWarning);
+
 	RemoveGXact(gxact);
 }
 
 /*
+ * Wrapper around PrepareRedoRemoveFull(), for plain TransactionIds.
+ */
+void
+PrepareRedoRemove(TransactionId xid, bool giveWarning)
+{
+	FullTransactionId fxid =
+		FullTransactionIdFromAllowableAt(TransamVariables->nextXid, xid);
+
+	PrepareRedoRemoveFull(fxid, giveWarning);
+}
+
+/*
  * LookupGXact
  *		Check if the prepared transaction with the given GID, lsn and timestamp
  *		exists.
@@ -2648,7 +2687,7 @@ LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn,
 			 * between publisher and subscriber.
 			 */
 			if (gxact->ondisk)
-				buf = ReadTwoPhaseFile(gxact->xid, false);
+				buf = ReadTwoPhaseFile(gxact->fxid, false);
 			else
 			{
 				Assert(gxact->prepare_start_lsn);
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index b885513f765..41601fcb280 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -2515,7 +2515,7 @@ static void
 PrepareTransaction(void)
 {
 	TransactionState s = CurrentTransactionState;
-	TransactionId xid = GetCurrentTransactionId();
+	FullTransactionId fxid = GetCurrentFullTransactionId();
 	GlobalTransaction gxact;
 	TimestampTz prepared_at;
 
@@ -2644,7 +2644,7 @@ PrepareTransaction(void)
 	 * Reserve the GID for this transaction. This could fail if the requested
 	 * GID is invalid or already in use.
 	 */
-	gxact = MarkAsPreparing(xid, prepareGID, prepared_at,
+	gxact = MarkAsPreparing(fxid, prepareGID, prepared_at,
 							GetUserId(), MyDatabaseId);
 	prepareGID = NULL;
 
@@ -2694,7 +2694,7 @@ PrepareTransaction(void)
 	 * ProcArrayClearTransaction().  Otherwise, a GetLockConflicts() would
 	 * conclude "xact already committed or aborted" for our locks.
 	 */
-	PostPrepare_Locks(xid);
+	PostPrepare_Locks(fxid);
 
 	/*
 	 * Let others know about no transaction in progress by me.  This has to be
@@ -2738,9 +2738,9 @@ PrepareTransaction(void)
 
 	PostPrepare_smgr();
 
-	PostPrepare_MultiXact(xid);
+	PostPrepare_MultiXact(fxid);
 
-	PostPrepare_PredicateLocks(xid);
+	PostPrepare_PredicateLocks(fxid);
 
 	ResourceOwnerRelease(TopTransactionResourceOwner,
 						 RESOURCE_RELEASE_LOCKS,
@@ -6420,7 +6420,8 @@ xact_redo(XLogReaderState *record)
 		 * gxact entry.
 		 */
 		LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
-		PrepareRedoAdd(XLogRecGetData(record),
+		PrepareRedoAdd(InvalidFullTransactionId,
+					   XLogRecGetData(record),
 					   record->ReadRecPtr,
 					   record->EndRecPtr,
 					   XLogRecGetOrigin(record));
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 1914859b2ee..8e7827c6ed9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -449,7 +449,6 @@ typedef struct XLogCtlData
 	/* Protected by info_lck: */
 	XLogwrtRqst LogwrtRqst;
 	XLogRecPtr	RedoRecPtr;		/* a recent copy of Insert->RedoRecPtr */
-	FullTransactionId ckptFullXid;	/* nextXid of latest checkpoint */
 	XLogRecPtr	asyncXactLSN;	/* LSN of newest async commit/abort */
 	XLogRecPtr	replicationSlotMinLSN;	/* oldest LSN needed by any slot */
 
@@ -1028,7 +1027,7 @@ XLogInsertRecord(XLogRecData *rdata,
 		oldCxt = MemoryContextSwitchTo(walDebugCxt);
 
 		initStringInfo(&buf);
-		appendStringInfo(&buf, "INSERT @ %X/%X: ", LSN_FORMAT_ARGS(EndPos));
+		appendStringInfo(&buf, "INSERT @ %X/%08X: ", LSN_FORMAT_ARGS(EndPos));
 
 		/*
 		 * We have to piece together the WAL record data from the XLogRecData
@@ -1549,8 +1548,8 @@ WaitXLogInsertionsToFinish(XLogRecPtr upto)
 	if (upto > reservedUpto)
 	{
 		ereport(LOG,
-				(errmsg("request to flush past end of generated WAL; request %X/%X, current position %X/%X",
-						LSN_FORMAT_ARGS(upto), LSN_FORMAT_ARGS(reservedUpto))));
+				errmsg("request to flush past end of generated WAL; request %X/%08X, current position %X/%08X",
+					   LSN_FORMAT_ARGS(upto), LSN_FORMAT_ARGS(reservedUpto)));
 		upto = reservedUpto;
 	}
 
@@ -1716,7 +1715,7 @@ GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
 		endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]);
 
 		if (expectedEndPtr != endptr)
-			elog(PANIC, "could not find WAL buffer for %X/%X",
+			elog(PANIC, "could not find WAL buffer for %X/%08X",
 				 LSN_FORMAT_ARGS(ptr));
 	}
 	else
@@ -1776,7 +1775,7 @@ WALReadFromBuffers(char *dstbuf, XLogRecPtr startptr, Size count,
 	inserted = pg_atomic_read_u64(&XLogCtl->logInsertResult);
 	if (startptr + count > inserted)
 		ereport(ERROR,
-				errmsg("cannot read past end of generated WAL: requested %X/%X, current position %X/%X",
+				errmsg("cannot read past end of generated WAL: requested %X/%08X, current position %X/%08X",
 					   LSN_FORMAT_ARGS(startptr + count),
 					   LSN_FORMAT_ARGS(inserted)));
 
@@ -2281,7 +2280,7 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli, bool opportunistic)
 #ifdef WAL_DEBUG
 	if (XLOG_DEBUG && npages > 0)
 	{
-		elog(DEBUG1, "initialized %d pages, up to %X/%X",
+		elog(DEBUG1, "initialized %d pages, up to %X/%08X",
 			 npages, LSN_FORMAT_ARGS(NewPageEndPtr));
 	}
 #endif
@@ -2347,25 +2346,6 @@ check_wal_segment_size(int *newval, void **extra, GucSource source)
 }
 
 /*
- * GUC check_hook for max_slot_wal_keep_size
- *
- * We don't allow the value of max_slot_wal_keep_size other than -1 during the
- * binary upgrade. See start_postmaster() in pg_upgrade for more details.
- */
-bool
-check_max_slot_wal_keep_size(int *newval, void **extra, GucSource source)
-{
-	if (IsBinaryUpgrade && *newval != -1)
-	{
-		GUC_check_errdetail("\"%s\" must be set to -1 during binary upgrade mode.",
-							"max_slot_wal_keep_size");
-		return false;
-	}
-
-	return true;
-}
-
-/*
  * At a checkpoint, how many WAL segments to recycle as preallocated future
  * XLOG segments? Returns the highest segment that should be preallocated.
  */
@@ -2492,7 +2472,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible)
 		XLogRecPtr	EndPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[curridx]);
 
 		if (LogwrtResult.Write >= EndPtr)
-			elog(PANIC, "xlog write request %X/%X is past end of log %X/%X",
+			elog(PANIC, "xlog write request %X/%08X is past end of log %X/%08X",
 				 LSN_FORMAT_ARGS(LogwrtResult.Write),
 				 LSN_FORMAT_ARGS(EndPtr));
 
@@ -2892,7 +2872,7 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
 		newMinRecoveryPoint = GetCurrentReplayRecPtr(&newMinRecoveryPointTLI);
 		if (!force && newMinRecoveryPoint < lsn)
 			elog(WARNING,
-				 "xlog min recovery request %X/%X is past current point %X/%X",
+				 "xlog min recovery request %X/%08X is past current point %X/%08X",
 				 LSN_FORMAT_ARGS(lsn), LSN_FORMAT_ARGS(newMinRecoveryPoint));
 
 		/* update control file */
@@ -2905,9 +2885,9 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
 			LocalMinRecoveryPointTLI = newMinRecoveryPointTLI;
 
 			ereport(DEBUG2,
-					(errmsg_internal("updated min recovery point to %X/%X on timeline %u",
-									 LSN_FORMAT_ARGS(newMinRecoveryPoint),
-									 newMinRecoveryPointTLI)));
+					errmsg_internal("updated min recovery point to %X/%08X on timeline %u",
+									LSN_FORMAT_ARGS(newMinRecoveryPoint),
+									newMinRecoveryPointTLI));
 		}
 	}
 	LWLockRelease(ControlFileLock);
@@ -2945,7 +2925,7 @@ XLogFlush(XLogRecPtr record)
 
 #ifdef WAL_DEBUG
 	if (XLOG_DEBUG)
-		elog(LOG, "xlog flush request %X/%X; write %X/%X; flush %X/%X",
+		elog(LOG, "xlog flush request %X/%08X; write %X/%08X; flush %X/%08X",
 			 LSN_FORMAT_ARGS(record),
 			 LSN_FORMAT_ARGS(LogwrtResult.Write),
 			 LSN_FORMAT_ARGS(LogwrtResult.Flush));
@@ -3078,7 +3058,7 @@ XLogFlush(XLogRecPtr record)
 	 */
 	if (LogwrtResult.Flush < record)
 		elog(ERROR,
-			 "xlog flush request %X/%X is not satisfied --- flushed only to %X/%X",
+			 "xlog flush request %X/%08X is not satisfied --- flushed only to %X/%08X",
 			 LSN_FORMAT_ARGS(record),
 			 LSN_FORMAT_ARGS(LogwrtResult.Flush));
 }
@@ -3205,7 +3185,7 @@ XLogBackgroundFlush(void)
 
 #ifdef WAL_DEBUG
 	if (XLOG_DEBUG)
-		elog(LOG, "xlog bg flush request write %X/%X; flush: %X/%X, current is write %X/%X; flush %X/%X",
+		elog(LOG, "xlog bg flush request write %X/%08X; flush: %X/%08X, current is write %X/%08X; flush %X/%08X",
 			 LSN_FORMAT_ARGS(WriteRqst.Write),
 			 LSN_FORMAT_ARGS(WriteRqst.Flush),
 			 LSN_FORMAT_ARGS(LogwrtResult.Write),
@@ -5763,7 +5743,6 @@ StartupXLOG(void)
 	SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB, true);
 	SetCommitTsLimit(checkPoint.oldestCommitTsXid,
 					 checkPoint.newestCommitTsXid);
-	XLogCtl->ckptFullXid = checkPoint.nextXid;
 
 	/*
 	 * Clear out any old relcache cache files.  This is *necessary* if we do
@@ -6505,7 +6484,7 @@ PerformRecoveryXLogAction(void)
 	else
 	{
 		RequestCheckpoint(CHECKPOINT_END_OF_RECOVERY |
-						  CHECKPOINT_IMMEDIATE |
+						  CHECKPOINT_FAST |
 						  CHECKPOINT_WAIT);
 	}
 
@@ -6814,7 +6793,7 @@ ShutdownXLOG(int code, Datum arg)
 	WalSndWaitStopping();
 
 	if (RecoveryInProgress())
-		CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
+		CreateRestartPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_FAST);
 	else
 	{
 		/*
@@ -6826,7 +6805,7 @@ ShutdownXLOG(int code, Datum arg)
 		if (XLogArchivingActive())
 			RequestXLogSwitch(false);
 
-		CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_IMMEDIATE);
+		CreateCheckPoint(CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_FAST);
 	}
 }
 
@@ -6842,24 +6821,24 @@ LogCheckpointStart(int flags, bool restartpoint)
 				(errmsg("restartpoint starting:%s%s%s%s%s%s%s%s",
 						(flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
 						(flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
-						(flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
+						(flags & CHECKPOINT_FAST) ? " fast" : "",
 						(flags & CHECKPOINT_FORCE) ? " force" : "",
 						(flags & CHECKPOINT_WAIT) ? " wait" : "",
 						(flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
 						(flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
-						(flags & CHECKPOINT_FLUSH_ALL) ? " flush-all" : "")));
+						(flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "")));
 	else
 		ereport(LOG,
 		/* translator: the placeholders show checkpoint options */
 				(errmsg("checkpoint starting:%s%s%s%s%s%s%s%s",
 						(flags & CHECKPOINT_IS_SHUTDOWN) ? " shutdown" : "",
 						(flags & CHECKPOINT_END_OF_RECOVERY) ? " end-of-recovery" : "",
-						(flags & CHECKPOINT_IMMEDIATE) ? " immediate" : "",
+						(flags & CHECKPOINT_FAST) ? " fast" : "",
 						(flags & CHECKPOINT_FORCE) ? " force" : "",
 						(flags & CHECKPOINT_WAIT) ? " wait" : "",
 						(flags & CHECKPOINT_CAUSE_XLOG) ? " wal" : "",
 						(flags & CHECKPOINT_CAUSE_TIME) ? " time" : "",
-						(flags & CHECKPOINT_FLUSH_ALL) ? " flush-all" : "")));
+						(flags & CHECKPOINT_FLUSH_UNLOGGED) ? " flush-unlogged" : "")));
 }
 
 /*
@@ -6921,7 +6900,7 @@ LogCheckpointEnd(bool restartpoint)
 						"%d removed, %d recycled; write=%ld.%03d s, "
 						"sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
 						"longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
-						"estimate=%d kB; lsn=%X/%X, redo lsn=%X/%X",
+						"estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
 						CheckpointStats.ckpt_bufs_written,
 						(double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
 						CheckpointStats.ckpt_slru_written,
@@ -6945,7 +6924,7 @@ LogCheckpointEnd(bool restartpoint)
 						"%d removed, %d recycled; write=%ld.%03d s, "
 						"sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, "
 						"longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, "
-						"estimate=%d kB; lsn=%X/%X, redo lsn=%X/%X",
+						"estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X",
 						CheckpointStats.ckpt_bufs_written,
 						(double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers,
 						CheckpointStats.ckpt_slru_written,
@@ -7042,12 +7021,12 @@ update_checkpoint_display(int flags, bool restartpoint, bool reset)
  * flags is a bitwise OR of the following:
  *	CHECKPOINT_IS_SHUTDOWN: checkpoint is for database shutdown.
  *	CHECKPOINT_END_OF_RECOVERY: checkpoint is for end of WAL recovery.
- *	CHECKPOINT_IMMEDIATE: finish the checkpoint ASAP,
- *		ignoring checkpoint_completion_target parameter.
+ *	CHECKPOINT_FAST: finish the checkpoint ASAP, ignoring
+ *		checkpoint_completion_target parameter.
  *	CHECKPOINT_FORCE: force a checkpoint even if no XLOG activity has occurred
  *		since the last one (implied by CHECKPOINT_IS_SHUTDOWN or
  *		CHECKPOINT_END_OF_RECOVERY).
- *	CHECKPOINT_FLUSH_ALL: also flush buffers of unlogged tables.
+ *	CHECKPOINT_FLUSH_UNLOGGED: also flush buffers of unlogged tables.
  *
  * Note: flags contains other bits, of interest here only for logging purposes.
  * In particular note that this routine is synchronous and does not pay
@@ -7456,11 +7435,6 @@ CreateCheckPoint(int flags)
 	UpdateControlFile();
 	LWLockRelease(ControlFileLock);
 
-	/* Update shared-memory copy of checkpoint XID/epoch */
-	SpinLockAcquire(&XLogCtl->info_lck);
-	XLogCtl->ckptFullXid = checkPoint.nextXid;
-	SpinLockRelease(&XLogCtl->info_lck);
-
 	/*
 	 * We are now done with critical updates; no need for system panic if we
 	 * have trouble while fooling with old log segments.
@@ -7498,6 +7472,10 @@ CreateCheckPoint(int flags)
 	if (PriorRedoPtr != InvalidXLogRecPtr)
 		UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
 
+#ifdef USE_INJECTION_POINTS
+	INJECTION_POINT("checkpoint-before-old-wal-removal", NULL);
+#endif
+
 	/*
 	 * Delete old log files, those no longer needed for last checkpoint to
 	 * prevent the disk holding the xlog from growing full.
@@ -7637,7 +7615,7 @@ CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr,
 	if (!RecoveryInProgress())
 		elog(ERROR, "can only be used at end of recovery");
 	if (pagePtr % XLOG_BLCKSZ != 0)
-		elog(ERROR, "invalid position for missing continuation record %X/%X",
+		elog(ERROR, "invalid position for missing continuation record %X/%08X",
 			 LSN_FORMAT_ARGS(pagePtr));
 
 	/* The current WAL insert position should be right after the page header */
@@ -7648,7 +7626,7 @@ CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr,
 		startPos += SizeOfXLogShortPHD;
 	recptr = GetXLogInsertRecPtr();
 	if (recptr != startPos)
-		elog(ERROR, "invalid WAL insert position %X/%X for OVERWRITE_CONTRECORD",
+		elog(ERROR, "invalid WAL insert position %X/%08X for OVERWRITE_CONTRECORD",
 			 LSN_FORMAT_ARGS(recptr));
 
 	START_CRIT_SECTION();
@@ -7678,7 +7656,7 @@ CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr,
 
 	/* check that the record was inserted to the right place */
 	if (ProcLastRecPtr != startPos)
-		elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%X",
+		elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%08X",
 			 LSN_FORMAT_ARGS(ProcLastRecPtr));
 
 	XLogFlush(recptr);
@@ -7747,8 +7725,7 @@ RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record)
 	if (XLogHaveInvalidPages())
 	{
 		elog(DEBUG2,
-			 "could not record restart point at %X/%X because there "
-			 "are unresolved references to invalid pages",
+			 "could not record restart point at %X/%08X because there are unresolved references to invalid pages",
 			 LSN_FORMAT_ARGS(checkPoint->redo));
 		return;
 	}
@@ -7828,8 +7805,8 @@ CreateRestartPoint(int flags)
 		lastCheckPoint.redo <= ControlFile->checkPointCopy.redo)
 	{
 		ereport(DEBUG2,
-				(errmsg_internal("skipping restartpoint, already performed at %X/%X",
-								 LSN_FORMAT_ARGS(lastCheckPoint.redo))));
+				errmsg_internal("skipping restartpoint, already performed at %X/%08X",
+								LSN_FORMAT_ARGS(lastCheckPoint.redo)));
 
 		UpdateMinRecoveryPoint(InvalidXLogRecPtr, true);
 		if (flags & CHECKPOINT_IS_SHUTDOWN)
@@ -8013,10 +7990,10 @@ CreateRestartPoint(int flags)
 
 	xtime = GetLatestXTime();
 	ereport((log_checkpoints ? LOG : DEBUG2),
-			(errmsg("recovery restart point at %X/%X",
-					LSN_FORMAT_ARGS(lastCheckPoint.redo)),
-			 xtime ? errdetail("Last completed transaction was at log time %s.",
-							   timestamptz_to_str(xtime)) : 0));
+			errmsg("recovery restart point at %X/%08X",
+				   LSN_FORMAT_ARGS(lastCheckPoint.redo)),
+			xtime ? errdetail("Last completed transaction was at log time %s.",
+							  timestamptz_to_str(xtime)) : 0);
 
 	/*
 	 * Finally, execute archive_cleanup_command, if any.
@@ -8147,17 +8124,19 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
 	XLByteToSeg(recptr, currSegNo, wal_segment_size);
 	segno = currSegNo;
 
-	/*
-	 * Calculate how many segments are kept by slots first, adjusting for
-	 * max_slot_wal_keep_size.
-	 */
+	/* Calculate how many segments are kept by slots. */
 	keep = XLogGetReplicationSlotMinimumLSN();
 	if (keep != InvalidXLogRecPtr && keep < recptr)
 	{
 		XLByteToSeg(keep, segno, wal_segment_size);
 
-		/* Cap by max_slot_wal_keep_size ... */
-		if (max_slot_wal_keep_size_mb >= 0)
+		/*
+		 * Account for max_slot_wal_keep_size to avoid keeping more than
+		 * configured.  However, don't do that during a binary upgrade: if
+		 * slots were to be invalidated because of this, it would not be
+		 * possible to preserve logical ones during the upgrade.
+		 */
+		if (max_slot_wal_keep_size_mb >= 0 && !IsBinaryUpgrade)
 		{
 			uint64		slot_keep_segs;
 
@@ -8277,8 +8256,8 @@ XLogRestorePoint(const char *rpName)
 	RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
 
 	ereport(LOG,
-			(errmsg("restore point \"%s\" created at %X/%X",
-					rpName, LSN_FORMAT_ARGS(RecPtr))));
+			errmsg("restore point \"%s\" created at %X/%08X",
+				   rpName, LSN_FORMAT_ARGS(RecPtr)));
 
 	return RecPtr;
 }
@@ -8530,11 +8509,6 @@ xlog_redo(XLogReaderState *record)
 		ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
 		LWLockRelease(ControlFileLock);
 
-		/* Update shared-memory copy of checkpoint XID/epoch */
-		SpinLockAcquire(&XLogCtl->info_lck);
-		XLogCtl->ckptFullXid = checkPoint.nextXid;
-		SpinLockRelease(&XLogCtl->info_lck);
-
 		/*
 		 * We should've already switched to the new TLI before replaying this
 		 * record.
@@ -8591,11 +8565,6 @@ xlog_redo(XLogReaderState *record)
 		ControlFile->checkPointCopy.nextXid = checkPoint.nextXid;
 		LWLockRelease(ControlFileLock);
 
-		/* Update shared-memory copy of checkpoint XID/epoch */
-		SpinLockAcquire(&XLogCtl->info_lck);
-		XLogCtl->ckptFullXid = checkPoint.nextXid;
-		SpinLockRelease(&XLogCtl->info_lck);
-
 		/* TLI should not change in an on-line checkpoint */
 		(void) GetCurrentReplayRecPtr(&replayTLI);
 		if (checkPoint.ThisTimeLineID != replayTLI)
@@ -8943,9 +8912,8 @@ issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli)
  * backup state and tablespace map.
  *
  * Input parameters are "state" (the backup state), "fast" (if true, we do
- * the checkpoint in immediate mode to make it faster), and "tablespaces"
- * (if non-NULL, indicates a list of tablespaceinfo structs describing the
- * cluster's tablespaces.).
+ * the checkpoint in fast mode), and "tablespaces" (if non-NULL, indicates a
+ * list of tablespaceinfo structs describing the cluster's tablespaces).
  *
  * The tablespace map contents are appended to passed-in parameter
  * tablespace_map and the caller is responsible for including it in the backup
@@ -9073,11 +9041,11 @@ do_pg_backup_start(const char *backupidstr, bool fast, List **tablespaces,
 			 * during recovery means that checkpointer is running, we can use
 			 * RequestCheckpoint() to establish a restartpoint.
 			 *
-			 * We use CHECKPOINT_IMMEDIATE only if requested by user (via
-			 * passing fast = true).  Otherwise this can take awhile.
+			 * We use CHECKPOINT_FAST only if requested by user (via passing
+			 * fast = true).  Otherwise this can take a while.
 			 */
 			RequestCheckpoint(CHECKPOINT_FORCE | CHECKPOINT_WAIT |
-							  (fast ? CHECKPOINT_IMMEDIATE : 0));
+							  (fast ? CHECKPOINT_FAST : 0));
 
 			/*
 			 * Now we need to fetch the checkpoint record location, and also
diff --git a/src/backend/access/transam/xlogbackup.c b/src/backend/access/transam/xlogbackup.c
index 342590e0a46..cda4b38b7d6 100644
--- a/src/backend/access/transam/xlogbackup.c
+++ b/src/backend/access/transam/xlogbackup.c
@@ -42,7 +42,7 @@ build_backup_content(BackupState *state, bool ishistoryfile)
 
 	XLByteToSeg(state->startpoint, startsegno, wal_segment_size);
 	XLogFileName(startxlogfile, state->starttli, startsegno, wal_segment_size);
-	appendStringInfo(result, "START WAL LOCATION: %X/%X (file %s)\n",
+	appendStringInfo(result, "START WAL LOCATION: %X/%08X (file %s)\n",
 					 LSN_FORMAT_ARGS(state->startpoint), startxlogfile);
 
 	if (ishistoryfile)
@@ -52,11 +52,11 @@ build_backup_content(BackupState *state, bool ishistoryfile)
 
 		XLByteToSeg(state->stoppoint, stopsegno, wal_segment_size);
 		XLogFileName(stopxlogfile, state->stoptli, stopsegno, wal_segment_size);
-		appendStringInfo(result, "STOP WAL LOCATION: %X/%X (file %s)\n",
+		appendStringInfo(result, "STOP WAL LOCATION: %X/%08X (file %s)\n",
 						 LSN_FORMAT_ARGS(state->stoppoint), stopxlogfile);
 	}
 
-	appendStringInfo(result, "CHECKPOINT LOCATION: %X/%X\n",
+	appendStringInfo(result, "CHECKPOINT LOCATION: %X/%08X\n",
 					 LSN_FORMAT_ARGS(state->checkpointloc));
 	appendStringInfoString(result, "BACKUP METHOD: streamed\n");
 	appendStringInfo(result, "BACKUP FROM: %s\n",
@@ -81,7 +81,7 @@ build_backup_content(BackupState *state, bool ishistoryfile)
 	Assert(XLogRecPtrIsInvalid(state->istartpoint) == (state->istarttli == 0));
 	if (!XLogRecPtrIsInvalid(state->istartpoint))
 	{
-		appendStringInfo(result, "INCREMENTAL FROM LSN: %X/%X\n",
+		appendStringInfo(result, "INCREMENTAL FROM LSN: %X/%08X\n",
 						 LSN_FORMAT_ARGS(state->istartpoint));
 		appendStringInfo(result, "INCREMENTAL FROM TLI: %u\n",
 						 state->istarttli);
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index 5ee9d0b028e..c7571429e8e 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -530,6 +530,18 @@ XLogInsert(RmgrId rmid, uint8 info)
 }
 
 /*
+ * Simple wrapper around XLogInsert() that inserts a WAL record whose only
+ * registered data is a single int64 (no other value type is supported yet).
+ */
+XLogRecPtr
+XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value)
+{
+	XLogBeginInsert();
+	XLogRegisterData(&value, sizeof(value));	/* the int64 is the whole payload */
+	return XLogInsert(rmid, info);	/* hand XLogInsert()'s result back as-is */
+}
+
+/*
  * Assemble a WAL record from the registered data and buffers into an
  * XLogRecData chain, ready for insertion with XLogInsertRecord().
  *
diff --git a/src/backend/access/transam/xlogprefetcher.c b/src/backend/access/transam/xlogprefetcher.c
index 7735562db01..ed3aacabc98 100644
--- a/src/backend/access/transam/xlogprefetcher.c
+++ b/src/backend/access/transam/xlogprefetcher.c
@@ -546,7 +546,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
 
 #ifdef XLOGPREFETCHER_DEBUG_LEVEL
 					elog(XLOGPREFETCHER_DEBUG_LEVEL,
-						 "suppressing all readahead until %X/%X is replayed due to possible TLI change",
+						 "suppressing all readahead until %X/%08X is replayed due to possible TLI change",
 						 LSN_FORMAT_ARGS(record->lsn));
 #endif
 
@@ -579,7 +579,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
 
 #ifdef XLOGPREFETCHER_DEBUG_LEVEL
 					elog(XLOGPREFETCHER_DEBUG_LEVEL,
-						 "suppressing prefetch in database %u until %X/%X is replayed due to raw file copy",
+						 "suppressing prefetch in database %u until %X/%08X is replayed due to raw file copy",
 						 rlocator.dbOid,
 						 LSN_FORMAT_ARGS(record->lsn));
 #endif
@@ -607,7 +607,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
 
 #ifdef XLOGPREFETCHER_DEBUG_LEVEL
 						elog(XLOGPREFETCHER_DEBUG_LEVEL,
-							 "suppressing prefetch in relation %u/%u/%u until %X/%X is replayed, which creates the relation",
+							 "suppressing prefetch in relation %u/%u/%u until %X/%08X is replayed, which creates the relation",
 							 xlrec->rlocator.spcOid,
 							 xlrec->rlocator.dbOid,
 							 xlrec->rlocator.relNumber,
@@ -630,7 +630,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
 
 #ifdef XLOGPREFETCHER_DEBUG_LEVEL
 					elog(XLOGPREFETCHER_DEBUG_LEVEL,
-						 "suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, which truncates the relation",
+						 "suppressing prefetch in relation %u/%u/%u from block %u until %X/%08X is replayed, which truncates the relation",
 						 xlrec->rlocator.spcOid,
 						 xlrec->rlocator.dbOid,
 						 xlrec->rlocator.relNumber,
@@ -729,7 +729,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
 			{
 #ifdef XLOGPREFETCHER_DEBUG_LEVEL
 				elog(XLOGPREFETCHER_DEBUG_LEVEL,
-					 "suppressing all prefetch in relation %u/%u/%u until %X/%X is replayed, because the relation does not exist on disk",
+					 "suppressing all prefetch in relation %u/%u/%u until %X/%08X is replayed, because the relation does not exist on disk",
 					 reln->smgr_rlocator.locator.spcOid,
 					 reln->smgr_rlocator.locator.dbOid,
 					 reln->smgr_rlocator.locator.relNumber,
@@ -750,7 +750,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn)
 			{
 #ifdef XLOGPREFETCHER_DEBUG_LEVEL
 				elog(XLOGPREFETCHER_DEBUG_LEVEL,
-					 "suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, because the relation is too small",
+					 "suppressing prefetch in relation %u/%u/%u from block %u until %X/%08X is replayed, because the relation is too small",
 					 reln->smgr_rlocator.locator.spcOid,
 					 reln->smgr_rlocator.locator.dbOid,
 					 reln->smgr_rlocator.locator.relNumber,
@@ -928,7 +928,7 @@ XLogPrefetcherIsFiltered(XLogPrefetcher *prefetcher, RelFileLocator rlocator,
 		{
 #ifdef XLOGPREFETCHER_DEBUG_LEVEL
 			elog(XLOGPREFETCHER_DEBUG_LEVEL,
-				 "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%X is replayed (blocks >= %u filtered)",
+				 "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%08X is replayed (blocks >= %u filtered)",
 				 rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno,
 				 LSN_FORMAT_ARGS(filter->filter_until_replayed),
 				 filter->filter_from_block);
@@ -944,7 +944,7 @@ XLogPrefetcherIsFiltered(XLogPrefetcher *prefetcher, RelFileLocator rlocator,
 		{
 #ifdef XLOGPREFETCHER_DEBUG_LEVEL
 			elog(XLOGPREFETCHER_DEBUG_LEVEL,
-				 "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%X is replayed (whole database)",
+				 "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%08X is replayed (whole database)",
 				 rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno,
 				 LSN_FORMAT_ARGS(filter->filter_until_replayed));
 #endif
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 2790ade1f91..ac1f801b1eb 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -617,7 +617,7 @@ restart:
 	}
 	else if (targetRecOff < pageHeaderSize)
 	{
-		report_invalid_record(state, "invalid record offset at %X/%X: expected at least %u, got %u",
+		report_invalid_record(state, "invalid record offset at %X/%08X: expected at least %u, got %u",
 							  LSN_FORMAT_ARGS(RecPtr),
 							  pageHeaderSize, targetRecOff);
 		goto err;
@@ -626,7 +626,7 @@ restart:
 	if ((((XLogPageHeader) state->readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
 		targetRecOff == pageHeaderSize)
 	{
-		report_invalid_record(state, "contrecord is requested by %X/%X",
+		report_invalid_record(state, "contrecord is requested by %X/%08X",
 							  LSN_FORMAT_ARGS(RecPtr));
 		goto err;
 	}
@@ -667,7 +667,7 @@ restart:
 		if (total_len < SizeOfXLogRecord)
 		{
 			report_invalid_record(state,
-								  "invalid record length at %X/%X: expected at least %u, got %u",
+								  "invalid record length at %X/%08X: expected at least %u, got %u",
 								  LSN_FORMAT_ARGS(RecPtr),
 								  (uint32) SizeOfXLogRecord, total_len);
 			goto err;
@@ -756,7 +756,7 @@ restart:
 			if (!(pageHeader->xlp_info & XLP_FIRST_IS_CONTRECORD))
 			{
 				report_invalid_record(state,
-									  "there is no contrecord flag at %X/%X",
+									  "there is no contrecord flag at %X/%08X",
 									  LSN_FORMAT_ARGS(RecPtr));
 				goto err;
 			}
@@ -769,7 +769,7 @@ restart:
 				total_len != (pageHeader->xlp_rem_len + gotlen))
 			{
 				report_invalid_record(state,
-									  "invalid contrecord length %u (expected %lld) at %X/%X",
+									  "invalid contrecord length %u (expected %lld) at %X/%08X",
 									  pageHeader->xlp_rem_len,
 									  ((long long) total_len) - gotlen,
 									  LSN_FORMAT_ARGS(RecPtr));
@@ -1132,7 +1132,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
 	if (record->xl_tot_len < SizeOfXLogRecord)
 	{
 		report_invalid_record(state,
-							  "invalid record length at %X/%X: expected at least %u, got %u",
+							  "invalid record length at %X/%08X: expected at least %u, got %u",
 							  LSN_FORMAT_ARGS(RecPtr),
 							  (uint32) SizeOfXLogRecord, record->xl_tot_len);
 		return false;
@@ -1140,7 +1140,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
 	if (!RmgrIdIsValid(record->xl_rmid))
 	{
 		report_invalid_record(state,
-							  "invalid resource manager ID %u at %X/%X",
+							  "invalid resource manager ID %u at %X/%08X",
 							  record->xl_rmid, LSN_FORMAT_ARGS(RecPtr));
 		return false;
 	}
@@ -1153,7 +1153,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
 		if (!(record->xl_prev < RecPtr))
 		{
 			report_invalid_record(state,
-								  "record with incorrect prev-link %X/%X at %X/%X",
+								  "record with incorrect prev-link %X/%08X at %X/%08X",
 								  LSN_FORMAT_ARGS(record->xl_prev),
 								  LSN_FORMAT_ARGS(RecPtr));
 			return false;
@@ -1169,7 +1169,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
 		if (record->xl_prev != PrevRecPtr)
 		{
 			report_invalid_record(state,
-								  "record with incorrect prev-link %X/%X at %X/%X",
+								  "record with incorrect prev-link %X/%08X at %X/%08X",
 								  LSN_FORMAT_ARGS(record->xl_prev),
 								  LSN_FORMAT_ARGS(RecPtr));
 			return false;
@@ -1207,7 +1207,7 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
 	if (!EQ_CRC32C(record->xl_crc, crc))
 	{
 		report_invalid_record(state,
-							  "incorrect resource manager data checksum in record at %X/%X",
+							  "incorrect resource manager data checksum in record at %X/%08X",
 							  LSN_FORMAT_ARGS(recptr));
 		return false;
 	}
@@ -1241,7 +1241,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 		XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
 
 		report_invalid_record(state,
-							  "invalid magic number %04X in WAL segment %s, LSN %X/%X, offset %u",
+							  "invalid magic number %04X in WAL segment %s, LSN %X/%08X, offset %u",
 							  hdr->xlp_magic,
 							  fname,
 							  LSN_FORMAT_ARGS(recptr),
@@ -1256,7 +1256,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 		XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
 
 		report_invalid_record(state,
-							  "invalid info bits %04X in WAL segment %s, LSN %X/%X, offset %u",
+							  "invalid info bits %04X in WAL segment %s, LSN %X/%08X, offset %u",
 							  hdr->xlp_info,
 							  fname,
 							  LSN_FORMAT_ARGS(recptr),
@@ -1298,7 +1298,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 
 		/* hmm, first page of file doesn't have a long header? */
 		report_invalid_record(state,
-							  "invalid info bits %04X in WAL segment %s, LSN %X/%X, offset %u",
+							  "invalid info bits %04X in WAL segment %s, LSN %X/%08X, offset %u",
 							  hdr->xlp_info,
 							  fname,
 							  LSN_FORMAT_ARGS(recptr),
@@ -1318,7 +1318,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 		XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
 
 		report_invalid_record(state,
-							  "unexpected pageaddr %X/%X in WAL segment %s, LSN %X/%X, offset %u",
+							  "unexpected pageaddr %X/%08X in WAL segment %s, LSN %X/%08X, offset %u",
 							  LSN_FORMAT_ARGS(hdr->xlp_pageaddr),
 							  fname,
 							  LSN_FORMAT_ARGS(recptr),
@@ -1344,7 +1344,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr,
 			XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize);
 
 			report_invalid_record(state,
-								  "out-of-sequence timeline ID %u (after %u) in WAL segment %s, LSN %X/%X, offset %u",
+								  "out-of-sequence timeline ID %u (after %u) in WAL segment %s, LSN %X/%08X, offset %u",
 								  hdr->xlp_tli,
 								  state->latestPageTLI,
 								  fname,
@@ -1756,7 +1756,7 @@ DecodeXLogRecord(XLogReaderState *state,
 			if (block_id <= decoded->max_block_id)
 			{
 				report_invalid_record(state,
-									  "out-of-order block_id %u at %X/%X",
+									  "out-of-order block_id %u at %X/%08X",
 									  block_id,
 									  LSN_FORMAT_ARGS(state->ReadRecPtr));
 				goto err;
@@ -1780,14 +1780,14 @@ DecodeXLogRecord(XLogReaderState *state,
 			if (blk->has_data && blk->data_len == 0)
 			{
 				report_invalid_record(state,
-									  "BKPBLOCK_HAS_DATA set, but no data included at %X/%X",
+									  "BKPBLOCK_HAS_DATA set, but no data included at %X/%08X",
 									  LSN_FORMAT_ARGS(state->ReadRecPtr));
 				goto err;
 			}
 			if (!blk->has_data && blk->data_len != 0)
 			{
 				report_invalid_record(state,
-									  "BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%X",
+									  "BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%08X",
 									  (unsigned int) blk->data_len,
 									  LSN_FORMAT_ARGS(state->ReadRecPtr));
 				goto err;
@@ -1823,7 +1823,7 @@ DecodeXLogRecord(XLogReaderState *state,
 					 blk->bimg_len == BLCKSZ))
 				{
 					report_invalid_record(state,
-										  "BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%X",
+										  "BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%08X",
 										  (unsigned int) blk->hole_offset,
 										  (unsigned int) blk->hole_length,
 										  (unsigned int) blk->bimg_len,
@@ -1839,7 +1839,7 @@ DecodeXLogRecord(XLogReaderState *state,
 					(blk->hole_offset != 0 || blk->hole_length != 0))
 				{
 					report_invalid_record(state,
-										  "BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%X",
+										  "BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%08X",
 										  (unsigned int) blk->hole_offset,
 										  (unsigned int) blk->hole_length,
 										  LSN_FORMAT_ARGS(state->ReadRecPtr));
@@ -1853,7 +1853,7 @@ DecodeXLogRecord(XLogReaderState *state,
 					blk->bimg_len == BLCKSZ)
 				{
 					report_invalid_record(state,
-										  "BKPIMAGE_COMPRESSED set, but block image length %u at %X/%X",
+										  "BKPIMAGE_COMPRESSED set, but block image length %u at %X/%08X",
 										  (unsigned int) blk->bimg_len,
 										  LSN_FORMAT_ARGS(state->ReadRecPtr));
 					goto err;
@@ -1868,7 +1868,7 @@ DecodeXLogRecord(XLogReaderState *state,
 					blk->bimg_len != BLCKSZ)
 				{
 					report_invalid_record(state,
-										  "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_COMPRESSED set, but block image length is %u at %X/%X",
+										  "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_COMPRESSED set, but block image length is %u at %X/%08X",
 										  (unsigned int) blk->data_len,
 										  LSN_FORMAT_ARGS(state->ReadRecPtr));
 					goto err;
@@ -1884,7 +1884,7 @@ DecodeXLogRecord(XLogReaderState *state,
 				if (rlocator == NULL)
 				{
 					report_invalid_record(state,
-										  "BKPBLOCK_SAME_REL set but no previous rel at %X/%X",
+										  "BKPBLOCK_SAME_REL set but no previous rel at %X/%08X",
 										  LSN_FORMAT_ARGS(state->ReadRecPtr));
 					goto err;
 				}
@@ -1896,7 +1896,7 @@ DecodeXLogRecord(XLogReaderState *state,
 		else
 		{
 			report_invalid_record(state,
-								  "invalid block_id %u at %X/%X",
+								  "invalid block_id %u at %X/%08X",
 								  block_id, LSN_FORMAT_ARGS(state->ReadRecPtr));
 			goto err;
 		}
@@ -1963,7 +1963,7 @@ DecodeXLogRecord(XLogReaderState *state,
 
 shortdata_err:
 	report_invalid_record(state,
-						  "record with invalid length at %X/%X",
+						  "record with invalid length at %X/%08X",
 						  LSN_FORMAT_ARGS(state->ReadRecPtr));
 err:
 	*errormsg = state->errormsg_buf;
@@ -2073,14 +2073,14 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 		!record->record->blocks[block_id].in_use)
 	{
 		report_invalid_record(record,
-							  "could not restore image at %X/%X with invalid block %d specified",
+							  "could not restore image at %X/%08X with invalid block %d specified",
 							  LSN_FORMAT_ARGS(record->ReadRecPtr),
 							  block_id);
 		return false;
 	}
 	if (!record->record->blocks[block_id].has_image)
 	{
-		report_invalid_record(record, "could not restore image at %X/%X with invalid state, block %d",
+		report_invalid_record(record, "could not restore image at %X/%08X with invalid state, block %d",
 							  LSN_FORMAT_ARGS(record->ReadRecPtr),
 							  block_id);
 		return false;
@@ -2107,7 +2107,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 									bkpb->bimg_len, BLCKSZ - bkpb->hole_length) <= 0)
 				decomp_success = false;
 #else
-			report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d",
+			report_invalid_record(record, "could not restore image at %X/%08X compressed with %s not supported by build, block %d",
 								  LSN_FORMAT_ARGS(record->ReadRecPtr),
 								  "LZ4",
 								  block_id);
@@ -2124,7 +2124,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 			if (ZSTD_isError(decomp_result))
 				decomp_success = false;
 #else
-			report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d",
+			report_invalid_record(record, "could not restore image at %X/%08X compressed with %s not supported by build, block %d",
 								  LSN_FORMAT_ARGS(record->ReadRecPtr),
 								  "zstd",
 								  block_id);
@@ -2133,7 +2133,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 		}
 		else
 		{
-			report_invalid_record(record, "could not restore image at %X/%X compressed with unknown method, block %d",
+			report_invalid_record(record, "could not restore image at %X/%08X compressed with unknown method, block %d",
 								  LSN_FORMAT_ARGS(record->ReadRecPtr),
 								  block_id);
 			return false;
@@ -2141,7 +2141,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 
 		if (!decomp_success)
 		{
-			report_invalid_record(record, "could not decompress image at %X/%X, block %d",
+			report_invalid_record(record, "could not decompress image at %X/%08X, block %d",
 								  LSN_FORMAT_ARGS(record->ReadRecPtr),
 								  block_id);
 			return false;
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 6ce979f2d8b..23878b2dd91 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -620,10 +620,10 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 		 * than ControlFile->checkPoint is used.
 		 */
 		ereport(LOG,
-				(errmsg("starting backup recovery with redo LSN %X/%X, checkpoint LSN %X/%X, on timeline ID %u",
-						LSN_FORMAT_ARGS(RedoStartLSN),
-						LSN_FORMAT_ARGS(CheckPointLoc),
-						CheckPointTLI)));
+				errmsg("starting backup recovery with redo LSN %X/%08X, checkpoint LSN %X/%08X, on timeline ID %u",
+					   LSN_FORMAT_ARGS(RedoStartLSN),
+					   LSN_FORMAT_ARGS(CheckPointLoc),
+					   CheckPointTLI));
 
 		/*
 		 * When a backup_label file is present, we want to roll forward from
@@ -636,8 +636,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 			memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
 			wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
 			ereport(DEBUG1,
-					(errmsg_internal("checkpoint record is at %X/%X",
-									 LSN_FORMAT_ARGS(CheckPointLoc))));
+					errmsg_internal("checkpoint record is at %X/%08X",
+									LSN_FORMAT_ARGS(CheckPointLoc)));
 			InRecovery = true;	/* force recovery even if SHUTDOWNED */
 
 			/*
@@ -652,23 +652,23 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 				if (!ReadRecord(xlogprefetcher, LOG, false,
 								checkPoint.ThisTimeLineID))
 					ereport(FATAL,
-							(errmsg("could not find redo location %X/%X referenced by checkpoint record at %X/%X",
-									LSN_FORMAT_ARGS(checkPoint.redo), LSN_FORMAT_ARGS(CheckPointLoc)),
-							 errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
-									 "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
-									 "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
-									 DataDir, DataDir, DataDir, DataDir)));
+							errmsg("could not find redo location %X/%08X referenced by checkpoint record at %X/%08X",
+								   LSN_FORMAT_ARGS(checkPoint.redo), LSN_FORMAT_ARGS(CheckPointLoc)),
+							errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
+									"If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
+									"Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
+									DataDir, DataDir, DataDir, DataDir));
 			}
 		}
 		else
 		{
 			ereport(FATAL,
-					(errmsg("could not locate required checkpoint record at %X/%X",
-							LSN_FORMAT_ARGS(CheckPointLoc)),
-					 errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
-							 "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
-							 "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
-							 DataDir, DataDir, DataDir, DataDir)));
+					errmsg("could not locate required checkpoint record at %X/%08X",
+						   LSN_FORMAT_ARGS(CheckPointLoc)),
+					errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n"
+							"If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
+							"Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
+							DataDir, DataDir, DataDir, DataDir));
 			wasShutdown = false;	/* keep compiler quiet */
 		}
 
@@ -773,8 +773,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 		 */
 		if (!XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
 			ereport(LOG,
-					(errmsg("restarting backup recovery with redo LSN %X/%X",
-							LSN_FORMAT_ARGS(ControlFile->backupStartPoint))));
+					errmsg("restarting backup recovery with redo LSN %X/%08X",
+						   LSN_FORMAT_ARGS(ControlFile->backupStartPoint)));
 
 		/* Get the last valid checkpoint record. */
 		CheckPointLoc = ControlFile->checkPoint;
@@ -786,8 +786,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 		if (record != NULL)
 		{
 			ereport(DEBUG1,
-					(errmsg_internal("checkpoint record is at %X/%X",
-									 LSN_FORMAT_ARGS(CheckPointLoc))));
+					errmsg_internal("checkpoint record is at %X/%08X",
+									LSN_FORMAT_ARGS(CheckPointLoc)));
 		}
 		else
 		{
@@ -798,8 +798,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 			 * simplify processing around checkpoints.
 			 */
 			ereport(PANIC,
-					(errmsg("could not locate a valid checkpoint record at %X/%X",
-							LSN_FORMAT_ARGS(CheckPointLoc))));
+					errmsg("could not locate a valid checkpoint record at %X/%08X",
+						   LSN_FORMAT_ARGS(CheckPointLoc)));
 		}
 		memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
 		wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
@@ -824,8 +824,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 							recoveryTargetName)));
 		else if (recoveryTarget == RECOVERY_TARGET_LSN)
 			ereport(LOG,
-					(errmsg("starting point-in-time recovery to WAL location (LSN) \"%X/%X\"",
-							LSN_FORMAT_ARGS(recoveryTargetLSN))));
+					errmsg("starting point-in-time recovery to WAL location (LSN) \"%X/%08X\"",
+						   LSN_FORMAT_ARGS(recoveryTargetLSN)));
 		else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE)
 			ereport(LOG,
 					(errmsg("starting point-in-time recovery to earliest consistent point")));
@@ -855,7 +855,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 				(errmsg("requested timeline %u is not a child of this server's history",
 						recoveryTargetTLI),
 		/* translator: %s is a backup_label file or a pg_control file */
-				 errdetail("Latest checkpoint in file \"%s\" is at %X/%X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%X.",
+				 errdetail("Latest checkpoint in file \"%s\" is at %X/%08X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%08X.",
 						   haveBackupLabel ? "backup_label" : "pg_control",
 						   LSN_FORMAT_ARGS(CheckPointLoc),
 						   CheckPointTLI,
@@ -870,15 +870,15 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
 		tliOfPointInHistory(ControlFile->minRecoveryPoint - 1, expectedTLEs) !=
 		ControlFile->minRecoveryPointTLI)
 		ereport(FATAL,
-				(errmsg("requested timeline %u does not contain minimum recovery point %X/%X on timeline %u",
-						recoveryTargetTLI,
-						LSN_FORMAT_ARGS(ControlFile->minRecoveryPoint),
-						ControlFile->minRecoveryPointTLI)));
+				errmsg("requested timeline %u does not contain minimum recovery point %X/%08X on timeline %u",
+					   recoveryTargetTLI,
+					   LSN_FORMAT_ARGS(ControlFile->minRecoveryPoint),
+					   ControlFile->minRecoveryPointTLI));
 
 	ereport(DEBUG1,
-			(errmsg_internal("redo record is at %X/%X; shutdown %s",
-							 LSN_FORMAT_ARGS(checkPoint.redo),
-							 wasShutdown ? "true" : "false")));
+			errmsg_internal("redo record is at %X/%08X; shutdown %s",
+							LSN_FORMAT_ARGS(checkPoint.redo),
+							wasShutdown ? "true" : "false"));
 	ereport(DEBUG1,
 			(errmsg_internal("next transaction ID: " UINT64_FORMAT "; next OID: %u",
 							 U64FromFullTransactionId(checkPoint.nextXid),
@@ -1253,14 +1253,14 @@ read_backup_label(XLogRecPtr *checkPointLoc, TimeLineID *backupLabelTLI,
 	 * is pretty crude, but we are not expecting any variability in the file
 	 * format).
 	 */
-	if (fscanf(lfp, "START WAL LOCATION: %X/%X (file %08X%16s)%c",
+	if (fscanf(lfp, "START WAL LOCATION: %X/%08X (file %08X%16s)%c",
 			   &hi, &lo, &tli_from_walseg, startxlogfilename, &ch) != 5 || ch != '\n')
 		ereport(FATAL,
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 				 errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
 	RedoStartLSN = ((uint64) hi) << 32 | lo;
 	RedoStartTLI = tli_from_walseg;
-	if (fscanf(lfp, "CHECKPOINT LOCATION: %X/%X%c",
+	if (fscanf(lfp, "CHECKPOINT LOCATION: %X/%08X%c",
 			   &hi, &lo, &ch) != 3 || ch != '\n')
 		ereport(FATAL,
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
@@ -1332,7 +1332,7 @@ read_backup_label(XLogRecPtr *checkPointLoc, TimeLineID *backupLabelTLI,
 								 tli_from_file, BACKUP_LABEL_FILE)));
 	}
 
-	if (fscanf(lfp, "INCREMENTAL FROM LSN: %X/%X\n", &hi, &lo) > 0)
+	if (fscanf(lfp, "INCREMENTAL FROM LSN: %X/%08X\n", &hi, &lo) > 0)
 		ereport(FATAL,
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 				 errmsg("this is an incremental backup, not a data directory"),
@@ -1722,8 +1722,8 @@ PerformWalRecovery(void)
 		if (record->xl_rmid != RM_XLOG_ID ||
 			(record->xl_info & ~XLR_INFO_MASK) != XLOG_CHECKPOINT_REDO)
 			ereport(FATAL,
-					(errmsg("unexpected record type found at redo point %X/%X",
-							LSN_FORMAT_ARGS(xlogreader->ReadRecPtr))));
+					errmsg("unexpected record type found at redo point %X/%08X",
+						   LSN_FORMAT_ARGS(xlogreader->ReadRecPtr)));
 	}
 	else
 	{
@@ -1745,8 +1745,8 @@ PerformWalRecovery(void)
 		RmgrStartup();
 
 		ereport(LOG,
-				(errmsg("redo starts at %X/%X",
-						LSN_FORMAT_ARGS(xlogreader->ReadRecPtr))));
+				errmsg("redo starts at %X/%08X",
+					   LSN_FORMAT_ARGS(xlogreader->ReadRecPtr)));
 
 		/* Prepare to report progress of the redo phase. */
 		if (!StandbyMode)
@@ -1758,7 +1758,7 @@ PerformWalRecovery(void)
 		do
 		{
 			if (!StandbyMode)
-				ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X",
+				ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%08X",
 										 LSN_FORMAT_ARGS(xlogreader->ReadRecPtr));
 
 #ifdef WAL_DEBUG
@@ -1767,7 +1767,7 @@ PerformWalRecovery(void)
 				StringInfoData buf;
 
 				initStringInfo(&buf);
-				appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ",
+				appendStringInfo(&buf, "REDO @ %X/%08X; LSN %X/%08X: ",
 								 LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
 								 LSN_FORMAT_ARGS(xlogreader->EndRecPtr));
 				xlog_outrec(&buf, xlogreader);
@@ -1880,9 +1880,9 @@ PerformWalRecovery(void)
 		RmgrCleanup();
 
 		ereport(LOG,
-				(errmsg("redo done at %X/%X system usage: %s",
-						LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
-						pg_rusage_show(&ru0))));
+				errmsg("redo done at %X/%08X system usage: %s",
+					   LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
+					   pg_rusage_show(&ru0)));
 		xtime = GetLatestXTime();
 		if (xtime)
 			ereport(LOG,
@@ -2092,7 +2092,7 @@ xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI)
 
 		memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_overwrite_contrecord));
 		if (xlrec.overwritten_lsn != record->overwrittenRecPtr)
-			elog(FATAL, "mismatching overwritten LSN %X/%X -> %X/%X",
+			elog(FATAL, "mismatching overwritten LSN %X/%08X -> %X/%08X",
 				 LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
 				 LSN_FORMAT_ARGS(record->overwrittenRecPtr));
 
@@ -2101,9 +2101,9 @@ xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI)
 		missingContrecPtr = InvalidXLogRecPtr;
 
 		ereport(LOG,
-				(errmsg("successfully skipped missing contrecord at %X/%X, overwritten at %s",
-						LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
-						timestamptz_to_str(xlrec.overwrite_time))));
+				errmsg("successfully skipped missing contrecord at %X/%08X, overwritten at %s",
+					   LSN_FORMAT_ARGS(xlrec.overwritten_lsn),
+					   timestamptz_to_str(xlrec.overwrite_time)));
 
 		/* Verifying the record should only happen once */
 		record->overwrittenRecPtr = InvalidXLogRecPtr;
@@ -2129,7 +2129,7 @@ xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI)
 			backupEndPoint = lsn;
 		}
 		else
-			elog(DEBUG1, "saw end-of-backup record for backup starting at %X/%X, waiting for %X/%X",
+			elog(DEBUG1, "saw end-of-backup record for backup starting at %X/%08X, waiting for %X/%08X",
 				 LSN_FORMAT_ARGS(startpoint), LSN_FORMAT_ARGS(backupStartPoint));
 	}
 }
@@ -2224,9 +2224,9 @@ CheckRecoveryConsistency(void)
 		backupEndRequired = false;
 
 		ereport(LOG,
-				(errmsg("completed backup recovery with redo LSN %X/%X and end LSN %X/%X",
-						LSN_FORMAT_ARGS(saveBackupStartPoint),
-						LSN_FORMAT_ARGS(saveBackupEndPoint))));
+				errmsg("completed backup recovery with redo LSN %X/%08X and end LSN %X/%08X",
+					   LSN_FORMAT_ARGS(saveBackupStartPoint),
+					   LSN_FORMAT_ARGS(saveBackupEndPoint)));
 	}
 
 	/*
@@ -2255,8 +2255,8 @@ CheckRecoveryConsistency(void)
 		reachedConsistency = true;
 		SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
 		ereport(LOG,
-				(errmsg("consistent recovery state reached at %X/%X",
-						LSN_FORMAT_ARGS(lastReplayedEndRecPtr))));
+				errmsg("consistent recovery state reached at %X/%08X",
+					   LSN_FORMAT_ARGS(lastReplayedEndRecPtr)));
 	}
 
 	/*
@@ -2293,7 +2293,7 @@ rm_redo_error_callback(void *arg)
 	xlog_block_info(&buf, record);
 
 	/* translator: %s is a WAL record description */
-	errcontext("WAL redo at %X/%X for %s",
+	errcontext("WAL redo at %X/%08X for %s",
 			   LSN_FORMAT_ARGS(record->ReadRecPtr),
 			   buf.data);
 
@@ -2328,7 +2328,7 @@ xlog_outdesc(StringInfo buf, XLogReaderState *record)
 static void
 xlog_outrec(StringInfo buf, XLogReaderState *record)
 {
-	appendStringInfo(buf, "prev %X/%X; xid %u",
+	appendStringInfo(buf, "prev %X/%08X; xid %u",
 					 LSN_FORMAT_ARGS(XLogRecGetPrev(record)),
 					 XLogRecGetXid(record));
 
@@ -2416,10 +2416,10 @@ checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI,
 		lsn < minRecoveryPoint &&
 		newTLI > minRecoveryPointTLI)
 		ereport(PANIC,
-				(errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%X on timeline %u",
-						newTLI,
-						LSN_FORMAT_ARGS(minRecoveryPoint),
-						minRecoveryPointTLI)));
+				errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%08X on timeline %u",
+					   newTLI,
+					   LSN_FORMAT_ARGS(minRecoveryPoint),
+					   minRecoveryPointTLI));
 
 	/* Looks good */
 }
@@ -2621,8 +2621,8 @@ recoveryStopsBefore(XLogReaderState *record)
 		recoveryStopTime = 0;
 		recoveryStopName[0] = '\0';
 		ereport(LOG,
-				(errmsg("recovery stopping before WAL location (LSN) \"%X/%X\"",
-						LSN_FORMAT_ARGS(recoveryStopLSN))));
+				errmsg("recovery stopping before WAL location (LSN) \"%X/%08X\"",
+					   LSN_FORMAT_ARGS(recoveryStopLSN)));
 		return true;
 	}
 
@@ -2789,8 +2789,8 @@ recoveryStopsAfter(XLogReaderState *record)
 		recoveryStopTime = 0;
 		recoveryStopName[0] = '\0';
 		ereport(LOG,
-				(errmsg("recovery stopping after WAL location (LSN) \"%X/%X\"",
-						LSN_FORMAT_ARGS(recoveryStopLSN))));
+				errmsg("recovery stopping after WAL location (LSN) \"%X/%08X\"",
+					   LSN_FORMAT_ARGS(recoveryStopLSN)));
 		return true;
 	}
 
@@ -2910,7 +2910,7 @@ getRecoveryStopReason(void)
 				 timestamptz_to_str(recoveryStopTime));
 	else if (recoveryTarget == RECOVERY_TARGET_LSN)
 		snprintf(reason, sizeof(reason),
-				 "%s LSN %X/%X\n",
+				 "%s LSN %X/%08X\n",
 				 recoveryStopAfter ? "after" : "before",
 				 LSN_FORMAT_ARGS(recoveryStopLSN));
 	else if (recoveryTarget == RECOVERY_TARGET_NAME)
@@ -3213,11 +3213,11 @@ ReadRecord(XLogPrefetcher *xlogprefetcher, int emode,
 			XLogFileName(fname, xlogreader->seg.ws_tli, segno,
 						 wal_segment_size);
 			ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
-					(errmsg("unexpected timeline ID %u in WAL segment %s, LSN %X/%X, offset %u",
-							xlogreader->latestPageTLI,
-							fname,
-							LSN_FORMAT_ARGS(xlogreader->latestPagePtr),
-							offset)));
+					errmsg("unexpected timeline ID %u in WAL segment %s, LSN %X/%08X, offset %u",
+						   xlogreader->latestPageTLI,
+						   fname,
+						   LSN_FORMAT_ARGS(xlogreader->latestPagePtr),
+						   offset));
 			record = NULL;
 		}
 
@@ -3429,14 +3429,14 @@ retry:
 			errno = save_errno;
 			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
 					(errcode_for_file_access(),
-					 errmsg("could not read from WAL segment %s, LSN %X/%X, offset %u: %m",
+					 errmsg("could not read from WAL segment %s, LSN %X/%08X, offset %u: %m",
 							fname, LSN_FORMAT_ARGS(targetPagePtr),
 							readOff)));
 		}
 		else
 			ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
 					(errcode(ERRCODE_DATA_CORRUPTED),
-					 errmsg("could not read from WAL segment %s, LSN %X/%X, offset %u: read %d of %zu",
+					 errmsg("could not read from WAL segment %s, LSN %X/%08X, offset %u: read %d of %zu",
 							fname, LSN_FORMAT_ARGS(targetPagePtr),
 							readOff, r, (Size) XLOG_BLCKSZ)));
 		goto next_record_is_invalid;
@@ -3718,7 +3718,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 						wait_time = wal_retrieve_retry_interval -
 							TimestampDifferenceMilliseconds(last_fail_time, now);
 
-						elog(LOG, "waiting for WAL to become available at %X/%X",
+						elog(LOG, "waiting for WAL to become available at %X/%08X",
 							 LSN_FORMAT_ARGS(RecPtr));
 
 						/* Do background tasks that might benefit us later. */
@@ -3864,7 +3864,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
 							tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
 
 							if (curFileTLI > 0 && tli < curFileTLI)
-								elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+								elog(ERROR, "according to history file, WAL location %X/%08X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
 									 LSN_FORMAT_ARGS(tliRecPtr),
 									 tli, curFileTLI);
 						}
@@ -4177,10 +4177,10 @@ rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN)
 	if (currentTle->end < replayLSN)
 	{
 		ereport(LOG,
-				(errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%X",
-						newtarget,
-						replayTLI,
-						LSN_FORMAT_ARGS(replayLSN))));
+				errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%08X",
+					   newtarget,
+					   replayTLI,
+					   LSN_FORMAT_ARGS(replayLSN)));
 		return false;
 	}
 
@@ -4994,13 +4994,25 @@ check_recovery_target_timeline(char **newval, void **extra, GucSource source)
 		rttg = RECOVERY_TARGET_TIMELINE_LATEST;
 	else
 	{
+		char	   *endp;
+		uint64		timeline;
+
 		rttg = RECOVERY_TARGET_TIMELINE_NUMERIC;
 
 		errno = 0;
-		strtoul(*newval, NULL, 0);
-		if (errno == EINVAL || errno == ERANGE)
+		timeline = strtou64(*newval, &endp, 0);
+
+		if (*endp != '\0' || errno == EINVAL || errno == ERANGE)
+		{
+			GUC_check_errdetail("\"%s\" is not a valid number.",
+								"recovery_target_timeline");
+			return false;
+		}
+
+		if (timeline < 1 || timeline > PG_UINT32_MAX)
 		{
-			GUC_check_errdetail("\"recovery_target_timeline\" is not a valid number.");
+			GUC_check_errdetail("\"%s\" must be between %u and %u.",
+								"recovery_target_timeline", 1, UINT_MAX);
 			return false;
 		}
 	}
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c
index c389b27f77d..27ea52fdfee 100644
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -795,7 +795,7 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage,
 
 		list_free_deep(timelineHistory);
 
-		elog(DEBUG3, "switched to timeline %u valid until %X/%X",
+		elog(DEBUG3, "switched to timeline %u valid until %X/%08X",
 			 state->currTLI,
 			 LSN_FORMAT_ARGS(state->currTLIValidUntil));
 	}
diff --git a/src/backend/backup/backup_manifest.c b/src/backend/backup/backup_manifest.c
index 22e2be37c95..d05252f383c 100644
--- a/src/backend/backup/backup_manifest.c
+++ b/src/backend/backup/backup_manifest.c
@@ -281,7 +281,7 @@ AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr,
 		}
 
 		AppendToManifest(manifest,
-						 "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }",
+						 "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%08X\", \"End-LSN\": \"%X/%08X\" }",
 						 first_wal_range ? "" : ",\n",
 						 entry->tli,
 						 LSN_FORMAT_ARGS(tl_beginptr),
diff --git a/src/backend/backup/basebackup_copy.c b/src/backend/backup/basebackup_copy.c
index a284ce318ff..18b0b5a52d3 100644
--- a/src/backend/backup/basebackup_copy.c
+++ b/src/backend/backup/basebackup_copy.c
@@ -361,7 +361,7 @@ SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli)
 	tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual);
 
 	/* Data row */
-	values[0] = CStringGetTextDatum(psprintf("%X/%X", LSN_FORMAT_ARGS(ptr)));
+	values[0] = CStringGetTextDatum(psprintf("%X/%08X", LSN_FORMAT_ARGS(ptr)));
 	values[1] = Int64GetDatum(tli);
 	do_tup_output(tstate, values, nulls);
 
diff --git a/src/backend/backup/basebackup_incremental.c b/src/backend/backup/basebackup_incremental.c
index 28491b1e0ab..a0d48ff0fef 100644
--- a/src/backend/backup/basebackup_incremental.c
+++ b/src/backend/backup/basebackup_incremental.c
@@ -409,7 +409,7 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
 			if (range->start_lsn < tlep[i]->begin)
 				ereport(ERROR,
 						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-						 errmsg("manifest requires WAL from initial timeline %u starting at %X/%X, but that timeline begins at %X/%X",
+						 errmsg("manifest requires WAL from initial timeline %u starting at %X/%08X, but that timeline begins at %X/%08X",
 								range->tli,
 								LSN_FORMAT_ARGS(range->start_lsn),
 								LSN_FORMAT_ARGS(tlep[i]->begin))));
@@ -419,7 +419,7 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
 			if (range->start_lsn != tlep[i]->begin)
 				ereport(ERROR,
 						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-						 errmsg("manifest requires WAL from continuation timeline %u starting at %X/%X, but that timeline begins at %X/%X",
+						 errmsg("manifest requires WAL from continuation timeline %u starting at %X/%08X, but that timeline begins at %X/%08X",
 								range->tli,
 								LSN_FORMAT_ARGS(range->start_lsn),
 								LSN_FORMAT_ARGS(tlep[i]->begin))));
@@ -430,7 +430,7 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
 			if (range->end_lsn > backup_state->startpoint)
 				ereport(ERROR,
 						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-						 errmsg("manifest requires WAL from final timeline %u ending at %X/%X, but this backup starts at %X/%X",
+						 errmsg("manifest requires WAL from final timeline %u ending at %X/%08X, but this backup starts at %X/%08X",
 								range->tli,
 								LSN_FORMAT_ARGS(range->end_lsn),
 								LSN_FORMAT_ARGS(backup_state->startpoint)),
@@ -441,7 +441,7 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
 			if (range->end_lsn != tlep[i]->end)
 				ereport(ERROR,
 						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-						 errmsg("manifest requires WAL from non-final timeline %u ending at %X/%X, but this server switched timelines at %X/%X",
+						 errmsg("manifest requires WAL from non-final timeline %u ending at %X/%08X, but this server switched timelines at %X/%08X",
 								range->tli,
 								LSN_FORMAT_ARGS(range->end_lsn),
 								LSN_FORMAT_ARGS(tlep[i]->end))));
@@ -522,18 +522,18 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib,
 			if (XLogRecPtrIsInvalid(tli_missing_lsn))
 				ereport(ERROR,
 						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-						 errmsg("WAL summaries are required on timeline %u from %X/%X to %X/%X, but no summaries for that timeline and LSN range exist",
+						 errmsg("WAL summaries are required on timeline %u from %X/%08X to %X/%08X, but no summaries for that timeline and LSN range exist",
 								tle->tli,
 								LSN_FORMAT_ARGS(tli_start_lsn),
 								LSN_FORMAT_ARGS(tli_end_lsn))));
 			else
 				ereport(ERROR,
 						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-						 errmsg("WAL summaries are required on timeline %u from %X/%X to %X/%X, but the summaries for that timeline and LSN range are incomplete",
+						 errmsg("WAL summaries are required on timeline %u from %X/%08X to %X/%08X, but the summaries for that timeline and LSN range are incomplete",
 								tle->tli,
 								LSN_FORMAT_ARGS(tli_start_lsn),
 								LSN_FORMAT_ARGS(tli_end_lsn)),
-						 errdetail("The first unsummarized LSN in this range is %X/%X.",
+						 errdetail("The first unsummarized LSN in this range is %X/%08X.",
 								   LSN_FORMAT_ARGS(tli_missing_lsn))));
 		}
 
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 6db864892d0..fc8638c1b61 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -109,6 +109,8 @@ static const struct typinfo TypInfo[] = {
 	F_REGROLEIN, F_REGROLEOUT},
 	{"regnamespace", REGNAMESPACEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
 	F_REGNAMESPACEIN, F_REGNAMESPACEOUT},
+	{"regdatabase", REGDATABASEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
+	F_REGDATABASEIN, F_REGDATABASEOUT},
 	{"text", TEXTOID, 0, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, DEFAULT_COLLATION_OID,
 	F_TEXTIN, F_TEXTOUT},
 	{"oid", OIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 18316a3968b..7dded634eb8 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -1850,6 +1850,17 @@ find_expr_references_walker(Node *node,
 							 errmsg("constant of the type %s cannot be used here",
 									"regrole")));
 					break;
+
+					/*
+					 * Dependencies for regdatabase should be shared among all
+					 * databases, so explicitly prohibit such dependencies.
+					 */
+				case REGDATABASEOID:
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("constant of the type %s cannot be used here",
+									"regdatabase")));
+					break;
 			}
 		}
 		return false;
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index fbaed5359ad..fd6537567ea 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -665,6 +665,15 @@ CheckAttributeType(const char *attname,
 	}
 
 	/*
+	 * For consistency with check_virtual_generated_security().
+	 */
+	if ((flags & CHKATYPE_IS_VIRTUAL) && atttypid >= FirstUnpinnedObjectId)
+		ereport(ERROR,
+				errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				errmsg("virtual generated column \"%s\" cannot have a user-defined type", attname),
+				errdetail("Virtual generated columns that make use of user-defined types are not yet supported."));
+
+	/*
 	 * This might not be strictly invalid per SQL standard, but it is pretty
 	 * useless, and it cannot be dumped, so we must disallow it.
 	 */
@@ -1100,6 +1109,7 @@ AddNewRelationType(const char *typeName,
  *		if false, relacl is always set NULL
  *	allow_system_table_mods: true to allow creation in system namespaces
  *	is_internal: is this a system-generated catalog?
+ *	relrewrite: link to original relation during a table rewrite
  *
  * Output parameters:
  *	typaddress: if not null, gets the object address of the new pg_type entry
@@ -2996,7 +3006,7 @@ AddRelationNotNullConstraints(Relation rel, List *constraints,
 				if (constr->is_no_inherit)
 					ereport(ERROR,
 							(errcode(ERRCODE_DATATYPE_MISMATCH),
-							 errmsg("cannot define not-null constraint on column \"%s\" with NO INHERIT",
+							 errmsg("cannot define not-null constraint with NO INHERIT on column \"%s\"",
 									strVal(linitial(constr->keys))),
 							 errdetail("The column has an inherited not-null constraint.")));
 
@@ -3215,6 +3225,86 @@ check_nested_generated(ParseState *pstate, Node *node)
 }
 
 /*
+ * Check security of virtual generated column expression.
+ *
+ * Just like selecting from a view is exploitable (CVE-2024-7348), selecting
+ * from a table with virtual generated columns is exploitable.  Users who are
+ * concerned about this can avoid selecting from views, but telling them to
+ * avoid selecting from tables is less practical.
+ *
+ * To address this, generation expressions for virtual generated columns
+ * are restricted to using built-in functions and types.  We assume that
+ * built-in functions and types cannot be exploited for this purpose.
+ * Note that the overall security also requires that all functions in use
+ * are immutable.  (For example, there are some built-in non-immutable
+ * functions that can run arbitrary SQL.)  The immutability is checked
+ * elsewhere, since that is a property that needs to hold independent of
+ * security considerations.
+ *
+ * In the future, this could be expanded by some new mechanism to declare
+ * other functions and types as safe or trusted for this purpose, but that is
+ * to be designed.
+ */
+
+/*
+ * Callback for check_functions_in_node() that determines whether a function
+ * is user-defined.
+ */
+static bool
+contains_user_functions_checker(Oid func_id, void *context)
+{
+	return (func_id >= FirstUnpinnedObjectId);
+}
+
+/*
+ * Checks for all the things we don't want in the generation expressions of
+ * virtual generated columns for security reasons.  Errors out if it finds
+ * one.
+ */
+static bool
+check_virtual_generated_security_walker(Node *node, void *context)
+{
+	ParseState *pstate = context;
+
+	if (node == NULL)
+		return false;
+
+	if (!IsA(node, List))
+	{
+		if (check_functions_in_node(node, contains_user_functions_checker, NULL))
+			ereport(ERROR,
+					errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					errmsg("generation expression uses user-defined function"),
+					errdetail("Virtual generated columns that make use of user-defined functions are not yet supported."),
+					parser_errposition(pstate, exprLocation(node)));
+
+		/*
+		 * check_functions_in_node() doesn't check some node types (see
+		 * comment there).  We handle CoerceToDomain and MinMaxExpr by
+		 * checking for built-in types.  The other listed node types cannot
+		 * call user-definable SQL-visible functions.
+		 *
+		 * We furthermore need this type check to handle built-in, immutable
+		 * polymorphic functions such as array_eq().
+		 */
+		if (exprType(node) >= FirstUnpinnedObjectId)
+			ereport(ERROR,
+					errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+					errmsg("generation expression uses user-defined type"),
+					errdetail("Virtual generated columns that make use of user-defined types are not yet supported."),
+					parser_errposition(pstate, exprLocation(node)));
+	}
+
+	return expression_tree_walker(node, check_virtual_generated_security_walker, context);
+}
+
+static void
+check_virtual_generated_security(ParseState *pstate, Node *node)
+{
+	check_virtual_generated_security_walker(node, pstate);
+}
+
+/*
  * Take a raw default and convert it to a cooked format ready for
  * storage.
  *
@@ -3253,6 +3343,10 @@ cookDefault(ParseState *pstate,
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
 					 errmsg("generation expression is not immutable")));
+
+		/* Check security of expressions for virtual generated column */
+		if (attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
+			check_virtual_generated_security(pstate, expr);
 	}
 	else
 	{
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 739a92bdcc1..c4029a4f3d3 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -800,11 +800,11 @@ index_create(Relation heapRelation,
 				 errmsg("user-defined indexes on system catalog tables are not supported")));
 
 	/*
-	 * Btree text_pattern_ops uses text_eq as the equality operator, which is
-	 * fine as long as the collation is deterministic; text_eq then reduces to
+	 * Btree text_pattern_ops uses texteq as the equality operator, which is
+	 * fine as long as the collation is deterministic; texteq then reduces to
 	 * bitwise equality and so it is semantically compatible with the other
 	 * operators and functions in that opclass.  But with a nondeterministic
-	 * collation, text_eq could yield results that are incompatible with the
+	 * collation, texteq could yield results that are incompatible with the
 	 * actual behavior of the index (which is determined by the opclass's
 	 * comparison function).  We prevent such problems by refusing creation of
 	 * an index with that opclass and a nondeterministic collation.
@@ -814,7 +814,7 @@ index_create(Relation heapRelation,
 	 * opclasses as incompatible with nondeterminism; but for now, this small
 	 * hack suffices.
 	 *
-	 * Another solution is to use a special operator, not text_eq, as the
+	 * Another solution is to use a special operator, not texteq, as the
 	 * equality opclass member; but that is undesirable because it would
 	 * prevent index usage in many queries that work fine today.
 	 */
@@ -3020,7 +3020,7 @@ index_build(Relation heapRelation,
 
 	/*
 	 * Determine worker process details for parallel CREATE INDEX.  Currently,
-	 * only btree and BRIN have support for parallel builds.
+	 * only btree, GIN, and BRIN have support for parallel builds.
 	 *
 	 * Note that planner considers parallel safety for us.
 	 */
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 15efb02badb..b2d5332effc 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -666,6 +666,14 @@ GRANT SELECT ON pg_shmem_allocations_numa TO pg_read_all_stats;
 REVOKE EXECUTE ON FUNCTION pg_get_shmem_allocations_numa() FROM PUBLIC;
 GRANT EXECUTE ON FUNCTION pg_get_shmem_allocations_numa() TO pg_read_all_stats;
 
+CREATE VIEW pg_dsm_registry_allocations AS
+    SELECT * FROM pg_get_dsm_registry_allocations();
+
+REVOKE ALL ON pg_dsm_registry_allocations FROM PUBLIC;
+GRANT SELECT ON pg_dsm_registry_allocations TO pg_read_all_stats;
+REVOKE EXECUTE ON FUNCTION pg_get_dsm_registry_allocations() FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_get_dsm_registry_allocations() TO pg_read_all_stats;
+
 CREATE VIEW pg_backend_memory_contexts AS
     SELECT * FROM pg_get_backend_memory_contexts();
 
@@ -674,11 +682,6 @@ GRANT SELECT ON pg_backend_memory_contexts TO pg_read_all_stats;
 REVOKE EXECUTE ON FUNCTION pg_get_backend_memory_contexts() FROM PUBLIC;
 GRANT EXECUTE ON FUNCTION pg_get_backend_memory_contexts() TO pg_read_all_stats;
 
-REVOKE EXECUTE ON FUNCTION
-	pg_get_process_memory_contexts(integer, boolean, float) FROM PUBLIC;
-GRANT EXECUTE ON FUNCTION
-	pg_get_process_memory_contexts(integer, boolean, float) TO pg_read_all_stats;
-
 -- Statistics views
 
 CREATE VIEW pg_stat_all_tables AS
@@ -900,7 +903,7 @@ CREATE VIEW pg_stat_activity AS
             S.wait_event,
             S.state,
             S.backend_xid,
-            s.backend_xmin,
+            S.backend_xmin,
             S.query_id,
             S.query,
             S.backend_type
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 4fffb76e557..7111d5d5334 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -76,7 +76,7 @@ static BufferAccessStrategy vac_strategy;
 
 
 static void do_analyze_rel(Relation onerel,
-						   VacuumParams *params, List *va_cols,
+						   const VacuumParams params, List *va_cols,
 						   AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
 						   bool inh, bool in_outer_xact, int elevel);
 static void compute_index_stats(Relation onerel, double totalrows,
@@ -107,7 +107,7 @@ static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
  */
 void
 analyze_rel(Oid relid, RangeVar *relation,
-			VacuumParams *params, List *va_cols, bool in_outer_xact,
+			const VacuumParams params, List *va_cols, bool in_outer_xact,
 			BufferAccessStrategy bstrategy)
 {
 	Relation	onerel;
@@ -116,7 +116,7 @@ analyze_rel(Oid relid, RangeVar *relation,
 	BlockNumber relpages = 0;
 
 	/* Select logging level */
-	if (params->options & VACOPT_VERBOSE)
+	if (params.options & VACOPT_VERBOSE)
 		elevel = INFO;
 	else
 		elevel = DEBUG2;
@@ -138,8 +138,8 @@ analyze_rel(Oid relid, RangeVar *relation,
 	 *
 	 * Make sure to generate only logs for ANALYZE in this case.
 	 */
-	onerel = vacuum_open_relation(relid, relation, params->options & ~(VACOPT_VACUUM),
-								  params->log_min_duration >= 0,
+	onerel = vacuum_open_relation(relid, relation, params.options & ~(VACOPT_VACUUM),
+								  params.log_min_duration >= 0,
 								  ShareUpdateExclusiveLock);
 
 	/* leave if relation could not be opened or locked */
@@ -155,7 +155,7 @@ analyze_rel(Oid relid, RangeVar *relation,
 	 */
 	if (!vacuum_is_permitted_for_relation(RelationGetRelid(onerel),
 										  onerel->rd_rel,
-										  params->options & ~VACOPT_VACUUM))
+										  params.options & ~VACOPT_VACUUM))
 	{
 		relation_close(onerel, ShareUpdateExclusiveLock);
 		return;
@@ -227,7 +227,7 @@ analyze_rel(Oid relid, RangeVar *relation,
 	else
 	{
 		/* No need for a WARNING if we already complained during VACUUM */
-		if (!(params->options & VACOPT_VACUUM))
+		if (!(params.options & VACOPT_VACUUM))
 			ereport(WARNING,
 					(errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
 							RelationGetRelationName(onerel))));
@@ -275,7 +275,7 @@ analyze_rel(Oid relid, RangeVar *relation,
  * appropriate acquirefunc for each child table.
  */
 static void
-do_analyze_rel(Relation onerel, VacuumParams *params,
+do_analyze_rel(Relation onerel, const VacuumParams params,
 			   List *va_cols, AcquireSampleRowsFunc acquirefunc,
 			   BlockNumber relpages, bool inh, bool in_outer_xact,
 			   int elevel)
@@ -309,9 +309,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	PgStat_Counter startreadtime = 0;
 	PgStat_Counter startwritetime = 0;
 
-	verbose = (params->options & VACOPT_VERBOSE) != 0;
+	verbose = (params.options & VACOPT_VERBOSE) != 0;
 	instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
-							  params->log_min_duration >= 0));
+							  params.log_min_duration >= 0));
 	if (inh)
 		ereport(elevel,
 				(errmsg("analyzing \"%s.%s\" inheritance tree",
@@ -706,7 +706,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	 * amvacuumcleanup() when called in ANALYZE-only mode.  The only exception
 	 * among core index AMs is GIN/ginvacuumcleanup().
 	 */
-	if (!(params->options & VACOPT_VACUUM))
+	if (!(params.options & VACOPT_VACUUM))
 	{
 		for (ind = 0; ind < nindexes; ind++)
 		{
@@ -736,9 +736,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	{
 		TimestampTz endtime = GetCurrentTimestamp();
 
-		if (verbose || params->log_min_duration == 0 ||
+		if (verbose || params.log_min_duration == 0 ||
 			TimestampDifferenceExceeds(starttime, endtime,
-									   params->log_min_duration))
+									   params.log_min_duration))
 		{
 			long		delay_in_ms;
 			WalUsage	walusage;
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 54a08e4102e..b55221d44cd 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -917,7 +917,7 @@ copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, bool verb
 	 * not to be aggressive about this.
 	 */
 	memset(&params, 0, sizeof(VacuumParams));
-	vacuum_get_cutoffs(OldHeap, &params, &cutoffs);
+	vacuum_get_cutoffs(OldHeap, params, &cutoffs);
 
 	/*
 	 * FreezeXid will become the table's new relfrozenxid, and that mustn't go
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 74ae42b19a7..fae9c41db65 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 }
 
 /*
- * Extract a CopyHeaderChoice value from a DefElem.  This is like
- * defGetBoolean() but also accepts the special value "match".
+ * Extract the CopyFormatOptions.header_line value from a DefElem.
+ *
+ * Parses the HEADER option for COPY, which can be a boolean, a non-negative
+ * integer (number of lines to skip), or the special value "match".
  */
-static CopyHeaderChoice
-defGetCopyHeaderChoice(DefElem *def, bool is_from)
+static int
+defGetCopyHeaderOption(DefElem *def, bool is_from)
 {
 	/*
 	 * If no parameter value given, assume "true" is meant.
@@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
 		return COPY_HEADER_TRUE;
 
 	/*
-	 * Allow 0, 1, "true", "false", "on", "off", or "match".
+	 * Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or
+	 * "match".
 	 */
 	switch (nodeTag(def->arg))
 	{
 		case T_Integer:
-			switch (intVal(def->arg))
 			{
-				case 0:
-					return COPY_HEADER_FALSE;
-				case 1:
-					return COPY_HEADER_TRUE;
-				default:
-					/* otherwise, error out below */
-					break;
+				int			ival = intVal(def->arg);
+
+				if (ival < 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+							 errmsg("a negative integer value cannot be "
+									"specified for %s", def->defname)));
+
+				if (!is_from && ival > 1)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("cannot use multi-line header in COPY TO")));
+
+				return ival;
 			}
 			break;
 		default:
@@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
 	}
 	ereport(ERROR,
 			(errcode(ERRCODE_SYNTAX_ERROR),
-			 errmsg("%s requires a Boolean value or \"match\"",
+			 errmsg("%s requires a Boolean value, a non-negative integer, "
+					"or the string \"match\"",
 					def->defname)));
 	return COPY_HEADER_FALSE;	/* keep compiler quiet */
 }
@@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate,
 			if (header_specified)
 				errorConflictingDefElem(defel, pstate);
 			header_specified = true;
-			opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
+			opts_out->header_line = defGetCopyHeaderOption(defel, is_from);
 		}
 		else if (strcmp(defel->defname, "quote") == 0)
 		{
@@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate,
 				 errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
 
 	/* Check header */
-	if (opts_out->binary && opts_out->header_line)
+	if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index f5fc346e201..b1ae97b833d 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -771,21 +771,30 @@ static pg_attribute_always_inline bool
 NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
 {
 	int			fldct;
-	bool		done;
+	bool		done = false;
 
 	/* only available for text or csv input */
 	Assert(!cstate->opts.binary);
 
 	/* on input check that the header line is correct if needed */
-	if (cstate->cur_lineno == 0 && cstate->opts.header_line)
+	if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
 	{
 		ListCell   *cur;
 		TupleDesc	tupDesc;
+		int			lines_to_skip = cstate->opts.header_line;
+
+		/* If set to "match", one header line is skipped */
+		if (cstate->opts.header_line == COPY_HEADER_MATCH)
+			lines_to_skip = 1;
 
 		tupDesc = RelationGetDescr(cstate->rel);
 
-		cstate->cur_lineno++;
-		done = CopyReadLine(cstate, is_csv);
+		for (int i = 0; i < lines_to_skip; i++)
+		{
+			cstate->cur_lineno++;
+			if ((done = CopyReadLine(cstate, is_csv)))
+				break;
+		}
 
 		if (cstate->opts.header_line == COPY_HEADER_MATCH)
 		{
@@ -1538,7 +1547,7 @@ GetDecimalFromHex(char hex)
 	if (isdigit((unsigned char) hex))
 		return hex - '0';
 	else
-		return tolower((unsigned char) hex) - 'a' + 10;
+		return pg_ascii_tolower((unsigned char) hex) - 'a' + 10;
 }
 
 /*
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index ea6f18f2c80..67b94b91cae 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
 														  cstate->file_encoding);
 
 	/* if a header has been requested send the line */
-	if (cstate->opts.header_line)
+	if (cstate->opts.header_line == COPY_HEADER_TRUE)
 	{
 		ListCell   *cur;
 		bool		hdr_delim = false;
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 5fbbcdaabb1..502a45163c8 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -570,8 +570,8 @@ CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
 	 * any CREATE DATABASE commands.
 	 */
 	if (!IsBinaryUpgrade)
-		RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE |
-						  CHECKPOINT_WAIT | CHECKPOINT_FLUSH_ALL);
+		RequestCheckpoint(CHECKPOINT_FAST | CHECKPOINT_FORCE |
+						  CHECKPOINT_WAIT | CHECKPOINT_FLUSH_UNLOGGED);
 
 	/*
 	 * Iterate through all tablespaces of the template database, and copy each
@@ -673,7 +673,7 @@ CreateDatabaseUsingFileCopy(Oid src_dboid, Oid dst_dboid, Oid src_tsid,
 	 * strategy that avoids these problems.
 	 */
 	if (!IsBinaryUpgrade)
-		RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE |
+		RequestCheckpoint(CHECKPOINT_FAST | CHECKPOINT_FORCE |
 						  CHECKPOINT_WAIT);
 }
 
@@ -1065,16 +1065,41 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 
 	/* Check that the chosen locales are valid, and get canonical spellings */
 	if (!check_locale(LC_COLLATE, dbcollate, &canonname))
-		ereport(ERROR,
-				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
-				 errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
-				 errhint("If the locale name is specific to ICU, use ICU_LOCALE.")));
+	{
+		if (dblocprovider == COLLPROVIDER_BUILTIN)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
+					 errhint("If the locale name is specific to the builtin provider, use BUILTIN_LOCALE.")));
+		else if (dblocprovider == COLLPROVIDER_ICU)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
+					 errhint("If the locale name is specific to the ICU provider, use ICU_LOCALE.")));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate)));
+	}
 	dbcollate = canonname;
 	if (!check_locale(LC_CTYPE, dbctype, &canonname))
-		ereport(ERROR,
-				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
-				 errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
-				 errhint("If the locale name is specific to ICU, use ICU_LOCALE.")));
+	{
+		if (dblocprovider == COLLPROVIDER_BUILTIN)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
+					 errhint("If the locale name is specific to the builtin provider, use BUILTIN_LOCALE.")));
+		else if (dblocprovider == COLLPROVIDER_ICU)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
+					 errhint("If the locale name is specific to the ICU provider, use ICU_LOCALE.")));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype)));
+	}
+
 	dbctype = canonname;
 
 	check_encoding_locale_matches(encoding, dbcollate, dbctype);
@@ -1845,7 +1870,7 @@ dropdb(const char *dbname, bool missing_ok, bool force)
 	 * Force a checkpoint to make sure the checkpointer has received the
 	 * message sent by ForgetDatabaseSyncRequests.
 	 */
-	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
+	RequestCheckpoint(CHECKPOINT_FAST | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
 
 	/* Close all smgr fds in all backends. */
 	WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
@@ -2095,8 +2120,8 @@ movedb(const char *dbname, const char *tblspcname)
 	 * On Windows, this also ensures that background procs don't hold any open
 	 * files, which would cause rmdir() to fail.
 	 */
-	RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT
-					  | CHECKPOINT_FLUSH_ALL);
+	RequestCheckpoint(CHECKPOINT_FAST | CHECKPOINT_FORCE | CHECKPOINT_WAIT
+					  | CHECKPOINT_FLUSH_UNLOGGED);
 
 	/* Close all smgr fds in all backends. */
 	WaitForProcSignalBarrier(EmitProcSignalBarrier(PROCSIGNAL_BARRIER_SMGRRELEASE));
@@ -2227,7 +2252,7 @@ movedb(const char *dbname, const char *tblspcname)
 		 * any unlogged operations done in the new DB tablespace before the
 		 * next checkpoint.
 		 */
-		RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
+		RequestCheckpoint(CHECKPOINT_FAST | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
 
 		/*
 		 * Force synchronous commit, thus minimizing the window between
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index bfa83fbc3fe..7e2792ead71 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -811,14 +811,10 @@ ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc)
 	 * the queryid in any of the EXPLAIN plans to keep stable the results
 	 * generated by regression test suites.
 	 */
-	if (es->verbose && queryDesc->plannedstmt->queryId != UINT64CONST(0) &&
+	if (es->verbose && queryDesc->plannedstmt->queryId != INT64CONST(0) &&
 		compute_query_id != COMPUTE_QUERY_ID_REGRESS)
 	{
-		/*
-		 * Output the queryid as an int64 rather than a uint64 so we match
-		 * what would be seen in the BIGINT pg_stat_statements.queryid column.
-		 */
-		ExplainPropertyInteger("Query Identifier", NULL, (int64)
+		ExplainPropertyInteger("Query Identifier", NULL,
 							   queryDesc->plannedstmt->queryId, es);
 	}
 }
diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c
index c14e038d54f..8d2d7431544 100644
--- a/src/backend/commands/foreigncmds.c
+++ b/src/backend/commands/foreigncmds.c
@@ -71,15 +71,26 @@ optionListToArray(List *options)
 	foreach(cell, options)
 	{
 		DefElem    *def = lfirst(cell);
+		const char *name;
 		const char *value;
 		Size		len;
 		text	   *t;
 
+		name = def->defname;
 		value = defGetString(def);
-		len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value);
+
+		/* Insist that name not contain "=", else "a=b=c" is ambiguous */
+		if (strchr(name, '=') != NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid option name \"%s\": must not contain \"=\"",
+							name)));
+
+		len = VARHDRSZ + strlen(name) + 1 + strlen(value);
+		/* +1 leaves room for sprintf's trailing null */
 		t = palloc(len + 1);
 		SET_VARSIZE(t, len);
-		sprintf(VARDATA(t), "%s=%s", def->defname, value);
+		sprintf(VARDATA(t), "%s=%s", name, value);
 
 		astate = accumArrayResult(astate, PointerGetDatum(t),
 								  false, TEXTOID,
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index d962fe392cd..6f753ab6d7a 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -2469,8 +2469,8 @@ GetOperatorFromCompareType(Oid opclass, Oid rhstype, CompareType cmptype,
 					cmptype == COMPARE_EQ ? errmsg("could not identify an equality operator for type %s", format_type_be(opcintype)) :
 					cmptype == COMPARE_OVERLAP ? errmsg("could not identify an overlaps operator for type %s", format_type_be(opcintype)) :
 					cmptype == COMPARE_CONTAINED_BY ? errmsg("could not identify a contained-by operator for type %s", format_type_be(opcintype)) : 0,
-					errdetail("Could not translate compare type %d for operator family \"%s\", input type %s, access method \"%s\".",
-							  cmptype, get_opfamily_name(opfamily, false), format_type_be(opcintype), get_am_name(amid)));
+					errdetail("Could not translate compare type %d for operator family \"%s\" of access method \"%s\".",
+							  cmptype, get_opfamily_name(opfamily, false), get_am_name(amid)));
 
 		/*
 		 * We parameterize rhstype so foreign keys can ask for a <@ operator
@@ -2592,7 +2592,9 @@ makeObjectName(const char *name1, const char *name2, const char *label)
  * constraint names.)
  *
  * Note: it is theoretically possible to get a collision anyway, if someone
- * else chooses the same name concurrently.  This is fairly unlikely to be
+ * else chooses the same name concurrently.  We shorten the race condition
+ * window by checking for conflicting relations using SnapshotDirty, but
+ * that doesn't close the window entirely.  This is fairly unlikely to be
  * a problem in practice, especially if one is holding an exclusive lock on
  * the relation identified by name1.  However, if choosing multiple names
  * within a single command, you'd better create the new object and do
@@ -2608,15 +2610,45 @@ ChooseRelationName(const char *name1, const char *name2,
 	int			pass = 0;
 	char	   *relname = NULL;
 	char		modlabel[NAMEDATALEN];
+	SnapshotData SnapshotDirty;
+	Relation	pgclassrel;
+
+	/* prepare to search pg_class with a dirty snapshot */
+	InitDirtySnapshot(SnapshotDirty);
+	pgclassrel = table_open(RelationRelationId, AccessShareLock);
 
 	/* try the unmodified label first */
 	strlcpy(modlabel, label, sizeof(modlabel));
 
 	for (;;)
 	{
+		ScanKeyData key[2];
+		SysScanDesc scan;
+		bool		collides;
+
 		relname = makeObjectName(name1, name2, modlabel);
 
-		if (!OidIsValid(get_relname_relid(relname, namespaceid)))
+		/* is there any conflicting relation name? */
+		ScanKeyInit(&key[0],
+					Anum_pg_class_relname,
+					BTEqualStrategyNumber, F_NAMEEQ,
+					CStringGetDatum(relname));
+		ScanKeyInit(&key[1],
+					Anum_pg_class_relnamespace,
+					BTEqualStrategyNumber, F_OIDEQ,
+					ObjectIdGetDatum(namespaceid));
+
+		scan = systable_beginscan(pgclassrel, ClassNameNspIndexId,
+								  true /* indexOK */ ,
+								  &SnapshotDirty,
+								  2, key);
+
+		collides = HeapTupleIsValid(systable_getnext(scan));
+
+		systable_endscan(scan);
+
+		/* break out of loop if no conflict */
+		if (!collides)
 		{
 			if (!isconstraint ||
 				!ConstraintNameExists(relname, namespaceid))
@@ -2628,6 +2660,8 @@ ChooseRelationName(const char *name1, const char *name2,
 		snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
 	}
 
+	table_close(pgclassrel, AccessShareLock);
+
 	return relname;
 }
 
@@ -4226,7 +4260,7 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein
 									 false);
 
 		/*
-		 * Updating pg_index might involve TOAST table access, so ensure we
+		 * Swapping the indexes might involve TOAST table access, so ensure we
 		 * have a valid snapshot.
 		 */
 		PushActiveSnapshot(GetTransactionSnapshot());
diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c
index 27c2cb26ef5..188e26f0e6e 100644
--- a/src/backend/commands/matview.c
+++ b/src/backend/commands/matview.c
@@ -835,7 +835,8 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
 	if (!foundUniqueIndex)
 		ereport(ERROR,
 				errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				errmsg("could not find suitable unique index on materialized view"));
+				errmsg("could not find suitable unique index on materialized view \"%s\"",
+					   RelationGetRelationName(matviewRel)));
 
 	appendStringInfoString(&querybuf,
 						   " AND newdata.* OPERATOR(pg_catalog.*=) mv.*) "
diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0b23d94c38e..1bf7eaae5b3 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -2130,8 +2130,8 @@ defGetGeneratedColsOption(DefElem *def)
 
 	ereport(ERROR,
 			errcode(ERRCODE_SYNTAX_ERROR),
-			errmsg("%s requires a \"none\" or \"stored\" value",
-				   def->defname));
+			errmsg("invalid value for publication parameter \"%s\": \"%s\"", def->defname, sval),
+			errdetail("Valid values are \"%s\" and \"%s\".", "none", "stored"));
 
 	return PUBLISH_GENCOLS_NONE;	/* keep compiler quiet */
 }
diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
index 4aec73bcc6b..e23b0de7242 100644
--- a/src/backend/commands/subscriptioncmds.c
+++ b/src/backend/commands/subscriptioncmds.c
@@ -1267,7 +1267,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt,
 						IsSet(opts.specified_opts, SUBOPT_SLOT_NAME))
 						ereport(ERROR,
 								(errcode(ERRCODE_SYNTAX_ERROR),
-								 errmsg("slot_name and two_phase cannot be altered at the same time")));
+								 errmsg("\"slot_name\" and \"two_phase\" cannot be altered at the same time")));
 
 					/*
 					 * Note that workers may still survive even if the
@@ -1283,7 +1283,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt,
 					if (logicalrep_workers_find(subid, true, true))
 						ereport(ERROR,
 								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-								 errmsg("cannot alter two_phase when logical replication worker is still running"),
+								 errmsg("cannot alter \"two_phase\" when logical replication worker is still running"),
 								 errhint("Try again after some time.")));
 
 					/*
@@ -1297,7 +1297,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt,
 						LookupGXactBySubid(subid))
 						ereport(ERROR,
 								(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-								 errmsg("cannot disable two_phase when prepared transactions are present"),
+								 errmsg("cannot disable \"two_phase\" when prepared transactions exist"),
 								 errhint("Resolve these transactions and try again.")));
 
 					/* Change system catalog accordingly */
@@ -1539,7 +1539,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt,
 					if (!XLogRecPtrIsInvalid(remote_lsn) && opts.lsn < remote_lsn)
 						ereport(ERROR,
 								(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-								 errmsg("skip WAL location (LSN %X/%X) must be greater than origin LSN %X/%X",
+								 errmsg("skip WAL location (LSN %X/%08X) must be greater than origin LSN %X/%08X",
 										LSN_FORMAT_ARGS(opts.lsn),
 										LSN_FORMAT_ARGS(remote_lsn))));
 				}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 54ad38247aa..cb811520c29 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -430,8 +430,8 @@ static void AlterConstrUpdateConstraintEntry(ATAlterConstraint *cmdcon, Relation
 static ObjectAddress ATExecValidateConstraint(List **wqueue,
 											  Relation rel, char *constrName,
 											  bool recurse, bool recursing, LOCKMODE lockmode);
-static void QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
-										HeapTuple contuple, LOCKMODE lockmode);
+static void QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation fkrel,
+										Oid pkrelid, HeapTuple contuple, LOCKMODE lockmode);
 static void QueueCheckConstraintValidation(List **wqueue, Relation conrel, Relation rel,
 										   char *constrName, HeapTuple contuple,
 										   bool recurse, bool recursing, LOCKMODE lockmode);
@@ -2711,8 +2711,7 @@ MergeAttributes(List *columns, const List *supers, char relpersistence,
 							RelationGetRelationName(relation))));
 
 		/* If existing rel is temp, it must belong to this session */
-		if (relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
-			!relation->rd_islocaltemp)
+		if (RELATION_IS_OTHER_TEMP(relation))
 			ereport(ERROR,
 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 					 errmsg(!is_partition
@@ -7374,7 +7373,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
 	/* make sure datatype is legal for a column */
 	CheckAttributeType(NameStr(attribute->attname), attribute->atttypid, attribute->attcollation,
 					   list_make1_oid(rel->rd_rel->reltype),
-					   0);
+					   (attribute->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL ? CHKATYPE_IS_VIRTUAL : 0));
 
 	InsertPgAttributeTuples(attrdesc, tupdesc, myrelid, NULL, NULL);
 
@@ -8609,7 +8608,7 @@ ATExecSetExpression(AlteredTableInfo *tab, Relation rel, const char *colName,
 		rel->rd_att->constr && rel->rd_att->constr->num_check > 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables with check constraints"),
+				 errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables with check constraints"),
 				 errdetail("Column \"%s\" of relation \"%s\" is a virtual generated column.",
 						   colName, RelationGetRelationName(rel))));
 
@@ -8627,7 +8626,7 @@ ATExecSetExpression(AlteredTableInfo *tab, Relation rel, const char *colName,
 		GetRelationPublications(RelationGetRelid(rel)) != NIL)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables that are part of a publication"),
+				 errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables that are part of a publication"),
 				 errdetail("Column \"%s\" of relation \"%s\" is a virtual generated column.",
 						   colName, RelationGetRelationName(rel))));
 
@@ -10189,7 +10188,7 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
 	if (pk_has_without_overlaps && !with_period)
 		ereport(ERROR,
 				errcode(ERRCODE_INVALID_FOREIGN_KEY),
-				errmsg("foreign key must use PERIOD when referencing a primary using WITHOUT OVERLAPS"));
+				errmsg("foreign key must use PERIOD when referencing a primary key using WITHOUT OVERLAPS"));
 
 	/*
 	 * Now we can check permissions.
@@ -10330,8 +10329,8 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
 					for_overlaps
 					? errmsg("could not identify an overlaps operator for foreign key")
 					: errmsg("could not identify an equality operator for foreign key"),
-					errdetail("Could not translate compare type %d for operator family \"%s\", input type %s, access method \"%s\".",
-							  cmptype, get_opfamily_name(opfamily, false), format_type_be(opcintype), get_am_name(amid)));
+					errdetail("Could not translate compare type %d for operator family \"%s\" of access method \"%s\".",
+							  cmptype, get_opfamily_name(opfamily, false), get_am_name(amid)));
 
 		/*
 		 * There had better be a primary equality operator for the index.
@@ -11858,6 +11857,7 @@ AttachPartitionForeignKey(List **wqueue,
 	if (queueValidation)
 	{
 		Relation	conrel;
+		Oid			confrelid;
 
 		conrel = table_open(ConstraintRelationId, RowExclusiveLock);
 
@@ -11865,9 +11865,11 @@ AttachPartitionForeignKey(List **wqueue,
 		if (!HeapTupleIsValid(partcontup))
 			elog(ERROR, "cache lookup failed for constraint %u", partConstrOid);
 
+		confrelid = ((Form_pg_constraint) GETSTRUCT(partcontup))->confrelid;
+
 		/* Use the same lock as for AT_ValidateConstraint */
-		QueueFKConstraintValidation(wqueue, conrel, partition, partcontup,
-									ShareUpdateExclusiveLock);
+		QueueFKConstraintValidation(wqueue, conrel, partition, confrelid,
+									partcontup, ShareUpdateExclusiveLock);
 		ReleaseSysCache(partcontup);
 		table_close(conrel, RowExclusiveLock);
 	}
@@ -12463,9 +12465,12 @@ ATExecAlterConstrEnforceability(List **wqueue, ATAlterConstraint *cmdcon,
 
 		/*
 		 * Tell Phase 3 to check that the constraint is satisfied by existing
-		 * rows.
+		 * rows.  Only applies to leaf partitions, and (for constraints that
+		 * reference a partitioned table) only if this is not one of the
+		 * pg_constraint rows that exist solely to support action triggers.
 		 */
-		if (rel->rd_rel->relkind == RELKIND_RELATION)
+		if (rel->rd_rel->relkind == RELKIND_RELATION &&
+			currcon->confrelid == pkrelid)
 		{
 			AlteredTableInfo *tab;
 			NewConstraint *newcon;
@@ -12907,8 +12912,9 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
 		con->contype != CONSTRAINT_NOTNULL)
 		ereport(ERROR,
 				errcode(ERRCODE_WRONG_OBJECT_TYPE),
-				errmsg("constraint \"%s\" of relation \"%s\" is not a foreign key, check, or not-null constraint",
-					   constrName, RelationGetRelationName(rel)));
+				errmsg("cannot validate constraint \"%s\" of relation \"%s\"",
+					   constrName, RelationGetRelationName(rel)),
+				errdetail("This operation is not supported for this type of constraint."));
 
 	if (!con->conenforced)
 		ereport(ERROR,
@@ -12919,7 +12925,8 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
 	{
 		if (con->contype == CONSTRAINT_FOREIGN)
 		{
-			QueueFKConstraintValidation(wqueue, conrel, rel, tuple, lockmode);
+			QueueFKConstraintValidation(wqueue, conrel, rel, con->confrelid,
+										tuple, lockmode);
 		}
 		else if (con->contype == CONSTRAINT_CHECK)
 		{
@@ -12952,8 +12959,8 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
  * for the specified relation and all its children.
  */
 static void
-QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
-							HeapTuple contuple, LOCKMODE lockmode)
+QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation fkrel,
+							Oid pkrelid, HeapTuple contuple, LOCKMODE lockmode)
 {
 	Form_pg_constraint con;
 	AlteredTableInfo *tab;
@@ -12964,7 +12971,17 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
 	Assert(con->contype == CONSTRAINT_FOREIGN);
 	Assert(!con->convalidated);
 
-	if (rel->rd_rel->relkind == RELKIND_RELATION)
+	/*
+	 * Add the validation to phase 3's queue; not needed for partitioned
+	 * tables themselves, only for their partitions.
+	 *
+	 * When the referenced table (pkrelid) is partitioned, the referencing
+	 * table (fkrel) has one pg_constraint row pointing to each partition
+	 * thereof.  These rows are there only to support action triggers and no
+	 * table scan is needed, therefore skip this for them as well.
+	 */
+	if (fkrel->rd_rel->relkind == RELKIND_RELATION &&
+		con->confrelid == pkrelid)
 	{
 		NewConstraint *newcon;
 		Constraint *fkconstraint;
@@ -12983,15 +13000,16 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
 		newcon->qual = (Node *) fkconstraint;
 
 		/* Find or create work queue entry for this table */
-		tab = ATGetQueueEntry(wqueue, rel);
+		tab = ATGetQueueEntry(wqueue, fkrel);
 		tab->constraints = lappend(tab->constraints, newcon);
 	}
 
 	/*
 	 * If the table at either end of the constraint is partitioned, we need to
-	 * recurse and handle every constraint that is a child of this constraint.
+	 * recurse and handle every unvalidated constraint that is a child of this
+	 * constraint.
 	 */
-	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+	if (fkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
 		get_rel_relkind(con->confrelid) == RELKIND_PARTITIONED_TABLE)
 	{
 		ScanKeyData pkey;
@@ -13023,8 +13041,12 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
 
 			childrel = table_open(childcon->conrelid, lockmode);
 
-			QueueFKConstraintValidation(wqueue, conrel, childrel, childtup,
-										lockmode);
+			/*
+			 * NB: pkrelid should be passed as-is during recursion,
+			 * as it is required to identify the root referenced table.
+			 */
+			QueueFKConstraintValidation(wqueue, conrel, childrel, pkrelid,
+										childtup, lockmode);
 			table_close(childrel, NoLock);
 		}
 
@@ -13032,7 +13054,11 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
 	}
 
 	/*
-	 * Now update the catalog, while we have the door open.
+	 * Now mark the pg_constraint row as validated (even if we didn't check,
+	 * notably the ones for partitions on the referenced side).
+	 *
+	 * We rely on transaction abort to roll back this change if phase 3
+	 * ultimately finds violating rows.  This is a bit ugly.
 	 */
 	copyTuple = heap_copytuple(contuple);
 	copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple);
@@ -14400,7 +14426,7 @@ ATPrepAlterColumnType(List **wqueue,
 	/* make sure datatype is legal for a column */
 	CheckAttributeType(colName, targettype, targetcollid,
 					   list_make1_oid(rel->rd_rel->reltype),
-					   0);
+					   (attTup->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL ? CHKATYPE_IS_VIRTUAL : 0));
 
 	if (attTup->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
 	{
@@ -14458,6 +14484,9 @@ ATPrepAlterColumnType(List **wqueue,
 		/* Fix collations after all else */
 		assign_expr_collations(pstate, transform);
 
+		/* Expand virtual generated columns in the expr. */
+		transform = expand_generated_columns_in_expr(transform, rel, 1);
+
 		/* Plan the expr now so we can accurately assess the need to rewrite. */
 		transform = (Node *) expression_planner((Expr *) transform);
 
@@ -15385,9 +15414,12 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
 	/*
 	 * Re-parse the index and constraint definitions, and attach them to the
 	 * appropriate work queue entries.  We do this before dropping because in
-	 * the case of a FOREIGN KEY constraint, we might not yet have exclusive
-	 * lock on the table the constraint is attached to, and we need to get
-	 * that before reparsing/dropping.
+	 * the case of a constraint on another table, we might not yet have
+	 * exclusive lock on the table the constraint is attached to, and we need
+	 * to get that before reparsing/dropping.  (That's possible at least for
+	 * FOREIGN KEY, CHECK, and EXCLUSION constraints; in non-FK cases it
+	 * requires a dependency on the target table's composite type in the other
+	 * table's constraint expressions.)
 	 *
 	 * We can't rely on the output of deparsing to tell us which relation to
 	 * operate on, because concurrent activity might have made the name
@@ -15403,7 +15435,6 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
 		Form_pg_constraint con;
 		Oid			relid;
 		Oid			confrelid;
-		char		contype;
 		bool		conislocal;
 
 		tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(oldId));
@@ -15420,7 +15451,6 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
 				elog(ERROR, "could not identify relation associated with constraint %u", oldId);
 		}
 		confrelid = con->confrelid;
-		contype = con->contype;
 		conislocal = con->conislocal;
 		ReleaseSysCache(tup);
 
@@ -15438,12 +15468,12 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
 			continue;
 
 		/*
-		 * When rebuilding an FK constraint that references the table we're
-		 * modifying, we might not yet have any lock on the FK's table, so get
-		 * one now.  We'll need AccessExclusiveLock for the DROP CONSTRAINT
-		 * step, so there's no value in asking for anything weaker.
+		 * When rebuilding another table's constraint that references the
+		 * table we're modifying, we might not yet have any lock on the other
+		 * table, so get one now.  We'll need AccessExclusiveLock for the DROP
+		 * CONSTRAINT step, so there's no value in asking for anything weaker.
 		 */
-		if (relid != tab->relid && contype == CONSTRAINT_FOREIGN)
+		if (relid != tab->relid)
 			LockRelationOid(relid, AccessExclusiveLock);
 
 		ATPostAlterTypeParse(oldId, relid, confrelid,
@@ -15457,6 +15487,14 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
 		Oid			relid;
 
 		relid = IndexGetRelation(oldId, false);
+
+		/*
+		 * As above, make sure we have lock on the index's table if it's not
+		 * the same table.
+		 */
+		if (relid != tab->relid)
+			LockRelationOid(relid, AccessExclusiveLock);
+
 		ATPostAlterTypeParse(oldId, relid, InvalidOid,
 							 (char *) lfirst(def_item),
 							 wqueue, lockmode, tab->rewrite);
@@ -15473,6 +15511,20 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
 		Oid			relid;
 
 		relid = StatisticsGetRelation(oldId, false);
+
+		/*
+		 * As above, make sure we have lock on the statistics object's table
+		 * if it's not the same table.  However, we take
+		 * ShareUpdateExclusiveLock here, aligning with the lock level used in
+		 * CreateStatistics and RemoveStatisticsById.
+		 *
+		 * CAUTION: this should be done after all cases that grab
+		 * AccessExclusiveLock, else we risk causing deadlock due to needing
+		 * to promote our table lock.
+		 */
+		if (relid != tab->relid)
+			LockRelationOid(relid, ShareUpdateExclusiveLock);
+
 		ATPostAlterTypeParse(oldId, relid, InvalidOid,
 							 (char *) lfirst(def_item),
 							 wqueue, lockmode, tab->rewrite);
@@ -15696,7 +15748,7 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd,
 		{
 			AlterDomainStmt *stmt = (AlterDomainStmt *) stm;
 
-			if (stmt->subtype == 'C')	/* ADD CONSTRAINT */
+			if (stmt->subtype == AD_AddConstraint)
 			{
 				Constraint *con = castNode(Constraint, stmt->def);
 				AlterTableCmd *cmd = makeNode(AlterTableCmd);
@@ -17199,15 +17251,13 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
 						RelationGetRelationName(parent_rel))));
 
 	/* If parent rel is temp, it must belong to this session */
-	if (parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
-		!parent_rel->rd_islocaltemp)
+	if (RELATION_IS_OTHER_TEMP(parent_rel))
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("cannot inherit from temporary relation of another session")));
 
 	/* Ditto for the child */
-	if (child_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
-		!child_rel->rd_islocaltemp)
+	if (RELATION_IS_OTHER_TEMP(child_rel))
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("cannot inherit to temporary relation of another session")));
@@ -20278,15 +20328,13 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd,
 						RelationGetRelationName(rel))));
 
 	/* If the parent is temp, it must belong to this session */
-	if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
-		!rel->rd_islocaltemp)
+	if (RELATION_IS_OTHER_TEMP(rel))
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("cannot attach as partition of temporary relation of another session")));
 
 	/* Ditto for the partition */
-	if (attachrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
-		!attachrel->rd_islocaltemp)
+	if (RELATION_IS_OTHER_TEMP(attachrel))
 		ereport(ERROR,
 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 				 errmsg("cannot attach temporary relation of another session as partition")));
@@ -20964,9 +21012,17 @@ ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel,
 		tab->rel = rel;
 	}
 
+	/*
+	 * Detaching the partition might involve TOAST table access, so ensure we
+	 * have a valid snapshot.
+	 */
+	PushActiveSnapshot(GetTransactionSnapshot());
+
 	/* Do the final part of detaching */
 	DetachPartitionFinalize(rel, partRel, concurrent, defaultPartOid);
 
+	PopActiveSnapshot();
+
 	ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel));
 
 	/* keep our lock until commit */
diff --git a/src/backend/commands/tablespace.c b/src/backend/commands/tablespace.c
index a9005cc7212..df31eace47a 100644
--- a/src/backend/commands/tablespace.c
+++ b/src/backend/commands/tablespace.c
@@ -500,7 +500,7 @@ DropTableSpace(DropTableSpaceStmt *stmt)
 		 * mustn't delete.  So instead, we force a checkpoint which will clean
 		 * out any lingering files, and try again.
 		 */
-		RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
+		RequestCheckpoint(CHECKPOINT_FAST | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
 
 		/*
 		 * On Windows, an unlinked file persists in the directory listing
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 67f8e70f9c1..7dc121f73f1 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -80,6 +80,7 @@ static bool GetTupleForTrigger(EState *estate,
 							   ItemPointer tid,
 							   LockTupleMode lockmode,
 							   TupleTableSlot *oldslot,
+							   bool do_epq_recheck,
 							   TupleTableSlot **epqslot,
 							   TM_Result *tmresultp,
 							   TM_FailureData *tmfdp);
@@ -2693,7 +2694,8 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
 					 HeapTuple fdw_trigtuple,
 					 TupleTableSlot **epqslot,
 					 TM_Result *tmresult,
-					 TM_FailureData *tmfd)
+					 TM_FailureData *tmfd,
+					 bool is_merge_delete)
 {
 	TupleTableSlot *slot = ExecGetTriggerOldSlot(estate, relinfo);
 	TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
@@ -2708,9 +2710,17 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
 	{
 		TupleTableSlot *epqslot_candidate = NULL;
 
+		/*
+		 * Get a copy of the on-disk tuple we are planning to delete.  In
+		 * general, if the tuple has been concurrently updated, we should
+		 * recheck it using EPQ.  However, if this is a MERGE DELETE action,
+		 * we skip this EPQ recheck and leave it to the caller (it must do
+		 * additional rechecking, and might end up executing a different
+		 * action entirely).
+		 */
 		if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
-								LockTupleExclusive, slot, &epqslot_candidate,
-								tmresult, tmfd))
+								LockTupleExclusive, slot, !is_merge_delete,
+								&epqslot_candidate, tmresult, tmfd))
 			return false;
 
 		/*
@@ -2800,6 +2810,7 @@ ExecARDeleteTriggers(EState *estate,
 							   tupleid,
 							   LockTupleExclusive,
 							   slot,
+							   false,
 							   NULL,
 							   NULL,
 							   NULL);
@@ -2944,7 +2955,8 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
 					 HeapTuple fdw_trigtuple,
 					 TupleTableSlot *newslot,
 					 TM_Result *tmresult,
-					 TM_FailureData *tmfd)
+					 TM_FailureData *tmfd,
+					 bool is_merge_update)
 {
 	TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
 	TupleTableSlot *oldslot = ExecGetTriggerOldSlot(estate, relinfo);
@@ -2965,10 +2977,17 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
 	{
 		TupleTableSlot *epqslot_candidate = NULL;
 
-		/* get a copy of the on-disk tuple we are planning to update */
+		/*
+		 * Get a copy of the on-disk tuple we are planning to update.  In
+		 * general, if the tuple has been concurrently updated, we should
+		 * recheck it using EPQ.  However, if this is a MERGE UPDATE action,
+		 * we skip this EPQ recheck and leave it to the caller (it must do
+		 * additional rechecking, and might end up executing a different
+		 * action entirely).
+		 */
 		if (!GetTupleForTrigger(estate, epqstate, relinfo, tupleid,
-								lockmode, oldslot, &epqslot_candidate,
-								tmresult, tmfd))
+								lockmode, oldslot, !is_merge_update,
+								&epqslot_candidate, tmresult, tmfd))
 			return false;		/* cancel the update action */
 
 		/*
@@ -3142,6 +3161,7 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
 							   tupleid,
 							   LockTupleExclusive,
 							   oldslot,
+							   false,
 							   NULL,
 							   NULL,
 							   NULL);
@@ -3298,6 +3318,7 @@ GetTupleForTrigger(EState *estate,
 				   ItemPointer tid,
 				   LockTupleMode lockmode,
 				   TupleTableSlot *oldslot,
+				   bool do_epq_recheck,
 				   TupleTableSlot **epqslot,
 				   TM_Result *tmresultp,
 				   TM_FailureData *tmfdp)
@@ -3357,29 +3378,30 @@ GetTupleForTrigger(EState *estate,
 				if (tmfd.traversed)
 				{
 					/*
-					 * Recheck the tuple using EPQ. For MERGE, we leave this
-					 * to the caller (it must do additional rechecking, and
-					 * might end up executing a different action entirely).
+					 * Recheck the tuple using EPQ, if requested.  Otherwise,
+					 * just return that it was concurrently updated.
 					 */
-					if (estate->es_plannedstmt->commandType == CMD_MERGE)
+					if (do_epq_recheck)
 					{
-						if (tmresultp)
-							*tmresultp = TM_Updated;
-						return false;
+						*epqslot = EvalPlanQual(epqstate,
+												relation,
+												relinfo->ri_RangeTableIndex,
+												oldslot);
+
+						/*
+						 * If PlanQual failed for updated tuple - we must not
+						 * process this tuple!
+						 */
+						if (TupIsNull(*epqslot))
+						{
+							*epqslot = NULL;
+							return false;
+						}
 					}
-
-					*epqslot = EvalPlanQual(epqstate,
-											relation,
-											relinfo->ri_RangeTableIndex,
-											oldslot);
-
-					/*
-					 * If PlanQual failed for updated tuple - we must not
-					 * process this tuple!
-					 */
-					if (TupIsNull(*epqslot))
+					else
 					{
-						*epqslot = NULL;
+						if (tmresultp)
+							*tmresultp = TM_Updated;
 						return false;
 					}
 				}
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index 45ae7472ab5..26d985193ae 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -939,11 +939,19 @@ DefineDomain(ParseState *pstate, CreateDomainStmt *stmt)
 				break;
 
 			case CONSTR_NOTNULL:
-				if (nullDefined && !typNotNull)
+				if (nullDefined)
+				{
+					if (!typNotNull)
+						ereport(ERROR,
+								errcode(ERRCODE_SYNTAX_ERROR),
+								errmsg("conflicting NULL/NOT NULL constraints"),
+								parser_errposition(pstate, constr->location));
+
 					ereport(ERROR,
-							errcode(ERRCODE_SYNTAX_ERROR),
-							errmsg("conflicting NULL/NOT NULL constraints"),
+							errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+							errmsg("redundant NOT NULL constraint definition"),
 							parser_errposition(pstate, constr->location));
+				}
 				if (constr->is_no_inherit)
 					ereport(ERROR,
 							errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 33a33bf6b1c..733ef40ae7c 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -56,6 +56,7 @@
 #include "utils/fmgroids.h"
 #include "utils/guc.h"
 #include "utils/guc_hooks.h"
+#include "utils/injection_point.h"
 #include "utils/memutils.h"
 #include "utils/snapmgr.h"
 #include "utils/syscache.h"
@@ -123,7 +124,7 @@ static void vac_truncate_clog(TransactionId frozenXID,
 							  MultiXactId minMulti,
 							  TransactionId lastSaneFrozenXid,
 							  MultiXactId lastSaneMinMulti);
-static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
+static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
 					   BufferAccessStrategy bstrategy);
 static double compute_parallel_delay(void);
 static VacOptValue get_vacoptval_from_boolean(DefElem *def);
@@ -464,7 +465,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
 	}
 
 	/* Now go through the common routine */
-	vacuum(vacstmt->rels, &params, bstrategy, vac_context, isTopLevel);
+	vacuum(vacstmt->rels, params, bstrategy, vac_context, isTopLevel);
 
 	/* Finally, clean up the vacuum memory context */
 	MemoryContextDelete(vac_context);
@@ -493,7 +494,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
  * memory context that will not disappear at transaction commit.
  */
 void
-vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
+vacuum(List *relations, const VacuumParams params, BufferAccessStrategy bstrategy,
 	   MemoryContext vac_context, bool isTopLevel)
 {
 	static bool in_vacuum = false;
@@ -502,9 +503,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
 	volatile bool in_outer_xact,
 				use_own_xacts;
 
-	Assert(params != NULL);
-
-	stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
+	stmttype = (params.options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
 
 	/*
 	 * We cannot run VACUUM inside a user transaction block; if we were inside
@@ -514,7 +513,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
 	 *
 	 * ANALYZE (without VACUUM) can run either way.
 	 */
-	if (params->options & VACOPT_VACUUM)
+	if (params.options & VACOPT_VACUUM)
 	{
 		PreventInTransactionBlock(isTopLevel, stmttype);
 		in_outer_xact = false;
@@ -537,7 +536,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
 	 * Build list of relation(s) to process, putting any new data in
 	 * vac_context for safekeeping.
 	 */
-	if (params->options & VACOPT_ONLY_DATABASE_STATS)
+	if (params.options & VACOPT_ONLY_DATABASE_STATS)
 	{
 		/* We don't process any tables in this case */
 		Assert(relations == NIL);
@@ -553,7 +552,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
 			List	   *sublist;
 			MemoryContext old_context;
 
-			sublist = expand_vacuum_rel(vrel, vac_context, params->options);
+			sublist = expand_vacuum_rel(vrel, vac_context, params.options);
 			old_context = MemoryContextSwitchTo(vac_context);
 			newrels = list_concat(newrels, sublist);
 			MemoryContextSwitchTo(old_context);
@@ -561,7 +560,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
 		relations = newrels;
 	}
 	else
-		relations = get_all_vacuum_rels(vac_context, params->options);
+		relations = get_all_vacuum_rels(vac_context, params.options);
 
 	/*
 	 * Decide whether we need to start/commit our own transactions.
@@ -577,11 +576,11 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
 	 * transaction block, and also in an autovacuum worker, use own
 	 * transactions so we can release locks sooner.
 	 */
-	if (params->options & VACOPT_VACUUM)
+	if (params.options & VACOPT_VACUUM)
 		use_own_xacts = true;
 	else
 	{
-		Assert(params->options & VACOPT_ANALYZE);
+		Assert(params.options & VACOPT_ANALYZE);
 		if (AmAutoVacuumWorkerProcess())
 			use_own_xacts = true;
 		else if (in_outer_xact)
@@ -632,13 +631,13 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
 		{
 			VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
 
-			if (params->options & VACOPT_VACUUM)
+			if (params.options & VACOPT_VACUUM)
 			{
 				if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy))
 					continue;
 			}
 
-			if (params->options & VACOPT_ANALYZE)
+			if (params.options & VACOPT_ANALYZE)
 			{
 				/*
 				 * If using separate xacts, start one for analyze. Otherwise,
@@ -702,8 +701,8 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
 		StartTransactionCommand();
 	}
 
-	if ((params->options & VACOPT_VACUUM) &&
-		!(params->options & VACOPT_SKIP_DATABASE_STATS))
+	if ((params.options & VACOPT_VACUUM) &&
+		!(params.options & VACOPT_SKIP_DATABASE_STATS))
 	{
 		/*
 		 * Update pg_database.datfrozenxid, and truncate pg_xact if possible.
@@ -1101,7 +1100,7 @@ get_all_vacuum_rels(MemoryContext vac_context, int options)
  * minimum).
  */
 bool
-vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
+vacuum_get_cutoffs(Relation rel, const VacuumParams params,
 				   struct VacuumCutoffs *cutoffs)
 {
 	int			freeze_min_age,
@@ -1117,10 +1116,10 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
 				aggressiveMXIDCutoff;
 
 	/* Use mutable copies of freeze age parameters */
-	freeze_min_age = params->freeze_min_age;
-	multixact_freeze_min_age = params->multixact_freeze_min_age;
-	freeze_table_age = params->freeze_table_age;
-	multixact_freeze_table_age = params->multixact_freeze_table_age;
+	freeze_min_age = params.freeze_min_age;
+	multixact_freeze_min_age = params.multixact_freeze_min_age;
+	freeze_table_age = params.freeze_table_age;
+	multixact_freeze_table_age = params.multixact_freeze_table_age;
 
 	/* Set pg_class fields in cutoffs */
 	cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
@@ -1997,7 +1996,7 @@ vac_truncate_clog(TransactionId frozenXID,
  *		At entry and exit, we are not inside a transaction.
  */
 static bool
-vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
+vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
 		   BufferAccessStrategy bstrategy)
 {
 	LOCKMODE	lmode;
@@ -2008,13 +2007,18 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 	Oid			save_userid;
 	int			save_sec_context;
 	int			save_nestlevel;
+	VacuumParams toast_vacuum_params;
 
-	Assert(params != NULL);
+	/*
+	 * This function scribbles on the parameters, so make a copy early to
+	 * avoid affecting the TOAST table (if we do end up recursing to it).
+	 */
+	memcpy(&toast_vacuum_params, &params, sizeof(VacuumParams));
 
 	/* Begin a transaction for vacuuming this relation */
 	StartTransactionCommand();
 
-	if (!(params->options & VACOPT_FULL))
+	if (!(params.options & VACOPT_FULL))
 	{
 		/*
 		 * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
@@ -2040,7 +2044,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 		 */
 		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
 		MyProc->statusFlags |= PROC_IN_VACUUM;
-		if (params->is_wraparound)
+		if (params.is_wraparound)
 			MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
 		ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
 		LWLockRelease(ProcArrayLock);
@@ -2064,12 +2068,12 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 	 * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
 	 * way, we can be sure that no other backend is vacuuming the same table.
 	 */
-	lmode = (params->options & VACOPT_FULL) ?
+	lmode = (params.options & VACOPT_FULL) ?
 		AccessExclusiveLock : ShareUpdateExclusiveLock;
 
 	/* open the relation and get the appropriate lock on it */
-	rel = vacuum_open_relation(relid, relation, params->options,
-							   params->log_min_duration >= 0, lmode);
+	rel = vacuum_open_relation(relid, relation, params.options,
+							   params.log_min_duration >= 0, lmode);
 
 	/* leave if relation could not be opened or locked */
 	if (!rel)
@@ -2084,8 +2088,8 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 	 * This is only safe to do because we hold a session lock on the main
 	 * relation that prevents concurrent deletion.
 	 */
-	if (OidIsValid(params->toast_parent))
-		priv_relid = params->toast_parent;
+	if (OidIsValid(params.toast_parent))
+		priv_relid = params.toast_parent;
 	else
 		priv_relid = RelationGetRelid(rel);
 
@@ -2098,7 +2102,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 	 */
 	if (!vacuum_is_permitted_for_relation(priv_relid,
 										  rel->rd_rel,
-										  params->options & ~VACOPT_ANALYZE))
+										  params.options & ~VACOPT_ANALYZE))
 	{
 		relation_close(rel, lmode);
 		PopActiveSnapshot();
@@ -2169,7 +2173,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 	 * Set index_cleanup option based on index_cleanup reloption if it wasn't
 	 * specified in VACUUM command, or when running in an autovacuum worker
 	 */
-	if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED)
+	if (params.index_cleanup == VACOPTVALUE_UNSPECIFIED)
 	{
 		StdRdOptIndexCleanup vacuum_index_cleanup;
 
@@ -2180,56 +2184,74 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 				((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
 
 		if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
-			params->index_cleanup = VACOPTVALUE_AUTO;
+			params.index_cleanup = VACOPTVALUE_AUTO;
 		else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
-			params->index_cleanup = VACOPTVALUE_ENABLED;
+			params.index_cleanup = VACOPTVALUE_ENABLED;
 		else
 		{
 			Assert(vacuum_index_cleanup ==
 				   STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
-			params->index_cleanup = VACOPTVALUE_DISABLED;
+			params.index_cleanup = VACOPTVALUE_DISABLED;
 		}
 	}
 
+#ifdef USE_INJECTION_POINTS
+	if (params.index_cleanup == VACOPTVALUE_AUTO)
+		INJECTION_POINT("vacuum-index-cleanup-auto", NULL);
+	else if (params.index_cleanup == VACOPTVALUE_DISABLED)
+		INJECTION_POINT("vacuum-index-cleanup-disabled", NULL);
+	else if (params.index_cleanup == VACOPTVALUE_ENABLED)
+		INJECTION_POINT("vacuum-index-cleanup-enabled", NULL);
+#endif
+
 	/*
 	 * Check if the vacuum_max_eager_freeze_failure_rate table storage
 	 * parameter was specified. This overrides the GUC value.
 	 */
 	if (rel->rd_options != NULL &&
 		((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
-		params->max_eager_freeze_failure_rate =
+		params.max_eager_freeze_failure_rate =
 			((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;
 
 	/*
 	 * Set truncate option based on truncate reloption or GUC if it wasn't
 	 * specified in VACUUM command, or when running in an autovacuum worker
 	 */
-	if (params->truncate == VACOPTVALUE_UNSPECIFIED)
+	if (params.truncate == VACOPTVALUE_UNSPECIFIED)
 	{
 		StdRdOptions *opts = (StdRdOptions *) rel->rd_options;
 
 		if (opts && opts->vacuum_truncate_set)
 		{
 			if (opts->vacuum_truncate)
-				params->truncate = VACOPTVALUE_ENABLED;
+				params.truncate = VACOPTVALUE_ENABLED;
 			else
-				params->truncate = VACOPTVALUE_DISABLED;
+				params.truncate = VACOPTVALUE_DISABLED;
 		}
 		else if (vacuum_truncate)
-			params->truncate = VACOPTVALUE_ENABLED;
+			params.truncate = VACOPTVALUE_ENABLED;
 		else
-			params->truncate = VACOPTVALUE_DISABLED;
+			params.truncate = VACOPTVALUE_DISABLED;
 	}
 
+#ifdef USE_INJECTION_POINTS
+	if (params.truncate == VACOPTVALUE_AUTO)
+		INJECTION_POINT("vacuum-truncate-auto", NULL);
+	else if (params.truncate == VACOPTVALUE_DISABLED)
+		INJECTION_POINT("vacuum-truncate-disabled", NULL);
+	else if (params.truncate == VACOPTVALUE_ENABLED)
+		INJECTION_POINT("vacuum-truncate-enabled", NULL);
+#endif
+
 	/*
 	 * Remember the relation's TOAST relation for later, if the caller asked
 	 * us to process it.  In VACUUM FULL, though, the toast table is
 	 * automatically rebuilt by cluster_rel so we shouldn't recurse to it,
 	 * unless PROCESS_MAIN is disabled.
 	 */
-	if ((params->options & VACOPT_PROCESS_TOAST) != 0 &&
-		((params->options & VACOPT_FULL) == 0 ||
-		 (params->options & VACOPT_PROCESS_MAIN) == 0))
+	if ((params.options & VACOPT_PROCESS_TOAST) != 0 &&
+		((params.options & VACOPT_FULL) == 0 ||
+		 (params.options & VACOPT_PROCESS_MAIN) == 0))
 		toast_relid = rel->rd_rel->reltoastrelid;
 	else
 		toast_relid = InvalidOid;
@@ -2252,16 +2274,16 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 	 * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
 	 * to be set when we recurse to the TOAST table.
 	 */
-	if (params->options & VACOPT_PROCESS_MAIN)
+	if (params.options & VACOPT_PROCESS_MAIN)
 	{
 		/*
 		 * Do the actual work --- either FULL or "lazy" vacuum
 		 */
-		if (params->options & VACOPT_FULL)
+		if (params.options & VACOPT_FULL)
 		{
 			ClusterParams cluster_params = {0};
 
-			if ((params->options & VACOPT_VERBOSE) != 0)
+			if ((params.options & VACOPT_VERBOSE) != 0)
 				cluster_params.options |= CLUOPT_VERBOSE;
 
 			/* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
@@ -2299,19 +2321,16 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
 	 */
 	if (toast_relid != InvalidOid)
 	{
-		VacuumParams toast_vacuum_params;
-
 		/*
 		 * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it.  Likewise,
 		 * set toast_parent so that the privilege checks are done on the main
 		 * relation.  NB: This is only safe to do because we hold a session
 		 * lock on the main relation that prevents concurrent deletion.
 		 */
-		memcpy(&toast_vacuum_params, params, sizeof(VacuumParams));
 		toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
 		toast_vacuum_params.toast_parent = relid;
 
-		vacuum_rel(toast_relid, NULL, &toast_vacuum_params, bstrategy);
+		vacuum_rel(toast_relid, NULL, toast_vacuum_params, bstrategy);
 	}
 
 	/*
diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c
index 2b9d548cdeb..0feea1d30ec 100644
--- a/src/backend/commands/vacuumparallel.c
+++ b/src/backend/commands/vacuumparallel.c
@@ -63,7 +63,7 @@ typedef struct PVShared
 	 */
 	Oid			relid;
 	int			elevel;
-	uint64		queryid;
+	int64		queryid;
 
 	/*
 	 * Fields for both index vacuum and cleanup.
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index 8a72b5e70a4..1a37737d4a2 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -5228,7 +5228,6 @@ ExecEvalJsonCoercionFinish(ExprState *state, ExprEvalStep *op)
 		 * JsonBehavior expression.
 		 */
 		jsestate->escontext.error_occurred = false;
-		jsestate->escontext.error_occurred = false;
 		jsestate->escontext.details_wanted = true;
 	}
 }
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 255bd795361..b5400749353 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -144,7 +144,7 @@ execTuplesHashPrepare(int numCols,
  *	hashfunctions: FmgrInfos of datatype-specific hashing functions to use
  *	collations: collations to use in comparisons
  *	nbuckets: initial estimate of hashtable size
- *	additionalsize: size of data stored in ->additional
+ *	additionalsize: size of data that may be stored along with the hash entry
  *	metacxt: memory context for long-lived allocation, but not per-entry data
  *	tablecxt: memory context in which to store table entries
  *	tempcxt: short-lived context for evaluation hash and comparison functions
@@ -288,7 +288,7 @@ ResetTupleHashTable(TupleHashTable hashtable)
  *
  * If isnew isn't NULL, then a new entry is created if no existing entry
  * matches.  On return, *isnew is true if the entry is newly created,
- * false if it existed already.  ->additional_data in the new entry has
+ * false if it existed already.  The additional data in the new entry has
  * been zeroed.
  */
 TupleHashEntry
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index bdf862b2406..ca33a854278 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -279,7 +279,7 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
  *		executor is performing an UPDATE that could not use an
  *		optimization like heapam's HOT (in more general terms a
  *		call to table_tuple_update() took place and set
- *		'update_indexes' to TUUI_All).  Receiving this hint makes
+ *		'update_indexes' to TU_All).  Receiving this hint makes
  *		us consider if we should pass down the 'indexUnchanged'
  *		hint in turn.  That's something that we figure out for
  *		each index_insert() call iff 'update' is true.
@@ -290,7 +290,7 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
  *		HOT has been applied and any updated columns are indexed
  *		only by summarizing indexes (or in more general terms a
  *		call to table_tuple_update() took place and set
- *		'update_indexes' to TUUI_Summarizing). We can (and must)
+ *		'update_indexes' to TU_Summarizing). We can (and must)
  *		therefore only update the indexes that have
  *		'amsummarizing' = true.
  *
diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c
index 53ddd25c42d..f262e7a66f7 100644
--- a/src/backend/executor/execReplication.c
+++ b/src/backend/executor/execReplication.c
@@ -670,7 +670,7 @@ ExecSimpleRelationUpdate(ResultRelInfo *resultRelInfo,
 		resultRelInfo->ri_TrigDesc->trig_update_before_row)
 	{
 		if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
-								  tid, NULL, slot, NULL, NULL))
+								  tid, NULL, slot, NULL, NULL, false))
 			skip_tuple = true;	/* "do nothing" */
 	}
 
@@ -746,7 +746,7 @@ ExecSimpleRelationDelete(ResultRelInfo *resultRelInfo,
 		resultRelInfo->ri_TrigDesc->trig_delete_before_row)
 	{
 		skip_tuple = !ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
-										   tid, NULL, NULL, NULL, NULL);
+										   tid, NULL, NULL, NULL, NULL, false);
 	}
 
 	if (!skip_tuple)
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 2bc89bf84dc..7c6c2c1f6e4 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -64,6 +64,7 @@
 #include "nodes/nodeFuncs.h"
 #include "optimizer/optimizer.h"
 #include "rewrite/rewriteHandler.h"
+#include "rewrite/rewriteManip.h"
 #include "storage/lmgr.h"
 #include "utils/builtins.h"
 #include "utils/datum.h"
@@ -1473,7 +1474,8 @@ ExecDeletePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 
 		return ExecBRDeleteTriggers(context->estate, context->epqstate,
 									resultRelInfo, tupleid, oldtuple,
-									epqreturnslot, result, &context->tmfd);
+									epqreturnslot, result, &context->tmfd,
+									context->mtstate->operation == CMD_MERGE);
 	}
 
 	return true;
@@ -2116,7 +2118,8 @@ ExecUpdatePrologue(ModifyTableContext *context, ResultRelInfo *resultRelInfo,
 
 		return ExecBRUpdateTriggers(context->estate, context->epqstate,
 									resultRelInfo, tupleid, oldtuple, slot,
-									result, &context->tmfd);
+									result, &context->tmfd,
+									context->mtstate->operation == CMD_MERGE);
 	}
 
 	return true;
@@ -3735,6 +3738,7 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate)
 			switch (action->commandType)
 			{
 				case CMD_INSERT:
+					/* INSERT actions always use rootRelInfo */
 					ExecCheckPlanOutput(rootRelInfo->ri_RelationDesc,
 										action->targetList);
 
@@ -3774,9 +3778,23 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate)
 					}
 					else
 					{
-						/* not partitioned? use the stock relation and slot */
-						tgtslot = resultRelInfo->ri_newTupleSlot;
-						tgtdesc = RelationGetDescr(resultRelInfo->ri_RelationDesc);
+						/*
+						 * If the MERGE targets an inherited table, we insert
+						 * into the root table, so we must initialize its
+						 * "new" tuple slot, if not already done, and use its
+						 * relation descriptor for the projection.
+						 *
+						 * For non-inherited tables, rootRelInfo and
+						 * resultRelInfo are the same, and the "new" tuple
+						 * slot will already have been initialized.
+						 */
+						if (rootRelInfo->ri_newTupleSlot == NULL)
+							rootRelInfo->ri_newTupleSlot =
+								table_slot_create(rootRelInfo->ri_RelationDesc,
+												  &estate->es_tupleTable);
+
+						tgtslot = rootRelInfo->ri_newTupleSlot;
+						tgtdesc = RelationGetDescr(rootRelInfo->ri_RelationDesc);
 					}
 
 					action_state->mas_proj =
@@ -3809,6 +3827,114 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate)
 			}
 		}
 	}
+
+	/*
+	 * If the MERGE targets an inherited table, any INSERT actions will use
+	 * rootRelInfo, and rootRelInfo will not be in the resultRelInfo array.
+	 * Therefore we must initialize its WITH CHECK OPTION constraints and
+	 * RETURNING projection, as ExecInitModifyTable did for the resultRelInfo
+	 * entries.
+	 *
+	 * Note that the planner does not build a withCheckOptionList or
+	 * returningList for the root relation, but as in ExecInitPartitionInfo,
+	 * we can use the first resultRelInfo entry as a reference to calculate
+	 * the attno's for the root table.
+	 */
+	if (rootRelInfo != mtstate->resultRelInfo &&
+		rootRelInfo->ri_RelationDesc->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
+		(mtstate->mt_merge_subcommands & MERGE_INSERT) != 0)
+	{
+		ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
+		Relation	rootRelation = rootRelInfo->ri_RelationDesc;
+		Relation	firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
+		int			firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
+		AttrMap    *part_attmap = NULL;
+		bool		found_whole_row;
+
+		if (node->withCheckOptionLists != NIL)
+		{
+			List	   *wcoList;
+			List	   *wcoExprs = NIL;
+
+			/* There should be as many WCO lists as result rels */
+			Assert(list_length(node->withCheckOptionLists) ==
+				   list_length(node->resultRelations));
+
+			/*
+			 * Use the first WCO list as a reference. In the most common case,
+			 * this will be for the same relation as rootRelInfo, and so there
+			 * will be no need to adjust its attno's.
+			 */
+			wcoList = linitial(node->withCheckOptionLists);
+			if (rootRelation != firstResultRel)
+			{
+				/* Convert any Vars in it to contain the root's attno's */
+				part_attmap =
+					build_attrmap_by_name(RelationGetDescr(rootRelation),
+										  RelationGetDescr(firstResultRel),
+										  false);
+
+				wcoList = (List *)
+					map_variable_attnos((Node *) wcoList,
+										firstVarno, 0,
+										part_attmap,
+										RelationGetForm(rootRelation)->reltype,
+										&found_whole_row);
+			}
+
+			foreach(lc, wcoList)
+			{
+				WithCheckOption *wco = lfirst_node(WithCheckOption, lc);
+				ExprState  *wcoExpr = ExecInitQual(castNode(List, wco->qual),
+												   &mtstate->ps);
+
+				wcoExprs = lappend(wcoExprs, wcoExpr);
+			}
+
+			rootRelInfo->ri_WithCheckOptions = wcoList;
+			rootRelInfo->ri_WithCheckOptionExprs = wcoExprs;
+		}
+
+		if (node->returningLists != NIL)
+		{
+			List	   *returningList;
+
+			/* There should be as many returning lists as result rels */
+			Assert(list_length(node->returningLists) ==
+				   list_length(node->resultRelations));
+
+			/*
+			 * Use the first returning list as a reference. In the most common
+			 * case, this will be for the same relation as rootRelInfo, and so
+			 * there will be no need to adjust its attno's.
+			 */
+			returningList = linitial(node->returningLists);
+			if (rootRelation != firstResultRel)
+			{
+				/* Convert any Vars in it to contain the root's attno's */
+				if (part_attmap == NULL)
+					part_attmap =
+						build_attrmap_by_name(RelationGetDescr(rootRelation),
+											  RelationGetDescr(firstResultRel),
+											  false);
+
+				returningList = (List *)
+					map_variable_attnos((Node *) returningList,
+										firstVarno, 0,
+										part_attmap,
+										RelationGetForm(rootRelation)->reltype,
+										&found_whole_row);
+			}
+			rootRelInfo->ri_returningList = returningList;
+
+			/* Initialize the RETURNING projection */
+			rootRelInfo->ri_projectReturning =
+				ExecBuildProjectionInfo(returningList, econtext,
+										mtstate->ps.ps_ResultTupleSlot,
+										&mtstate->ps,
+										RelationGetDescr(rootRelation));
+		}
+	}
 }
 
 /*
diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
index ab2eab9596e..26f7420b64b 100644
--- a/src/backend/executor/nodeTidrangescan.c
+++ b/src/backend/executor/nodeTidrangescan.c
@@ -128,9 +128,11 @@ TidExprListCreate(TidRangeScanState *tidrangestate)
  *		TidRangeEval
  *
  *		Compute and set node's block and offset range to scan by evaluating
- *		the trss_tidexprs.  Returns false if we detect the range cannot
+ *		node->trss_tidexprs.  Returns false if we detect the range cannot
  *		contain any tuples.  Returns true if it's possible for the range to
- *		contain tuples.
+ *		contain tuples.  We don't bother validating that trss_mintid is less
+ *		than or equal to trss_maxtid, as the scan_set_tidrange() table AM
+ *		function will handle that.
  * ----------------------------------------------------------------
  */
 static bool
diff --git a/src/backend/jit/README b/src/backend/jit/README
index 5427bdf2153..a40950dfb03 100644
--- a/src/backend/jit/README
+++ b/src/backend/jit/README
@@ -205,7 +205,7 @@ The ability to do so allows us to get the LLVM IR for all operators
 bitcode files get installed into the server's
   $pkglibdir/bitcode/postgres/
 Using existing LLVM functionality (for parallel LTO compilation),
-additionally an index is over these is stored to
+additionally an index over these is stored to
 $pkglibdir/bitcode/postgres.index.bc
 
 Similarly extensions can install code into
diff --git a/src/backend/jit/llvm/meson.build b/src/backend/jit/llvm/meson.build
index c8e06dfbe35..805fbd69006 100644
--- a/src/backend/jit/llvm/meson.build
+++ b/src/backend/jit/llvm/meson.build
@@ -53,7 +53,7 @@ llvm_irgen_args = [
 
 if ccache.found()
   llvm_irgen_command = ccache
-  llvm_irgen_args = [clang.path()] + llvm_irgen_args
+  llvm_irgen_args = [clang.full_path()] + llvm_irgen_args
 else
   llvm_irgen_command = clang
 endif
diff --git a/src/backend/lib/README b/src/backend/lib/README
index f2fb591237d..c28cbe356f0 100644
--- a/src/backend/lib/README
+++ b/src/backend/lib/README
@@ -1,8 +1,6 @@
 This directory contains a general purpose data structures, for use anywhere
 in the backend:
 
-binaryheap.c - a binary heap
-
 bipartite_match.c - Hopcroft-Karp maximum cardinality algorithm for bipartite graphs
 
 bloomfilter.c - probabilistic, space-efficient set membership testing
@@ -21,8 +19,6 @@ pairingheap.c - a pairing heap
 
 rbtree.c - a red-black tree
 
-stringinfo.c - an extensible string type
-
 
 Aside from the inherent characteristics of the data structures, there are a
 few practical differences between the binary heap and the pairing heap. The
diff --git a/src/backend/libpq/be-secure-gssapi.c b/src/backend/libpq/be-secure-gssapi.c
index 717ba9824f9..5d98c58ffa8 100644
--- a/src/backend/libpq/be-secure-gssapi.c
+++ b/src/backend/libpq/be-secure-gssapi.c
@@ -46,11 +46,18 @@
  * don't want the other side to send arbitrarily huge packets as we
  * would have to allocate memory for them to then pass them to GSSAPI.
  *
- * Therefore, these two #define's are effectively part of the protocol
+ * Therefore, this #define is effectively part of the protocol
  * spec and can't ever be changed.
  */
-#define PQ_GSS_SEND_BUFFER_SIZE 16384
-#define PQ_GSS_RECV_BUFFER_SIZE 16384
+#define PQ_GSS_MAX_PACKET_SIZE 16384	/* includes uint32 header word */
+
+/*
+ * However, during the authentication exchange we must cope with whatever
+ * message size the GSSAPI library wants to send (because our protocol
+ * doesn't support splitting those messages).  Depending on configuration
+ * those messages might be as much as 64kB.
+ */
+#define PQ_GSS_AUTH_BUFFER_SIZE 65536	/* includes uint32 header word */
 
 /*
  * Since we manage at most one GSS-encrypted connection per backend,
@@ -114,9 +121,9 @@ be_gssapi_write(Port *port, const void *ptr, size_t len)
 	 * again, so if it offers a len less than that, something is wrong.
 	 *
 	 * Note: it may seem attractive to report partial write completion once
-	 * we've successfully sent any encrypted packets.  However, that can cause
-	 * problems for callers; notably, pqPutMsgEnd's heuristic to send only
-	 * full 8K blocks interacts badly with such a hack.  We won't save much,
+	 * we've successfully sent any encrypted packets.  However, doing that
+	 * expands the state space of this processing and has been responsible for
+	 * bugs in the past (cf. commit d053a879b).  We won't save much,
 	 * typically, by letting callers discard data early, so don't risk it.
 	 */
 	if (len < PqGSSSendConsumed)
@@ -210,12 +217,12 @@ be_gssapi_write(Port *port, const void *ptr, size_t len)
 			errno = ECONNRESET;
 			return -1;
 		}
-		if (output.length > PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))
+		if (output.length > PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))
 		{
 			ereport(COMMERROR,
 					(errmsg("server tried to send oversize GSSAPI packet (%zu > %zu)",
 							(size_t) output.length,
-							PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))));
+							PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))));
 			errno = ECONNRESET;
 			return -1;
 		}
@@ -346,12 +353,12 @@ be_gssapi_read(Port *port, void *ptr, size_t len)
 		/* Decode the packet length and check for overlength packet */
 		input.length = pg_ntoh32(*(uint32 *) PqGSSRecvBuffer);
 
-		if (input.length > PQ_GSS_RECV_BUFFER_SIZE - sizeof(uint32))
+		if (input.length > PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))
 		{
 			ereport(COMMERROR,
 					(errmsg("oversize GSSAPI packet sent by the client (%zu > %zu)",
 							(size_t) input.length,
-							PQ_GSS_RECV_BUFFER_SIZE - sizeof(uint32))));
+							PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))));
 			errno = ECONNRESET;
 			return -1;
 		}
@@ -517,10 +524,13 @@ secure_open_gssapi(Port *port)
 	 * that will never use them, and we ensure that the buffers are
 	 * sufficiently aligned for the length-word accesses that we do in some
 	 * places in this file.
+	 *
+	 * We'll use PQ_GSS_AUTH_BUFFER_SIZE-sized buffers until transport
+	 * negotiation is complete, then switch to PQ_GSS_MAX_PACKET_SIZE.
 	 */
-	PqGSSSendBuffer = malloc(PQ_GSS_SEND_BUFFER_SIZE);
-	PqGSSRecvBuffer = malloc(PQ_GSS_RECV_BUFFER_SIZE);
-	PqGSSResultBuffer = malloc(PQ_GSS_RECV_BUFFER_SIZE);
+	PqGSSSendBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE);
+	PqGSSRecvBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE);
+	PqGSSResultBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE);
 	if (!PqGSSSendBuffer || !PqGSSRecvBuffer || !PqGSSResultBuffer)
 		ereport(FATAL,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
@@ -568,16 +578,16 @@ secure_open_gssapi(Port *port)
 
 		/*
 		 * During initialization, packets are always fully consumed and
-		 * shouldn't ever be over PQ_GSS_RECV_BUFFER_SIZE in length.
+		 * shouldn't ever be over PQ_GSS_AUTH_BUFFER_SIZE in total length.
 		 *
 		 * Verify on our side that the client doesn't do something funny.
 		 */
-		if (input.length > PQ_GSS_RECV_BUFFER_SIZE)
+		if (input.length > PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))
 		{
 			ereport(COMMERROR,
-					(errmsg("oversize GSSAPI packet sent by the client (%zu > %d)",
+					(errmsg("oversize GSSAPI packet sent by the client (%zu > %zu)",
 							(size_t) input.length,
-							PQ_GSS_RECV_BUFFER_SIZE)));
+							PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))));
 			return -1;
 		}
 
@@ -631,12 +641,12 @@ secure_open_gssapi(Port *port)
 		{
 			uint32		netlen = pg_hton32(output.length);
 
-			if (output.length > PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))
+			if (output.length > PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))
 			{
 				ereport(COMMERROR,
 						(errmsg("server tried to send oversize GSSAPI packet (%zu > %zu)",
 								(size_t) output.length,
-								PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))));
+								PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))));
 				gss_release_buffer(&minor, &output);
 				return -1;
 			}
@@ -692,11 +702,28 @@ secure_open_gssapi(Port *port)
 	}
 
 	/*
+	 * Release the large authentication buffers and allocate the ones we want
+	 * for normal operation.
+	 */
+	free(PqGSSSendBuffer);
+	free(PqGSSRecvBuffer);
+	free(PqGSSResultBuffer);
+	PqGSSSendBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE);
+	PqGSSRecvBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE);
+	PqGSSResultBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE);
+	if (!PqGSSSendBuffer || !PqGSSRecvBuffer || !PqGSSResultBuffer)
+		ereport(FATAL,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+	PqGSSSendLength = PqGSSSendNext = PqGSSSendConsumed = 0;
+	PqGSSRecvLength = PqGSSResultLength = PqGSSResultNext = 0;
+
+	/*
 	 * Determine the max packet size which will fit in our buffer, after
 	 * accounting for the length.  be_gssapi_write will need this.
 	 */
 	major = gss_wrap_size_limit(&minor, port->gss->ctx, 1, GSS_C_QOP_DEFAULT,
-								PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32),
+								PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32),
 								&PqGSSMaxPktSize);
 
 	if (GSS_ERROR(major))
diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c
index 64ff3ce3d6a..c8b63ef8249 100644
--- a/src/backend/libpq/be-secure-openssl.c
+++ b/src/backend/libpq/be-secure-openssl.c
@@ -1436,10 +1436,10 @@ initialize_ecdh(SSL_CTX *context, bool isServerStart)
 		 */
 		ereport(isServerStart ? FATAL : LOG,
 				errcode(ERRCODE_CONFIG_FILE_ERROR),
-				errmsg("failed to set group names specified in ssl_groups: %s",
+				errmsg("could not set group names specified in ssl_groups: %s",
 					   SSLerrmessageExt(ERR_get_error(),
 										_("No valid groups found"))),
-				errhint("Ensure that each group name is spelled correctly and supported by the installed version of OpenSSL"));
+				errhint("Ensure that each group name is spelled correctly and supported by the installed version of OpenSSL."));
 		return false;
 	}
 #endif
diff --git a/src/backend/libpq/hba.c b/src/backend/libpq/hba.c
index 332fad27835..fecee8224d0 100644
--- a/src/backend/libpq/hba.c
+++ b/src/backend/libpq/hba.c
@@ -2873,8 +2873,11 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
 			!token_has_regexp(identLine->pg_user) &&
 			(ofs = strstr(identLine->pg_user->string, "\\1")) != NULL)
 		{
+			const char *repl_str;
+			size_t		repl_len;
+			char	   *old_pg_user;
 			char	   *expanded_pg_user;
-			int			offset;
+			size_t		offset;
 
 			/* substitution of the first argument requested */
 			if (matches[1].rm_so < 0)
@@ -2886,18 +2889,33 @@ check_ident_usermap(IdentLine *identLine, const char *usermap_name,
 				*error_p = true;
 				return;
 			}
+			repl_str = system_user + matches[1].rm_so;
+			repl_len = matches[1].rm_eo - matches[1].rm_so;
 
 			/*
-			 * length: original length minus length of \1 plus length of match
-			 * plus null terminator
+			 * It's allowed to have more than one \1 in the string, and we'll
+			 * replace them all.  But that's pretty unusual so we optimize on
+			 * the assumption of only one occurrence, which motivates doing
+			 * repeated replacements instead of making two passes over the
+			 * string to determine the final length right away.
 			 */
-			expanded_pg_user = palloc0(strlen(identLine->pg_user->string) - 2 + (matches[1].rm_eo - matches[1].rm_so) + 1);
-			offset = ofs - identLine->pg_user->string;
-			memcpy(expanded_pg_user, identLine->pg_user->string, offset);
-			memcpy(expanded_pg_user + offset,
-				   system_user + matches[1].rm_so,
-				   matches[1].rm_eo - matches[1].rm_so);
-			strcat(expanded_pg_user, ofs + 2);
+			old_pg_user = identLine->pg_user->string;
+			do
+			{
+				/*
+				 * length: current length minus length of \1 plus length of
+				 * replacement plus null terminator
+				 */
+				expanded_pg_user = palloc(strlen(old_pg_user) - 2 + repl_len + 1);
+				/* ofs points into the old_pg_user string at this point */
+				offset = ofs - old_pg_user;
+				memcpy(expanded_pg_user, old_pg_user, offset);
+				memcpy(expanded_pg_user + offset, repl_str, repl_len);
+				strcpy(expanded_pg_user + offset + repl_len, ofs + 2);
+				if (old_pg_user != identLine->pg_user->string)
+					pfree(old_pg_user);
+				old_pg_user = expanded_pg_user;
+			} while ((ofs = strstr(old_pg_user + offset + repl_len, "\\1")) != NULL);
 
 			/*
 			 * Mark the token as quoted, so it will only be compared literally
diff --git a/src/backend/libpq/pg_ident.conf.sample b/src/backend/libpq/pg_ident.conf.sample
index f5225f26cdf..8ee6c0ba315 100644
--- a/src/backend/libpq/pg_ident.conf.sample
+++ b/src/backend/libpq/pg_ident.conf.sample
@@ -13,25 +13,25 @@
 # user names to their corresponding PostgreSQL user names.  Records
 # are of the form:
 #
-# MAPNAME  SYSTEM-USERNAME  PG-USERNAME
+# MAPNAME  SYSTEM-USERNAME  DATABASE-USERNAME
 #
 # (The uppercase quantities must be replaced by actual values.)
 #
 # MAPNAME is the (otherwise freely chosen) map name that was used in
 # pg_hba.conf.  SYSTEM-USERNAME is the detected user name of the
-# client.  PG-USERNAME is the requested PostgreSQL user name.  The
-# existence of a record specifies that SYSTEM-USERNAME may connect as
-# PG-USERNAME.
+# client.  DATABASE-USERNAME is the requested PostgreSQL user name.
+# The existence of a record specifies that SYSTEM-USERNAME may connect
+# as DATABASE-USERNAME.
 #
-# If SYSTEM-USERNAME starts with a slash (/), it will be treated as a
-# regular expression.  Optionally this can contain a capture (a
-# parenthesized subexpression).  The substring matching the capture
-# will be substituted for \1 (backslash-one) if present in
-# PG-USERNAME.
+# If SYSTEM-USERNAME starts with a slash (/), the rest of it will be
+# treated as a regular expression.  Optionally this can contain a capture
+# (a parenthesized subexpression).  The substring matching the capture
+# will be substituted for \1 (backslash-one) if that appears in
+# DATABASE-USERNAME.
 #
-# PG-USERNAME can be "all", a user name, a group name prefixed with "+", or
-# a regular expression (if it starts with a slash (/)).  If it is a regular
-# expression, the substring matching with \1 has no effect.
+# DATABASE-USERNAME can be "all", a user name, a group name prefixed with "+",
+# or a regular expression (if it starts with a slash (/)).  If it is a regular
+# expression, no substitution for \1 will occur.
 #
 # Multiple maps may be specified in this file and used by pg_hba.conf.
 #
@@ -69,4 +69,4 @@
 # Put your actual configuration here
 # ----------------------------------
 
-# MAPNAME       SYSTEM-USERNAME         PG-USERNAME
+# MAPNAME       SYSTEM-USERNAME         DATABASE-USERNAME
diff --git a/src/backend/main/main.c b/src/backend/main/main.c
index 7d63cf94a6b..bdcb5e4f261 100644
--- a/src/backend/main/main.c
+++ b/src/backend/main/main.c
@@ -125,13 +125,17 @@ main(int argc, char *argv[])
 	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("postgres"));
 
 	/*
-	 * In the postmaster, absorb the environment values for LC_COLLATE and
-	 * LC_CTYPE.  Individual backends will change these later to settings
-	 * taken from pg_database, but the postmaster cannot do that.  If we leave
-	 * these set to "C" then message localization might not work well in the
-	 * postmaster.
+	 * Collation is handled by pg_locale.c, and the behavior is dependent on
+	 * the provider. strcoll(), etc., should not be called directly.
+	 */
+	init_locale("LC_COLLATE", LC_COLLATE, "C");
+
+	/*
+	 * In the postmaster, absorb the environment value for LC_CTYPE.
+	 * Individual backends will change it later to pg_database.datctype, but
+	 * the postmaster cannot do that.  If we leave it set to "C" then message
+	 * localization might not work well in the postmaster.
 	 */
-	init_locale("LC_COLLATE", LC_COLLATE, "");
 	init_locale("LC_CTYPE", LC_CTYPE, "");
 
 	/*
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 77659b0f760..9ecddb14231 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1039,6 +1039,11 @@ _read${n}(void)
 			print $off "\tWRITE_UINT_FIELD($f);\n";
 			print $rff "\tREAD_UINT_FIELD($f);\n" unless $no_read;
 		}
+		elsif ($t eq 'int64')
+		{
+			print $off "\tWRITE_INT64_FIELD($f);\n";
+			print $rff "\tREAD_INT64_FIELD($f);\n" unless $no_read;
+		}
 		elsif ($t eq 'uint64'
 			|| $t eq 'AclMode')
 		{
@@ -1324,7 +1329,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
 			# Node type.  Squash constants if requested.
 			if ($query_jumble_squash)
 			{
-				print $jff "\tJUMBLE_ELEMENTS($f);\n"
+				print $jff "\tJUMBLE_ELEMENTS($f, node);\n"
 				  unless $query_jumble_ignore;
 			}
 			else
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index ceac3fd8620..eaf391fc2ab 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -51,6 +51,12 @@ static void outDouble(StringInfo str, double d);
 #define WRITE_UINT_FIELD(fldname) \
 	appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname)
 
+/* Write a signed integer field (anything written with INT64_FORMAT) */
+#define WRITE_INT64_FIELD(fldname) \
+	appendStringInfo(str, \
+					 " :" CppAsString(fldname) " " INT64_FORMAT, \
+					 node->fldname)
+
 /* Write an unsigned integer field (anything written with UINT64_FORMAT) */
 #define WRITE_UINT64_FIELD(fldname) \
 	appendStringInfo(str, " :" CppAsString(fldname) " " UINT64_FORMAT, \
@@ -647,6 +653,8 @@ _outA_Expr(StringInfo str, const A_Expr *node)
 
 	WRITE_NODE_FIELD(lexpr);
 	WRITE_NODE_FIELD(rexpr);
+	WRITE_LOCATION_FIELD(rexpr_list_start);
+	WRITE_LOCATION_FIELD(rexpr_list_end);
 	WRITE_LOCATION_FIELD(location);
 }
 
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index d1e82a63f09..31f97151977 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -21,6 +21,11 @@
  * tree(s) generated from the query.  The executor can then use this value
  * to blame query costs on the proper queryId.
  *
+ * Arrays of two or more constants and PARAM_EXTERN parameters are "squashed"
+ * and contribute only once to the jumble.  This has the effect that queries
+ * that differ only in the length of such lists have the same queryId.
+ *
+ *
  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
@@ -56,16 +61,18 @@ int			compute_query_id = COMPUTE_QUERY_ID_AUTO;
 bool		query_id_enabled = false;
 
 static JumbleState *InitJumble(void);
-static uint64 DoJumble(JumbleState *jstate, Node *node);
+static int64 DoJumble(JumbleState *jstate, Node *node);
 static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *value, Size size);
 static void FlushPendingNulls(JumbleState *jstate);
 static void RecordConstLocation(JumbleState *jstate,
-								int location, bool squashed);
+								bool extern_param,
+								int location, int len);
 static void _jumbleNode(JumbleState *jstate, Node *node);
-static void _jumbleElements(JumbleState *jstate, List *elements);
-static void _jumbleA_Const(JumbleState *jstate, Node *node);
 static void _jumbleList(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements, Node *node);
+static void _jumbleParam(JumbleState *jstate, Node *node);
+static void _jumbleA_Const(JumbleState *jstate, Node *node);
 static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
 static void _jumbleRangeTblEntry_eref(JumbleState *jstate,
 									  RangeTblEntry *rte,
@@ -141,12 +148,12 @@ JumbleQuery(Query *query)
 	 * If we are unlucky enough to get a hash of zero, use 1 instead for
 	 * normal statements and 2 for utility queries.
 	 */
-	if (query->queryId == UINT64CONST(0))
+	if (query->queryId == INT64CONST(0))
 	{
 		if (query->utilityStmt)
-			query->queryId = UINT64CONST(2);
+			query->queryId = INT64CONST(2);
 		else
-			query->queryId = UINT64CONST(1);
+			query->queryId = INT64CONST(1);
 	}
 
 	return jstate;
@@ -185,6 +192,7 @@ InitJumble(void)
 	jstate->clocations_count = 0;
 	jstate->highest_extern_param_id = 0;
 	jstate->pending_nulls = 0;
+	jstate->has_squashed_lists = false;
 #ifdef USE_ASSERT_CHECKING
 	jstate->total_jumble_len = 0;
 #endif
@@ -197,7 +205,7 @@ InitJumble(void)
  *		Jumble the given Node using the given JumbleState and return the resulting
  *		jumble hash.
  */
-static uint64
+static int64
 DoJumble(JumbleState *jstate, Node *node)
 {
 	/* Jumble the given node */
@@ -207,10 +215,14 @@ DoJumble(JumbleState *jstate, Node *node)
 	if (jstate->pending_nulls > 0)
 		FlushPendingNulls(jstate);
 
+	/* Squashed list found, reset highest_extern_param_id */
+	if (jstate->has_squashed_lists)
+		jstate->highest_extern_param_id = 0;
+
 	/* Process the jumble buffer and produce the hash value */
-	return DatumGetUInt64(hash_any_extended(jstate->jumble,
-											jstate->jumble_len,
-											0));
+	return DatumGetInt64(hash_any_extended(jstate->jumble,
+										   jstate->jumble_len,
+										   0));
 }
 
 /*
@@ -256,10 +268,10 @@ AppendJumbleInternal(JumbleState *jstate, const unsigned char *item,
 
 		if (unlikely(jumble_len >= JUMBLE_SIZE))
 		{
-			uint64		start_hash;
+			int64		start_hash;
 
-			start_hash = DatumGetUInt64(hash_any_extended(jumble,
-														  JUMBLE_SIZE, 0));
+			start_hash = DatumGetInt64(hash_any_extended(jumble,
+														 JUMBLE_SIZE, 0));
 			memcpy(jumble, &start_hash, sizeof(start_hash));
 			jumble_len = sizeof(start_hash);
 		}
@@ -373,15 +385,17 @@ FlushPendingNulls(JumbleState *jstate)
 
 
 /*
- * Record location of constant within query string of query tree that is
- * currently being walked.
+ * Record the location of some kind of constant within a query string.
+ * These are not only bare constants but also expressions that ultimately
+ * constitute a constant, such as those inside casts and simple function
+ * calls; if extern_param, then it corresponds to a PARAM_EXTERN Param.
  *
- * 'squashed' signals that the constant represents the first or the last
- * element in a series of merged constants, and everything but the first/last
- * element contributes nothing to the jumble hash.
+ * If length is -1, it indicates a single such constant element.  If
+ * it's a positive integer, it indicates the length of a squashable
+ * list of them.
  */
 static void
-RecordConstLocation(JumbleState *jstate, int location, bool squashed)
+RecordConstLocation(JumbleState *jstate, bool extern_param, int location, int len)
 {
 	/* -1 indicates unknown or undefined location */
 	if (location >= 0)
@@ -396,9 +410,15 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed)
 						 sizeof(LocationLen));
 		}
 		jstate->clocations[jstate->clocations_count].location = location;
-		/* initialize lengths to -1 to simplify third-party module usage */
-		jstate->clocations[jstate->clocations_count].squashed = squashed;
-		jstate->clocations[jstate->clocations_count].length = -1;
+
+		/*
+		 * Lengths are either positive integers (indicating a squashable
+		 * list), or -1.
+		 */
+		Assert(len > -1 || len == -1);
+		jstate->clocations[jstate->clocations_count].length = len;
+		jstate->clocations[jstate->clocations_count].squashed = (len > -1);
+		jstate->clocations[jstate->clocations_count].extern_param = extern_param;
 		jstate->clocations_count++;
 	}
 }
@@ -407,47 +427,74 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed)
  * Subroutine for _jumbleElements: Verify a few simple cases where we can
  * deduce that the expression is a constant:
  *
- * - Ignore a possible wrapping RelabelType and CoerceViaIO.
- * - If it's a FuncExpr, check that the function is an implicit
+ * - See through any wrapping RelabelType and CoerceViaIO layers.
+ * - If it's a FuncExpr, check that the function is a builtin
  *   cast and its arguments are Const.
- * - Otherwise test if the expression is a simple Const.
+ * - Otherwise test if the expression is a simple Const or a
+ *   PARAM_EXTERN param.
  */
 static bool
-IsSquashableConst(Node *element)
+IsSquashableConstant(Node *element)
 {
-	if (IsA(element, RelabelType))
-		element = (Node *) ((RelabelType *) element)->arg;
-
-	if (IsA(element, CoerceViaIO))
-		element = (Node *) ((CoerceViaIO *) element)->arg;
-
-	if (IsA(element, FuncExpr))
+restart:
+	switch (nodeTag(element))
 	{
-		FuncExpr   *func = (FuncExpr *) element;
-		ListCell   *temp;
+		case T_RelabelType:
+			/* Unwrap RelabelType */
+			element = (Node *) ((RelabelType *) element)->arg;
+			goto restart;
 
-		if (func->funcformat != COERCE_IMPLICIT_CAST &&
-			func->funcformat != COERCE_EXPLICIT_CAST)
-			return false;
+		case T_CoerceViaIO:
+			/* Unwrap CoerceViaIO */
+			element = (Node *) ((CoerceViaIO *) element)->arg;
+			goto restart;
 
-		if (func->funcid > FirstGenbkiObjectId)
-			return false;
+		case T_Const:
+			return true;
 
-		foreach(temp, func->args)
-		{
-			Node	   *arg = lfirst(temp);
+		case T_Param:
+			return castNode(Param, element)->paramkind == PARAM_EXTERN;
 
-			if (!IsA(arg, Const))	/* XXX we could recurse here instead */
-				return false;
-		}
+		case T_FuncExpr:
+			{
+				FuncExpr   *func = (FuncExpr *) element;
+				ListCell   *temp;
 
-		return true;
-	}
+				if (func->funcformat != COERCE_IMPLICIT_CAST &&
+					func->funcformat != COERCE_EXPLICIT_CAST)
+					return false;
 
-	if (!IsA(element, Const))
-		return false;
+				if (func->funcid > FirstGenbkiObjectId)
+					return false;
 
-	return true;
+				/*
+				 * We can check function arguments recursively, being careful
+				 * about recursing too deep.  At each recursion level it's
+				 * enough to test the stack on the first element.  (Note that
+				 * I wasn't able to hit this without bloating the stack
+				 * artificially in this function: the parser errors out before
+				 * stack size becomes a problem here.)
+				 */
+				foreach(temp, func->args)
+				{
+					Node	   *arg = lfirst(temp);
+
+					if (!IsA(arg, Const))
+					{
+						if (foreach_current_index(temp) == 0 &&
+							stack_is_too_deep())
+							return false;
+						else if (!IsSquashableConstant(arg))
+							return false;
+					}
+				}
+
+				return true;
+			}
+
+		default:
+			return false;
+	}
 }
 
 /*
@@ -457,39 +504,33 @@ IsSquashableConst(Node *element)
  * Return value indicates if squashing is possible.
  *
  * Note that this function searches only for explicit Const nodes with
- * possibly very simple decorations on top, and does not try to simplify
- * expressions.
+ * possibly very simple decorations on top and PARAM_EXTERN parameters,
+ * and does not try to simplify expressions.
  */
 static bool
-IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+IsSquashableConstantList(List *elements)
 {
 	ListCell   *temp;
 
-	/*
-	 * If squashing is disabled, or the list is too short, we don't try to
-	 * squash it.
-	 */
+	/* If the list is too short, we don't try to squash it. */
 	if (list_length(elements) < 2)
 		return false;
 
 	foreach(temp, elements)
 	{
-		if (!IsSquashableConst(lfirst(temp)))
+		if (!IsSquashableConstant(lfirst(temp)))
 			return false;
 	}
 
-	*firstExpr = linitial(elements);
-	*lastExpr = llast(elements);
-
 	return true;
 }
 
 #define JUMBLE_NODE(item) \
 	_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_ELEMENTS(list) \
-	_jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_ELEMENTS(list, node) \
+	_jumbleElements(jstate, (List *) expr->list, node)
 #define JUMBLE_LOCATION(location) \
-	RecordConstLocation(jstate, expr->location, false)
+	RecordConstLocation(jstate, false, expr->location, -1)
 #define JUMBLE_FIELD(item) \
 do { \
 	if (sizeof(expr->item) == 8) \
@@ -516,42 +557,6 @@ do { \
 
 #include "queryjumblefuncs.funcs.c"
 
-/*
- * We jumble lists of constant elements as one individual item regardless
- * of how many elements are in the list.  This means different queries
- * jumble to the same query_id, if the only difference is the number of
- * elements in the list.
- */
-static void
-_jumbleElements(JumbleState *jstate, List *elements)
-{
-	Node	   *first,
-			   *last;
-
-	if (IsSquashableConstList(elements, &first, &last))
-	{
-		/*
-		 * If this list of elements is squashable, keep track of the location
-		 * of its first and last elements.  When reading back the locations
-		 * array, we'll see two consecutive locations with ->squashed set to
-		 * true, indicating the location of initial and final elements of this
-		 * list.
-		 *
-		 * For the limited set of cases we support now (implicit coerce via
-		 * FuncExpr, Const) it's fine to use exprLocation of the 'last'
-		 * expression, but if more complex composite expressions are to be
-		 * supported (e.g., OpExpr or FuncExpr as an explicit call), more
-		 * sophisticated tracking will be needed.
-		 */
-		RecordConstLocation(jstate, exprLocation(first), true);
-		RecordConstLocation(jstate, exprLocation(last), true);
-	}
-	else
-	{
-		_jumbleNode(jstate, (Node *) elements);
-	}
-}
-
 static void
 _jumbleNode(JumbleState *jstate, Node *node)
 {
@@ -593,26 +598,6 @@ _jumbleNode(JumbleState *jstate, Node *node)
 			break;
 	}
 
-	/* Special cases to handle outside the automated code */
-	switch (nodeTag(expr))
-	{
-		case T_Param:
-			{
-				Param	   *p = (Param *) node;
-
-				/*
-				 * Update the highest Param id seen, in order to start
-				 * normalization correctly.
-				 */
-				if (p->paramkind == PARAM_EXTERN &&
-					p->paramid > jstate->highest_extern_param_id)
-					jstate->highest_extern_param_id = p->paramid;
-			}
-			break;
-		default:
-			break;
-	}
-
 	/* Ensure we added something to the jumble buffer */
 	Assert(jstate->total_jumble_len > prev_jumble_len);
 }
@@ -648,6 +633,79 @@ _jumbleList(JumbleState *jstate, Node *node)
 	}
 }
 
+/*
+ * We try to jumble lists of expressions as one individual item regardless
+ * of how many elements are in the list. This is known as squashing, which
+ * results in different queries jumbling to the same query_id, if the only
+ * difference is the number of elements in the list.
+ *
+ * We allow constants and PARAM_EXTERN parameters to be squashed. To normalize
+ * such queries, we use the start and end locations of the list of elements
+ * within the query string.
+ */
+static void
+_jumbleElements(JumbleState *jstate, List *elements, Node *node)
+{
+	bool		normalize_list = false;
+
+	if (IsSquashableConstantList(elements))
+	{
+		if (IsA(node, ArrayExpr))
+		{
+			ArrayExpr  *aexpr = (ArrayExpr *) node;
+
+			if (aexpr->list_start > 0 && aexpr->list_end > 0)
+			{
+				RecordConstLocation(jstate,
+									false,
+									aexpr->list_start + 1,
+									(aexpr->list_end - aexpr->list_start) - 1);
+				normalize_list = true;
+				jstate->has_squashed_lists = true;
+			}
+		}
+	}
+
+	if (!normalize_list)
+	{
+		_jumbleNode(jstate, (Node *) elements);
+	}
+}
+
+/*
+ * We store the highest param ID of extern params.  This can later be used
+ * to start the numbering of the placeholder for squashed lists.
+ */
+static void
+_jumbleParam(JumbleState *jstate, Node *node)
+{
+	Param	   *expr = (Param *) node;
+
+	JUMBLE_FIELD(paramkind);
+	JUMBLE_FIELD(paramid);
+	JUMBLE_FIELD(paramtype);
+	/* paramtypmod and paramcollid are ignored */
+
+	if (expr->paramkind == PARAM_EXTERN)
+	{
+		/*
+		 * At this point, only external parameter locations outside of
+		 * squashable lists will be recorded.
+		 */
+		RecordConstLocation(jstate, true, expr->location, -1);
+
+		/*
+		 * Update the highest Param id seen, in order to start normalization
+		 * correctly.
+		 *
+		 * Note: This value is reset at the end of jumbling if there exists a
+		 * squashable list. See the comment in the definition of JumbleState.
+		 */
+		if (expr->paramid > jstate->highest_extern_param_id)
+			jstate->highest_extern_param_id = expr->paramid;
+	}
+}
+
 static void
 _jumbleA_Const(JumbleState *jstate, Node *node)
 {
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 64d3a09f765..48b5d13b9b6 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -68,6 +68,12 @@
 	token = pg_strtok(&length);		/* get field value */ \
 	local_node->fldname = atoui(token)
 
+/* Read a signed integer field (anything written using INT64_FORMAT) */
+#define READ_INT64_FIELD(fldname) \
+	token = pg_strtok(&length); /* skip :fldname */ \
+	token = pg_strtok(&length); /* get field value */ \
+	local_node->fldname = strtoi64(token, NULL, 10)
+
 /* Read an unsigned integer field (anything written using UINT64_FORMAT) */
 #define READ_UINT64_FIELD(fldname) \
 	token = pg_strtok(&length);		/* skip :fldname */ \
@@ -520,6 +526,8 @@ _readA_Expr(void)
 
 	READ_NODE_FIELD(lexpr);
 	READ_NODE_FIELD(rexpr);
+	READ_LOCATION_FIELD(rexpr_list_start);
+	READ_LOCATION_FIELD(rexpr_list_end);
 	READ_LOCATION_FIELD(location);
 
 	READ_DONE();
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 3d44815ed5a..1f04a2c182c 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2247,7 +2247,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
  *	  Determines and returns the cost of an Append node.
  */
 void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
 {
 	ListCell   *l;
 
@@ -2309,26 +2309,52 @@ cost_append(AppendPath *apath)
 			foreach(l, apath->subpaths)
 			{
 				Path	   *subpath = (Path *) lfirst(l);
-				Path		sort_path;	/* dummy for result of cost_sort */
+				int			presorted_keys;
+				Path		sort_path;	/* dummy for result of
+										 * cost_sort/cost_incremental_sort */
 
-				if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+				if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+												 &presorted_keys))
 				{
 					/*
 					 * We'll need to insert a Sort node, so include costs for
-					 * that.  We can use the parent's LIMIT if any, since we
+					 * that.  We choose to use incremental sort if it is
+					 * enabled and there are presorted keys; otherwise we use
+					 * full sort.
+					 *
+					 * We can use the parent's LIMIT if any, since we
 					 * certainly won't pull more than that many tuples from
 					 * any child.
 					 */
-					cost_sort(&sort_path,
-							  NULL, /* doesn't currently need root */
-							  pathkeys,
-							  subpath->disabled_nodes,
-							  subpath->total_cost,
-							  subpath->rows,
-							  subpath->pathtarget->width,
-							  0.0,
-							  work_mem,
-							  apath->limit_tuples);
+					if (enable_incremental_sort && presorted_keys > 0)
+					{
+						cost_incremental_sort(&sort_path,
+											  root,
+											  pathkeys,
+											  presorted_keys,
+											  subpath->disabled_nodes,
+											  subpath->startup_cost,
+											  subpath->total_cost,
+											  subpath->rows,
+											  subpath->pathtarget->width,
+											  0.0,
+											  work_mem,
+											  apath->limit_tuples);
+					}
+					else
+					{
+						cost_sort(&sort_path,
+								  root,
+								  pathkeys,
+								  subpath->disabled_nodes,
+								  subpath->total_cost,
+								  subpath->rows,
+								  subpath->pathtarget->width,
+								  0.0,
+								  work_mem,
+								  apath->limit_tuples);
+					}
+
 					subpath = &sort_path;
 				}
 
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 26f0336f1e4..ebedc5574ca 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -154,13 +154,17 @@ add_paths_to_joinrel(PlannerInfo *root,
 	/*
 	 * See if the inner relation is provably unique for this outer rel.
 	 *
-	 * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
-	 * matter since the executor can make the equivalent optimization anyway;
-	 * we need not expend planner cycles on proofs.  For JOIN_UNIQUE_INNER, we
-	 * must be considering a semijoin whose inner side is not provably unique
-	 * (else reduce_unique_semijoins would've simplified it), so there's no
-	 * point in calling innerrel_is_unique.  However, if the LHS covers all of
-	 * the semijoin's min_lefthand, then it's appropriate to set inner_unique
+	 * We have some special cases: for JOIN_SEMI, it doesn't matter since the
+	 * executor can make the equivalent optimization anyway.  It also doesn't
+	 * help enable use of Memoize, since a semijoin with a provably unique
+	 * inner side should have been reduced to an inner join in that case.
+	 * Therefore, we need not expend planner cycles on proofs.  (For
+	 * JOIN_ANTI, although it doesn't help the executor for the same reason,
+	 * it can benefit Memoize paths.)  For JOIN_UNIQUE_INNER, we must be
+	 * considering a semijoin whose inner side is not provably unique (else
+	 * reduce_unique_semijoins would've simplified it), so there's no point in
+	 * calling innerrel_is_unique.  However, if the LHS covers all of the
+	 * semijoin's min_lefthand, then it's appropriate to set inner_unique
 	 * because the path produced by create_unique_path will be unique relative
 	 * to the LHS.  (If we have an LHS that's only part of the min_lefthand,
 	 * that is *not* true.)  For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
@@ -169,12 +173,6 @@ add_paths_to_joinrel(PlannerInfo *root,
 	switch (jointype)
 	{
 		case JOIN_SEMI:
-		case JOIN_ANTI:
-
-			/*
-			 * XXX it may be worth proving this to allow a Memoize to be
-			 * considered for Nested Loop Semi/Anti Joins.
-			 */
 			extra.inner_unique = false; /* well, unproven */
 			break;
 		case JOIN_UNIQUE_INNER:
@@ -715,16 +713,21 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
 		return NULL;
 
 	/*
-	 * Currently we don't do this for SEMI and ANTI joins unless they're
-	 * marked as inner_unique.  This is because nested loop SEMI/ANTI joins
-	 * don't scan the inner node to completion, which will mean memoize cannot
-	 * mark the cache entry as complete.
-	 *
-	 * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
-	 * = true.  Should we?  See add_paths_to_joinrel()
+	 * Currently we don't do this for SEMI and ANTI joins, because nested loop
+	 * SEMI/ANTI joins don't scan the inner node to completion, which means
+	 * memoize cannot mark the cache entry as complete.  Nor can we mark the
+	 * cache entry as complete after fetching the first inner tuple, because
+	 * if that tuple and the current outer tuple don't satisfy the join
+	 * clauses, a second inner tuple that satisfies the parameters would find
+	 * the cache entry already marked as complete.  The only exception is when
+	 * the inner relation is provably unique, as in that case, there won't be
+	 * a second matching tuple and we can safely mark the cache entry as
+	 * complete after fetching the first inner tuple.  Note that in such
+	 * cases, the SEMI join should have been reduced to an inner join by
+	 * reduce_unique_semijoins.
 	 */
-	if (!extra->inner_unique && (jointype == JOIN_SEMI ||
-								 jointype == JOIN_ANTI))
+	if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
+		!extra->inner_unique)
 		return NULL;
 
 	/*
@@ -876,16 +879,13 @@ try_nestloop_path(PlannerInfo *root,
 	/*
 	 * Check to see if proposed path is still parameterized, and reject if the
 	 * parameterization wouldn't be sensible --- unless allow_star_schema_join
-	 * says to allow it anyway.  Also, we must reject if have_dangerous_phv
-	 * doesn't like the look of it, which could only happen if the nestloop is
-	 * still parameterized.
+	 * says to allow it anyway.
 	 */
 	required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
 												  innerrelids, inner_paramrels);
 	if (required_outer &&
-		((!bms_overlap(required_outer, extra->param_source_rels) &&
-		  !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
-		 have_dangerous_phv(root, outerrelids, inner_paramrels)))
+		!bms_overlap(required_outer, extra->param_source_rels) &&
+		!allow_star_schema_join(root, outerrelids, inner_paramrels))
 	{
 		/* Waste no memory when we reject a path here */
 		bms_free(required_outer);
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 60d65762b5d..aad41b94009 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -565,9 +565,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 		 * Also, if the lateral reference is only indirect, we should reject
 		 * the join; whatever rel(s) the reference chain goes through must be
 		 * joined to first.
-		 *
-		 * Another case that might keep us from building a valid plan is the
-		 * implementation restriction described by have_dangerous_phv().
 		 */
 		lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
 		lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
@@ -584,9 +581,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 			/* check there is a direct reference from rel2 to rel1 */
 			if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids))
 				return false;	/* only indirect refs, so reject */
-			/* check we won't have a dangerous PHV */
-			if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
-				return false;	/* might be unable to handle required PHV */
 		}
 		else if (lateral_rev)
 		{
@@ -599,9 +593,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
 			/* check there is a direct reference from rel1 to rel2 */
 			if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids))
 				return false;	/* only indirect refs, so reject */
-			/* check we won't have a dangerous PHV */
-			if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
-				return false;	/* might be unable to handle required PHV */
 		}
 
 		/*
@@ -1279,57 +1270,6 @@ has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
 
 
 /*
- * There's a pitfall for creating parameterized nestloops: suppose the inner
- * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's
- * minimum eval_at set includes the outer rel (B) and some third rel (C).
- * We might think we could create a B/A nestloop join that's parameterized by
- * C.  But we would end up with a plan in which the PHV's expression has to be
- * evaluated as a nestloop parameter at the B/A join; and the executor is only
- * set up to handle simple Vars as NestLoopParams.  Rather than add complexity
- * and overhead to the executor for such corner cases, it seems better to
- * forbid the join.  (Note that we can still make use of A's parameterized
- * path with pre-joined B+C as the outer rel.  have_join_order_restriction()
- * ensures that we will consider making such a join even if there are not
- * other reasons to do so.)
- *
- * So we check whether any PHVs used in the query could pose such a hazard.
- * We don't have any simple way of checking whether a risky PHV would actually
- * be used in the inner plan, and the case is so unusual that it doesn't seem
- * worth working very hard on it.
- *
- * This needs to be checked in two places.  If the inner rel's minimum
- * parameterization would trigger the restriction, then join_is_legal() should
- * reject the join altogether, because there will be no workable paths for it.
- * But joinpath.c has to check again for every proposed nestloop path, because
- * the inner path might have more than the minimum parameterization, causing
- * some PHV to be dangerous for it that otherwise wouldn't be.
- */
-bool
-have_dangerous_phv(PlannerInfo *root,
-				   Relids outer_relids, Relids inner_params)
-{
-	ListCell   *lc;
-
-	foreach(lc, root->placeholder_list)
-	{
-		PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
-
-		if (!bms_is_subset(phinfo->ph_eval_at, inner_params))
-			continue;			/* ignore, could not be a nestloop param */
-		if (!bms_overlap(phinfo->ph_eval_at, outer_relids))
-			continue;			/* ignore, not relevant to this join */
-		if (bms_is_subset(phinfo->ph_eval_at, outer_relids))
-			continue;			/* safe, it can be eval'd within outerrel */
-		/* Otherwise, it's potentially unsafe, so reject the join */
-		return true;
-	}
-
-	/* OK to perform the join */
-	return false;
-}
-
-
-/*
  * is_dummy_rel --- has relation been proven empty?
  */
 bool
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 4ad30b7627e..8a9f1d7a943 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1318,6 +1318,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
 			Oid		   *sortOperators;
 			Oid		   *collations;
 			bool	   *nullsFirst;
+			int			presorted_keys;
 
 			/*
 			 * Compute sort column info, and adjust subplan's tlist as needed.
@@ -1353,14 +1354,38 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags)
 						  numsortkeys * sizeof(bool)) == 0);
 
 			/* Now, insert a Sort node if subplan isn't sufficiently ordered */
-			if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+			if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+											 &presorted_keys))
 			{
-				Sort	   *sort = make_sort(subplan, numsortkeys,
+				Plan	   *sort_plan;
+
+				/*
+				 * We choose to use incremental sort if it is enabled and
+				 * there are presorted keys; otherwise we use full sort.
+				 */
+				if (enable_incremental_sort && presorted_keys > 0)
+				{
+					sort_plan = (Plan *)
+						make_incrementalsort(subplan, numsortkeys, presorted_keys,
 											 sortColIdx, sortOperators,
 											 collations, nullsFirst);
 
-				label_sort_with_costsize(root, sort, best_path->limit_tuples);
-				subplan = (Plan *) sort;
+					label_incrementalsort_with_costsize(root,
+														(IncrementalSort *) sort_plan,
+														pathkeys,
+														best_path->limit_tuples);
+				}
+				else
+				{
+					sort_plan = (Plan *) make_sort(subplan, numsortkeys,
+												   sortColIdx, sortOperators,
+												   collations, nullsFirst);
+
+					label_sort_with_costsize(root, (Sort *) sort_plan,
+											 best_path->limit_tuples);
+				}
+
+				subplan = sort_plan;
 			}
 		}
 
@@ -1491,6 +1516,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
 		Oid		   *sortOperators;
 		Oid		   *collations;
 		bool	   *nullsFirst;
+		int			presorted_keys;
 
 		/* Build the child plan */
 		/* Must insist that all children return the same tlist */
@@ -1525,14 +1551,38 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path,
 					  numsortkeys * sizeof(bool)) == 0);
 
 		/* Now, insert a Sort node if subplan isn't sufficiently ordered */
-		if (!pathkeys_contained_in(pathkeys, subpath->pathkeys))
+		if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+										 &presorted_keys))
 		{
-			Sort	   *sort = make_sort(subplan, numsortkeys,
+			Plan	   *sort_plan;
+
+			/*
+			 * We choose to use incremental sort if it is enabled and there
+			 * are presorted keys; otherwise we use full sort.
+			 */
+			if (enable_incremental_sort && presorted_keys > 0)
+			{
+				sort_plan = (Plan *)
+					make_incrementalsort(subplan, numsortkeys, presorted_keys,
 										 sortColIdx, sortOperators,
 										 collations, nullsFirst);
 
-			label_sort_with_costsize(root, sort, best_path->limit_tuples);
-			subplan = (Plan *) sort;
+				label_incrementalsort_with_costsize(root,
+													(IncrementalSort *) sort_plan,
+													pathkeys,
+													best_path->limit_tuples);
+			}
+			else
+			{
+				sort_plan = (Plan *) make_sort(subplan, numsortkeys,
+											   sortColIdx, sortOperators,
+											   collations, nullsFirst);
+
+				label_sort_with_costsize(root, (Sort *) sort_plan,
+										 best_path->limit_tuples);
+			}
+
+			subplan = sort_plan;
 		}
 
 		subplans = lappend(subplans, subplan);
@@ -4344,13 +4394,16 @@ create_nestloop_plan(PlannerInfo *root,
 	NestLoop   *join_plan;
 	Plan	   *outer_plan;
 	Plan	   *inner_plan;
+	Relids		outerrelids;
 	List	   *tlist = build_path_tlist(root, &best_path->jpath.path);
 	List	   *joinrestrictclauses = best_path->jpath.joinrestrictinfo;
 	List	   *joinclauses;
 	List	   *otherclauses;
-	Relids		outerrelids;
 	List	   *nestParams;
+	List	   *outer_tlist;
+	bool		outer_parallel_safe;
 	Relids		saveOuterRels = root->curOuterRels;
+	ListCell   *lc;
 
 	/*
 	 * If the inner path is parameterized by the topmost parent of the outer
@@ -4372,8 +4425,8 @@ create_nestloop_plan(PlannerInfo *root,
 	outer_plan = create_plan_recurse(root, best_path->jpath.outerjoinpath, 0);
 
 	/* For a nestloop, include outer relids in curOuterRels for inner side */
-	root->curOuterRels = bms_union(root->curOuterRels,
-								   best_path->jpath.outerjoinpath->parent->relids);
+	outerrelids = best_path->jpath.outerjoinpath->parent->relids;
+	root->curOuterRels = bms_union(root->curOuterRels, outerrelids);
 
 	inner_plan = create_plan_recurse(root, best_path->jpath.innerjoinpath, 0);
 
@@ -4412,9 +4465,66 @@ create_nestloop_plan(PlannerInfo *root,
 	 * Identify any nestloop parameters that should be supplied by this join
 	 * node, and remove them from root->curOuterParams.
 	 */
-	outerrelids = best_path->jpath.outerjoinpath->parent->relids;
-	nestParams = identify_current_nestloop_params(root, outerrelids);
+	nestParams = identify_current_nestloop_params(root,
+												  outerrelids,
+												  PATH_REQ_OUTER((Path *) best_path));
+
+	/*
+	 * While nestloop parameters that are Vars had better be available from
+	 * the outer_plan already, there are edge cases where nestloop parameters
+	 * that are PHVs won't be.  In such cases we must add them to the
+	 * outer_plan's tlist, since the executor's NestLoopParam machinery
+	 * requires the params to be simple outer-Var references to that tlist.
+	 * (This is cheating a little bit, because the outer path's required-outer
+	 * relids might not be enough to allow evaluating such a PHV.  But in
+	 * practice, if we could have evaluated the PHV at the nestloop node, we
+	 * can do so in the outer plan too.)
+	 */
+	outer_tlist = outer_plan->targetlist;
+	outer_parallel_safe = outer_plan->parallel_safe;
+	foreach(lc, nestParams)
+	{
+		NestLoopParam *nlp = (NestLoopParam *) lfirst(lc);
+		PlaceHolderVar *phv;
+		TargetEntry *tle;
+
+		if (IsA(nlp->paramval, Var))
+			continue;			/* nothing to do for simple Vars */
+		/* Otherwise it must be a PHV */
+		phv = castNode(PlaceHolderVar, nlp->paramval);
+
+		if (tlist_member((Expr *) phv, outer_tlist))
+			continue;			/* already available */
+
+		/*
+		 * It's possible that nestloop parameter PHVs selected to evaluate
+		 * here contain references to surviving root->curOuterParams items
+		 * (that is, they reference values that will be supplied by some
+		 * higher-level nestloop).  Those need to be converted to Params now.
+		 * Note: it's safe to do this after the tlist_member() check, because
+		 * equal() won't pay attention to phv->phexpr.
+		 */
+		phv->phexpr = (Expr *) replace_nestloop_params(root,
+													   (Node *) phv->phexpr);
+
+		/* Make a shallow copy of outer_tlist, if we didn't already */
+		if (outer_tlist == outer_plan->targetlist)
+			outer_tlist = list_copy(outer_tlist);
+		/* ... and add the needed expression */
+		tle = makeTargetEntry((Expr *) copyObject(phv),
+							  list_length(outer_tlist) + 1,
+							  NULL,
+							  true);
+		outer_tlist = lappend(outer_tlist, tle);
+		/* ... and track whether tlist is (still) parallel-safe */
+		if (outer_parallel_safe)
+			outer_parallel_safe = is_parallel_safe(root, (Node *) phv);
+	}
+	if (outer_tlist != outer_plan->targetlist)
+		outer_plan = change_plan_targetlist(outer_plan, outer_tlist,
+											outer_parallel_safe);
 
+	/* And finally, we can build the join plan node */
 	join_plan = make_nestloop(tlist,
 							  joinclauses,
 							  otherclauses,
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index ff65867eebe..549aedcfa99 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -6879,7 +6879,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
  *
  * tableOid is the table on which the index is to be built.  indexOid is the
  * OID of an index to be created or reindexed (which must be an index with
- * support for parallel builds - currently btree or BRIN).
+ * support for parallel builds - currently btree, GIN, or BRIN).
  *
  * Return value is the number of parallel worker processes to request.  It
  * may be unsafe to proceed if this is 0.  Note that this does not include the
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 26a3e050086..f45131c34c5 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -3333,6 +3333,13 @@ eval_const_expressions_mutator(Node *node,
 												  -1,
 												  coalesceexpr->coalescecollid);
 
+				/*
+				 * If there's exactly one surviving argument, we no longer
+				 * need COALESCE at all: the result is that argument.
+				 */
+				if (list_length(newargs) == 1)
+					return (Node *) linitial(newargs);
+
 				newcoalesce = makeNode(CoalesceExpr);
 				newcoalesce->coalescetype = coalesceexpr->coalescetype;
 				newcoalesce->coalescecollid = coalesceexpr->coalescecollid;
diff --git a/src/backend/optimizer/util/paramassign.c b/src/backend/optimizer/util/paramassign.c
index 3bd3ce37c8f..4c13c5931b4 100644
--- a/src/backend/optimizer/util/paramassign.c
+++ b/src/backend/optimizer/util/paramassign.c
@@ -599,38 +599,46 @@ process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params)
 }
 
 /*
- * Identify any NestLoopParams that should be supplied by a NestLoop plan
- * node with the specified lefthand rels.  Remove them from the active
- * root->curOuterParams list and return them as the result list.
+ * Identify any NestLoopParams that should be supplied by a NestLoop
+ * plan node with the specified lefthand rels and required-outer rels.
+ * Remove them from the active root->curOuterParams list and return
+ * them as the result list.
  *
- * XXX Here we also hack up the returned Vars and PHVs so that they do not
- * contain nullingrel sets exceeding what is available from the outer side.
- * This is needed if we have applied outer join identity 3,
- *		(A leftjoin B on (Pab)) leftjoin C on (Pb*c)
- *		= A leftjoin (B leftjoin C on (Pbc)) on (Pab)
- * and C contains lateral references to B.  It's still safe to apply the
- * identity, but the parser will have created those references in the form
- * "b*" (i.e., with varnullingrels listing the A/B join), while what we will
- * have available from the nestloop's outer side is just "b".  We deal with
- * that here by stripping the nullingrels down to what is available from the
- * outer side according to leftrelids.
- *
- * That fixes matters for the case of forward application of identity 3.
- * If the identity was applied in the reverse direction, we will have
- * parameter Vars containing too few nullingrel bits rather than too many.
- * Currently, that causes no problems because setrefs.c applies only a
- * subset check to nullingrels in NestLoopParams, but we'd have to work
- * harder if we ever want to tighten that check.  This is all pretty annoying
- * because it greatly weakens setrefs.c's cross-check, but the alternative
+ * Vars and PHVs appearing in the result list must have nullingrel sets
+ * that could validly appear in the lefthand rel's output.  Ordinarily that
+ * would be true already, but if we have applied outer join identity 3,
+ * there could be more or fewer nullingrel bits in the nodes appearing in
+ * curOuterParams than are in the nominal leftrelids.  We deal with that by
+ * forcing their nullingrel sets to include exactly the outer-join relids
+ * that appear in leftrelids and can null the respective Var or PHV.
+ * This fix is a bit ad-hoc and intellectually unsatisfactory, because it's
+ * essentially jumping to the conclusion that we've placed evaluation of
+ * the nestloop parameters correctly, and thus it defeats the intent of the
+ * subsequent nullingrel cross-checks in setrefs.c.  But the alternative
  * seems to be to generate multiple versions of each laterally-parameterized
  * subquery, which'd be unduly expensive.
  */
 List *
-identify_current_nestloop_params(PlannerInfo *root, Relids leftrelids)
+identify_current_nestloop_params(PlannerInfo *root,
+								 Relids leftrelids,
+								 Relids outerrelids)
 {
 	List	   *result;
+	Relids		allleftrelids;
 	ListCell   *cell;
 
+	/*
+	 * We'll be able to evaluate a PHV in the lefthand path if it uses the
+	 * lefthand rels plus any available required-outer rels.  But don't do so
+	 * if it uses *only* required-outer rels; in that case it should be
+	 * evaluated higher in the tree.  For Vars, no such hair-splitting is
+	 * necessary since they depend on only one relid.
+	 */
+	if (outerrelids)
+		allleftrelids = bms_union(leftrelids, outerrelids);
+	else
+		allleftrelids = leftrelids;
+
 	result = NIL;
 	foreach(cell, root->curOuterParams)
 	{
@@ -646,25 +654,60 @@ identify_current_nestloop_params(PlannerInfo *root, Relids leftrelids)
 			bms_is_member(nlp->paramval->varno, leftrelids))
 		{
 			Var		   *var = (Var *) nlp->paramval;
+			RelOptInfo *rel = root->simple_rel_array[var->varno];
 
 			root->curOuterParams = foreach_delete_current(root->curOuterParams,
 														  cell);
-			var->varnullingrels = bms_intersect(var->varnullingrels,
+			var->varnullingrels = bms_intersect(rel->nulling_relids,
 												leftrelids);
 			result = lappend(result, nlp);
 		}
-		else if (IsA(nlp->paramval, PlaceHolderVar) &&
-				 bms_is_subset(find_placeholder_info(root,
-													 (PlaceHolderVar *) nlp->paramval)->ph_eval_at,
-							   leftrelids))
+		else if (IsA(nlp->paramval, PlaceHolderVar))
 		{
 			PlaceHolderVar *phv = (PlaceHolderVar *) nlp->paramval;
+			PlaceHolderInfo *phinfo = find_placeholder_info(root, phv);
+			Relids		eval_at = phinfo->ph_eval_at;
 
-			root->curOuterParams = foreach_delete_current(root->curOuterParams,
-														  cell);
-			phv->phnullingrels = bms_intersect(phv->phnullingrels,
-											   leftrelids);
-			result = lappend(result, nlp);
+			if (bms_is_subset(eval_at, allleftrelids) &&
+				bms_overlap(eval_at, leftrelids))
+			{
+				root->curOuterParams = foreach_delete_current(root->curOuterParams,
+															  cell);
+
+				/*
+				 * Deal with an edge case: if the PHV was pulled up out of a
+				 * subquery and it contains a subquery that was originally
+				 * pushed down from this query level, then that will still be
+				 * represented as a SubLink, because SS_process_sublinks won't
+				 * recurse into outer PHVs, so it didn't get transformed
+				 * during expression preprocessing in the subquery.  We need a
+				 * version of the PHV that has a SubPlan, which we can get
+				 * from the current query level's placeholder_list.  This is
+				 * quite grotty of course, but dealing with it earlier in the
+				 * handling of subplan params would be just as grotty, and it
+				 * might end up being a waste of cycles if we don't decide to
+				 * treat the PHV as a NestLoopParam.  (Perhaps that whole
+				 * mechanism should be redesigned someday, but today is not
+				 * that day.)
+				 */
+				if (root->parse->hasSubLinks)
+				{
+					phv = copyObject(phinfo->ph_var);
+
+					/*
+					 * The ph_var will have empty nullingrels, but that
+					 * doesn't matter since we're about to overwrite
+					 * phv->phnullingrels.  Other fields should be OK already.
+					 */
+					nlp->paramval = (Var *) phv;
+				}
+
+				phv->phnullingrels =
+					bms_intersect(get_placeholder_nulling_relids(root, phinfo),
+								  leftrelids);
+
+				result = lappend(result, nlp);
+			}
 		}
 	}
 	return result;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e0192d4a491..9cc602788ea 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1404,12 +1404,12 @@ create_append_path(PlannerInfo *root,
 			pathnode->path.total_cost = child->total_cost;
 		}
 		else
-			cost_append(pathnode);
+			cost_append(pathnode, root);
 		/* Must do this last, else cost_append complains */
 		pathnode->path.pathkeys = child->pathkeys;
 	}
 	else
-		cost_append(pathnode);
+		cost_append(pathnode, root);
 
 	/* If the caller provided a row estimate, override the computed value. */
 	if (rows >= 0)
@@ -1515,6 +1515,9 @@ create_merge_append_path(PlannerInfo *root,
 	foreach(l, subpaths)
 	{
 		Path	   *subpath = (Path *) lfirst(l);
+		int			presorted_keys;
+		Path		sort_path;	/* dummy for result of
+								 * cost_sort/cost_incremental_sort */
 
 		/* All child paths should be unparameterized */
 		Assert(bms_is_empty(PATH_REQ_OUTER(subpath)));
@@ -1523,32 +1526,52 @@ create_merge_append_path(PlannerInfo *root,
 		pathnode->path.parallel_safe = pathnode->path.parallel_safe &&
 			subpath->parallel_safe;
 
-		if (pathkeys_contained_in(pathkeys, subpath->pathkeys))
+		if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys,
+										 &presorted_keys))
 		{
-			/* Subpath is adequately ordered, we won't need to sort it */
-			input_disabled_nodes += subpath->disabled_nodes;
-			input_startup_cost += subpath->startup_cost;
-			input_total_cost += subpath->total_cost;
-		}
-		else
-		{
-			/* We'll need to insert a Sort node, so include cost for that */
-			Path		sort_path;	/* dummy for result of cost_sort */
+			/*
+			 * We'll need to insert a Sort node, so include costs for that. We
+			 * choose to use incremental sort if it is enabled and there are
+			 * presorted keys; otherwise we use full sort.
+			 *
+			 * We can use the parent's LIMIT if any, since we certainly won't
+			 * pull more than that many tuples from any child.
+			 */
+			if (enable_incremental_sort && presorted_keys > 0)
+			{
+				cost_incremental_sort(&sort_path,
+									  root,
+									  pathkeys,
+									  presorted_keys,
+									  subpath->disabled_nodes,
+									  subpath->startup_cost,
+									  subpath->total_cost,
+									  subpath->rows,
+									  subpath->pathtarget->width,
+									  0.0,
+									  work_mem,
+									  pathnode->limit_tuples);
+			}
+			else
+			{
+				cost_sort(&sort_path,
+						  root,
+						  pathkeys,
+						  subpath->disabled_nodes,
+						  subpath->total_cost,
+						  subpath->rows,
+						  subpath->pathtarget->width,
+						  0.0,
+						  work_mem,
+						  pathnode->limit_tuples);
+			}
 
-			cost_sort(&sort_path,
-					  root,
-					  pathkeys,
-					  subpath->disabled_nodes,
-					  subpath->total_cost,
-					  subpath->rows,
-					  subpath->pathtarget->width,
-					  0.0,
-					  work_mem,
-					  pathnode->limit_tuples);
-			input_disabled_nodes += sort_path.disabled_nodes;
-			input_startup_cost += sort_path.startup_cost;
-			input_total_cost += sort_path.total_cost;
+			subpath = &sort_path;
 		}
+
+		input_disabled_nodes += subpath->disabled_nodes;
+		input_startup_cost += subpath->startup_cost;
+		input_total_cost += subpath->total_cost;
 	}
 
 	/*
diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c
index 41a4c81e94a..e1cd00a72fb 100644
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@@ -545,3 +545,43 @@ contain_placeholder_references_walker(Node *node,
 	return expression_tree_walker(node, contain_placeholder_references_walker,
 								  context);
 }
+
+/*
+ * Compute the set of outer-join relids that can null a placeholder: the union
+ * of nulling_relids over the rels in ph_eval_at, minus ph_eval_at's members.
+ *
+ * This is analogous to RelOptInfo.nulling_relids for Vars, but we compute it
+ * on-the-fly rather than saving it somewhere.  Currently the value is needed
+ * at most once per query, so there's little value in doing otherwise.  If it
+ * ever gains more widespread use, consider caching it in PlaceHolderInfo.
+ */
+Relids
+get_placeholder_nulling_relids(PlannerInfo *root, PlaceHolderInfo *phinfo)
+{
+	Relids		result = NULL;
+	int			relid = -1;
+
+	/*
+	 * Form the union of all potential nulling OJs for each baserel included
+	 * in ph_eval_at; stop when bms_next_member() yields no positive relid.
+	 */
+	while ((relid = bms_next_member(phinfo->ph_eval_at, relid)) > 0)
+	{
+		RelOptInfo *rel = root->simple_rel_array[relid];
+
+		/* ignore the RTE_GROUP RTE */
+		if (relid == root->group_rtindex)
+			continue;
+
+		if (rel == NULL)		/* must be an outer join */
+		{
+			Assert(bms_is_member(relid, root->outer_join_rels));
+			continue;
+		}
+		result = bms_add_members(result, rel->nulling_relids);
+	}
+
+	/* Now remove any OJs already included in ph_eval_at, and we're done. */
+	result = bms_del_members(result, phinfo->ph_eval_at);
+	return result;
+}
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index a16fdd65601..34f7c17f576 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -239,102 +239,23 @@ parse_sub_analyze(Node *parseTree, ParseState *parentParseState,
 }
 
 /*
- * setQueryLocationAndLength
- * 		Set query's location and length from statement and ParseState
- *
- * Some statements, like PreparableStmt, can be located within parentheses.
- * For example "(SELECT 1)" or "COPY (UPDATE ...) to x;".  For those, we
- * cannot use the whole string from the statement's location or the SQL
- * string would yield incorrectly.  The parser will set stmt_len, reflecting
- * the size of the statement within the parentheses.  Thus, when stmt_len is
- * available, we need to use it for the Query's stmt_len.
- *
- * For other cases, the parser can't provide the length of individual
- * statements.  However, we have the statement's location plus the length
- * (p_stmt_len) and location (p_stmt_location) of the top level RawStmt,
- * stored in pstate.  Thus, the statement's length is the RawStmt's length
- * minus how much we've advanced in the RawStmt's string.  If p_stmt_len
- * is 0, the SQL string is used up to its end.
- */
-static void
-setQueryLocationAndLength(ParseState *pstate, Query *qry, Node *parseTree)
-{
-	ParseLoc	stmt_len = 0;
-
-	switch (nodeTag(parseTree))
-	{
-		case T_InsertStmt:
-			qry->stmt_location = ((InsertStmt *) parseTree)->stmt_location;
-			stmt_len = ((InsertStmt *) parseTree)->stmt_len;
-			break;
-
-		case T_DeleteStmt:
-			qry->stmt_location = ((DeleteStmt *) parseTree)->stmt_location;
-			stmt_len = ((DeleteStmt *) parseTree)->stmt_len;
-			break;
-
-		case T_UpdateStmt:
-			qry->stmt_location = ((UpdateStmt *) parseTree)->stmt_location;
-			stmt_len = ((UpdateStmt *) parseTree)->stmt_len;
-			break;
-
-		case T_MergeStmt:
-			qry->stmt_location = ((MergeStmt *) parseTree)->stmt_location;
-			stmt_len = ((MergeStmt *) parseTree)->stmt_len;
-			break;
-
-		case T_SelectStmt:
-			qry->stmt_location = ((SelectStmt *) parseTree)->stmt_location;
-			stmt_len = ((SelectStmt *) parseTree)->stmt_len;
-			break;
-
-		case T_PLAssignStmt:
-			qry->stmt_location = ((PLAssignStmt *) parseTree)->location;
-			break;
-
-		default:
-			qry->stmt_location = pstate->p_stmt_location;
-			break;
-	}
-
-	if (stmt_len > 0)
-	{
-		/* Statement's length is known, use it */
-		qry->stmt_len = stmt_len;
-	}
-	else if (pstate->p_stmt_len > 0)
-	{
-		/*
-		 * The top RawStmt's length is known, so calculate the statement's
-		 * length from the statement's location and the RawStmt's length and
-		 * location.
-		 */
-		qry->stmt_len = pstate->p_stmt_len - (qry->stmt_location - pstate->p_stmt_location);
-	}
-
-	/* The calculated statement length should be calculated as positive. */
-	Assert(qry->stmt_len >= 0);
-}
-
-/*
  * transformTopLevelStmt -
  *	  transform a Parse tree into a Query tree.
  *
- * This function is just responsible for storing location data
- * from the RawStmt into the ParseState.
+ * This function is just responsible for transferring statement location data
+ * from the RawStmt into the finished Query.
  */
 Query *
 transformTopLevelStmt(ParseState *pstate, RawStmt *parseTree)
 {
 	Query	   *result;
 
-	/* Store RawStmt's length and location in pstate */
-	pstate->p_stmt_len = parseTree->stmt_len;
-	pstate->p_stmt_location = parseTree->stmt_location;
-
 	/* We're at top level, so allow SELECT INTO */
 	result = transformOptionalSelectInto(pstate, parseTree->stmt);
 
+	result->stmt_location = parseTree->stmt_location;
+	result->stmt_len = parseTree->stmt_len;
+
 	return result;
 }
 
@@ -503,7 +424,6 @@ transformStmt(ParseState *pstate, Node *parseTree)
 	/* Mark as original query until we learn differently */
 	result->querySource = QSRC_ORIGINAL;
 	result->canSetTag = true;
-	setQueryLocationAndLength(pstate, result, parseTree);
 
 	return result;
 }
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 0b5652071d1..73345bb3c70 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -154,7 +154,6 @@ static void base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner,
 						 const char *msg);
 static RawStmt *makeRawStmt(Node *stmt, int stmt_location);
 static void updateRawStmtEnd(RawStmt *rs, int end_location);
-static void updatePreparableStmtEnd(Node *n, int end_location);
 static Node *makeColumnRef(char *colname, List *indirection,
 						   int location, core_yyscan_t yyscanner);
 static Node *makeTypeCast(Node *arg, TypeName *typename, int location);
@@ -178,13 +177,13 @@ static void insertSelectOptions(SelectStmt *stmt,
 								SelectLimit *limitClause,
 								WithClause *withClause,
 								core_yyscan_t yyscanner);
-static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location);
+static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg);
 static Node *doNegate(Node *n, int location);
 static void doNegateFloat(Float *v);
 static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location);
 static Node *makeOrExpr(Node *lexpr, Node *rexpr, int location);
 static Node *makeNotExpr(Node *expr, int location);
-static Node *makeAArrayExpr(List *elements, int location);
+static Node *makeAArrayExpr(List *elements, int location, int end_location);
 static Node *makeSQLValueFunction(SQLValueFunctionOp op, int32 typmod,
 								  int location);
 static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args,
@@ -523,7 +522,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %type <defelt>	def_elem reloption_elem old_aggr_elem operator_def_elem
 %type <node>	def_arg columnElem where_clause where_or_current_clause
 				a_expr b_expr c_expr AexprConst indirection_el opt_slice_bound
-				columnref in_expr having_clause func_table xmltable array_expr
+				columnref having_clause func_table xmltable array_expr
 				OptWhereClause operator_def_arg
 %type <list>	opt_column_and_period_list
 %type <list>	rowsfrom_item rowsfrom_list opt_col_def_list
@@ -2035,6 +2034,13 @@ CheckPointStmt:
 
 					$$ = (Node *) n;
 				}
+			| CHECKPOINT '(' utility_option_list ')'
+				{
+					CheckPointStmt *n = makeNode(CheckPointStmt);
+
+					$$ = (Node *) n;
+					n->options = $3;
+				}
 		;
 
 
@@ -2669,6 +2675,12 @@ alter_table_cmd:
 						c->alterDeferrability = true;
 					if ($4 & CAS_NO_INHERIT)
 						c->alterInheritability = true;
+					/* handle unsupported case with specific error message */
+					if ($4 & CAS_NOT_VALID)
+						ereport(ERROR,
+								errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+								errmsg("constraints cannot be altered to be NOT VALID"),
+								parser_errposition(@4));
 					processCASbits($4, @4, "FOREIGN KEY",
 									&c->deferrable,
 									&c->initdeferred,
@@ -3417,7 +3429,6 @@ CopyStmt:	COPY opt_binary qualified_name opt_column_list
 				{
 					CopyStmt *n = makeNode(CopyStmt);
 
-					updatePreparableStmtEnd($3, @4);
 					n->relation = NULL;
 					n->query = $3;
 					n->attlist = NIL;
@@ -6037,6 +6048,26 @@ CreateTrigStmt:
 			EXECUTE FUNCTION_or_PROCEDURE func_name '(' TriggerFuncArgs ')'
 				{
 					CreateTrigStmt *n = makeNode(CreateTrigStmt);
+					bool		dummy;
+
+					if (($11 & CAS_NOT_VALID) != 0)
+						ereport(ERROR,
+								errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+								errmsg("constraint triggers cannot be marked %s",
+									   "NOT VALID"),
+								parser_errposition(@11));
+					if (($11 & CAS_NO_INHERIT) != 0)
+						ereport(ERROR,
+								errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+								errmsg("constraint triggers cannot be marked %s",
+									   "NO INHERIT"),
+								parser_errposition(@11));
+					if (($11 & CAS_NOT_ENFORCED) != 0)
+						ereport(ERROR,
+								errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+								errmsg("constraint triggers cannot be marked %s",
+									   "NOT ENFORCED"),
+								parser_errposition(@11));
 
 					n->replace = $2;
 					if (n->replace) /* not supported, see CreateTrigger */
@@ -6056,7 +6087,7 @@ CreateTrigStmt:
 					n->whenClause = $15;
 					n->transitionRels = NIL;
 					processCASbits($11, @11, "TRIGGER",
-								   &n->deferrable, &n->initdeferred, NULL,
+								   &n->deferrable, &n->initdeferred, &dummy,
 								   NULL, NULL, yyscanner);
 					n->constrrel = $10;
 					$$ = (Node *) n;
@@ -7479,6 +7510,8 @@ fetch_args:	cursor_name
 					n->portalname = $1;
 					n->direction = FETCH_FORWARD;
 					n->howMany = 1;
+					n->location = -1;
+					n->direction_keyword = FETCH_KEYWORD_NONE;
 					$$ = (Node *) n;
 				}
 			| from_in cursor_name
@@ -7488,6 +7521,19 @@ fetch_args:	cursor_name
 					n->portalname = $2;
 					n->direction = FETCH_FORWARD;
 					n->howMany = 1;
+					n->location = -1;
+					n->direction_keyword = FETCH_KEYWORD_NONE;
+					$$ = (Node *) n;
+				}
+			| SignedIconst opt_from_in cursor_name
+				{
+					FetchStmt *n = makeNode(FetchStmt);
+
+					n->portalname = $3;
+					n->direction = FETCH_FORWARD;
+					n->howMany = $1;
+					n->location = @1;
+					n->direction_keyword = FETCH_KEYWORD_NONE;
 					$$ = (Node *) n;
 				}
 			| NEXT opt_from_in cursor_name
@@ -7497,6 +7543,8 @@ fetch_args:	cursor_name
 					n->portalname = $3;
 					n->direction = FETCH_FORWARD;
 					n->howMany = 1;
+					n->location = -1;
+					n->direction_keyword = FETCH_KEYWORD_NEXT;
 					$$ = (Node *) n;
 				}
 			| PRIOR opt_from_in cursor_name
@@ -7506,6 +7554,8 @@ fetch_args:	cursor_name
 					n->portalname = $3;
 					n->direction = FETCH_BACKWARD;
 					n->howMany = 1;
+					n->location = -1;
+					n->direction_keyword = FETCH_KEYWORD_PRIOR;
 					$$ = (Node *) n;
 				}
 			| FIRST_P opt_from_in cursor_name
@@ -7515,6 +7565,8 @@ fetch_args:	cursor_name
 					n->portalname = $3;
 					n->direction = FETCH_ABSOLUTE;
 					n->howMany = 1;
+					n->location = -1;
+					n->direction_keyword = FETCH_KEYWORD_FIRST;
 					$$ = (Node *) n;
 				}
 			| LAST_P opt_from_in cursor_name
@@ -7524,6 +7576,8 @@ fetch_args:	cursor_name
 					n->portalname = $3;
 					n->direction = FETCH_ABSOLUTE;
 					n->howMany = -1;
+					n->location = -1;
+					n->direction_keyword = FETCH_KEYWORD_LAST;
 					$$ = (Node *) n;
 				}
 			| ABSOLUTE_P SignedIconst opt_from_in cursor_name
@@ -7533,6 +7587,8 @@ fetch_args:	cursor_name
 					n->portalname = $4;
 					n->direction = FETCH_ABSOLUTE;
 					n->howMany = $2;
+					n->location = @2;
+					n->direction_keyword = FETCH_KEYWORD_ABSOLUTE;
 					$$ = (Node *) n;
 				}
 			| RELATIVE_P SignedIconst opt_from_in cursor_name
@@ -7542,15 +7598,8 @@ fetch_args:	cursor_name
 					n->portalname = $4;
 					n->direction = FETCH_RELATIVE;
 					n->howMany = $2;
-					$$ = (Node *) n;
-				}
-			| SignedIconst opt_from_in cursor_name
-				{
-					FetchStmt *n = makeNode(FetchStmt);
-
-					n->portalname = $3;
-					n->direction = FETCH_FORWARD;
-					n->howMany = $1;
+					n->location = @2;
+					n->direction_keyword = FETCH_KEYWORD_RELATIVE;
 					$$ = (Node *) n;
 				}
 			| ALL opt_from_in cursor_name
@@ -7560,6 +7609,8 @@ fetch_args:	cursor_name
 					n->portalname = $3;
 					n->direction = FETCH_FORWARD;
 					n->howMany = FETCH_ALL;
+					n->location = -1;
+					n->direction_keyword = FETCH_KEYWORD_ALL;
 					$$ = (Node *) n;
 				}
 			| FORWARD opt_from_in cursor_name
@@ -7569,6 +7620,8 @@ fetch_args:	cursor_name
 					n->portalname = $3;
 					n->direction = FETCH_FORWARD;
 					n->howMany = 1;
+					n->location = -1;
+					n->direction_keyword = FETCH_KEYWORD_FORWARD;
 					$$ = (Node *) n;
 				}
 			| FORWARD SignedIconst opt_from_in cursor_name
@@ -7578,6 +7631,8 @@ fetch_args:	cursor_name
 					n->portalname = $4;
 					n->direction = FETCH_FORWARD;
 					n->howMany = $2;
+					n->location = @2;
+					n->direction_keyword = FETCH_KEYWORD_FORWARD;
 					$$ = (Node *) n;
 				}
 			| FORWARD ALL opt_from_in cursor_name
@@ -7587,6 +7642,8 @@ fetch_args:	cursor_name
 					n->portalname = $4;
 					n->direction = FETCH_FORWARD;
 					n->howMany = FETCH_ALL;
+					n->location = -1;
+					n->direction_keyword = FETCH_KEYWORD_FORWARD_ALL;
 					$$ = (Node *) n;
 				}
 			| BACKWARD opt_from_in cursor_name
@@ -7596,6 +7653,8 @@ fetch_args:	cursor_name
 					n->portalname = $3;
 					n->direction = FETCH_BACKWARD;
 					n->howMany = 1;
+					n->location = -1;
+					n->direction_keyword = FETCH_KEYWORD_BACKWARD;
 					$$ = (Node *) n;
 				}
 			| BACKWARD SignedIconst opt_from_in cursor_name
@@ -7605,6 +7664,8 @@ fetch_args:	cursor_name
 					n->portalname = $4;
 					n->direction = FETCH_BACKWARD;
 					n->howMany = $2;
+					n->location = @2;
+					n->direction_keyword = FETCH_KEYWORD_BACKWARD;
 					$$ = (Node *) n;
 				}
 			| BACKWARD ALL opt_from_in cursor_name
@@ -7614,6 +7675,8 @@ fetch_args:	cursor_name
 					n->portalname = $4;
 					n->direction = FETCH_BACKWARD;
 					n->howMany = FETCH_ALL;
+					n->location = -1;
+					n->direction_keyword = FETCH_KEYWORD_BACKWARD_ALL;
 					$$ = (Node *) n;
 				}
 		;
@@ -11629,7 +11692,7 @@ AlterDomainStmt:
 				{
 					AlterDomainStmt *n = makeNode(AlterDomainStmt);
 
-					n->subtype = 'T';
+					n->subtype = AD_AlterDefault;
 					n->typeName = $3;
 					n->def = $4;
 					$$ = (Node *) n;
@@ -11639,7 +11702,7 @@ AlterDomainStmt:
 				{
 					AlterDomainStmt *n = makeNode(AlterDomainStmt);
 
-					n->subtype = 'N';
+					n->subtype = AD_DropNotNull;
 					n->typeName = $3;
 					$$ = (Node *) n;
 				}
@@ -11648,7 +11711,7 @@ AlterDomainStmt:
 				{
 					AlterDomainStmt *n = makeNode(AlterDomainStmt);
 
-					n->subtype = 'O';
+					n->subtype = AD_SetNotNull;
 					n->typeName = $3;
 					$$ = (Node *) n;
 				}
@@ -11657,7 +11720,7 @@ AlterDomainStmt:
 				{
 					AlterDomainStmt *n = makeNode(AlterDomainStmt);
 
-					n->subtype = 'C';
+					n->subtype = AD_AddConstraint;
 					n->typeName = $3;
 					n->def = $5;
 					$$ = (Node *) n;
@@ -11667,7 +11730,7 @@ AlterDomainStmt:
 				{
 					AlterDomainStmt *n = makeNode(AlterDomainStmt);
 
-					n->subtype = 'X';
+					n->subtype = AD_DropConstraint;
 					n->typeName = $3;
 					n->name = $6;
 					n->behavior = $7;
@@ -11679,7 +11742,7 @@ AlterDomainStmt:
 				{
 					AlterDomainStmt *n = makeNode(AlterDomainStmt);
 
-					n->subtype = 'X';
+					n->subtype = AD_DropConstraint;
 					n->typeName = $3;
 					n->name = $8;
 					n->behavior = $9;
@@ -11691,7 +11754,7 @@ AlterDomainStmt:
 				{
 					AlterDomainStmt *n = makeNode(AlterDomainStmt);
 
-					n->subtype = 'V';
+					n->subtype = AD_ValidateConstraint;
 					n->typeName = $3;
 					n->name = $6;
 					$$ = (Node *) n;
@@ -12240,7 +12303,6 @@ InsertStmt:
 					$5->onConflictClause = $6;
 					$5->returningClause = $7;
 					$5->withClause = $1;
-					$5->stmt_location = @$;
 					$$ = (Node *) $5;
 				}
 		;
@@ -12431,7 +12493,6 @@ DeleteStmt: opt_with_clause DELETE_P FROM relation_expr_opt_alias
 					n->whereClause = $6;
 					n->returningClause = $7;
 					n->withClause = $1;
-					n->stmt_location = @$;
 					$$ = (Node *) n;
 				}
 		;
@@ -12506,7 +12567,6 @@ UpdateStmt: opt_with_clause UPDATE relation_expr_opt_alias
 					n->whereClause = $7;
 					n->returningClause = $8;
 					n->withClause = $1;
-					n->stmt_location = @$;
 					$$ = (Node *) n;
 				}
 		;
@@ -12584,7 +12644,6 @@ MergeStmt:
 					m->joinCondition = $8;
 					m->mergeWhenClauses = $9;
 					m->returningClause = $10;
-					m->stmt_location = @$;
 
 					$$ = (Node *) m;
 				}
@@ -12825,20 +12884,7 @@ SelectStmt: select_no_parens			%prec UMINUS
 		;
 
 select_with_parens:
-			'(' select_no_parens ')'
-				{
-					SelectStmt *n = (SelectStmt *) $2;
-
-					/*
-					 * As SelectStmt's location starts at the SELECT keyword,
-					 * we need to track the length of the SelectStmt within
-					 * parentheses to be able to extract the relevant part
-					 * of the query.  Without this, the RawStmt's length would
-					 * be used and would include the closing parenthesis.
-					 */
-					n->stmt_len = @3 - @2;
-					$$ = $2;
-				}
+			'(' select_no_parens ')'				{ $$ = $2; }
 			| '(' select_with_parens ')'			{ $$ = $2; }
 		;
 
@@ -12960,7 +13006,6 @@ simple_select:
 					n->groupDistinct = ($7)->distinct;
 					n->havingClause = $8;
 					n->windowClause = $9;
-					n->stmt_location = @1;
 					$$ = (Node *) n;
 				}
 			| SELECT distinct_clause target_list
@@ -12978,7 +13023,6 @@ simple_select:
 					n->groupDistinct = ($7)->distinct;
 					n->havingClause = $8;
 					n->windowClause = $9;
-					n->stmt_location = @1;
 					$$ = (Node *) n;
 				}
 			| values_clause							{ $$ = $1; }
@@ -12999,20 +13043,19 @@ simple_select:
 
 					n->targetList = list_make1(rt);
 					n->fromClause = list_make1($2);
-					n->stmt_location = @1;
 					$$ = (Node *) n;
 				}
 			| select_clause UNION set_quantifier select_clause
 				{
-					$$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
+					$$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4);
 				}
 			| select_clause INTERSECT set_quantifier select_clause
 				{
-					$$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
+					$$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4);
 				}
 			| select_clause EXCEPT set_quantifier select_clause
 				{
-					$$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
+					$$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4);
 				}
 		;
 
@@ -13590,7 +13633,6 @@ values_clause:
 				{
 					SelectStmt *n = makeNode(SelectStmt);
 
-					n->stmt_location = @1;
 					n->valuesLists = list_make1($3);
 					$$ = (Node *) n;
 				}
@@ -15287,49 +15329,50 @@ a_expr:		c_expr									{ $$ = $1; }
 												   (Node *) list_make2($5, $7),
 												   @2);
 				}
-			| a_expr IN_P in_expr
+			| a_expr IN_P select_with_parens
 				{
-					/* in_expr returns a SubLink or a list of a_exprs */
-					if (IsA($3, SubLink))
-					{
-						/* generate foo = ANY (subquery) */
-						SubLink	   *n = (SubLink *) $3;
+					/* generate foo = ANY (subquery) */
+					SubLink	   *n = makeNode(SubLink);
 
-						n->subLinkType = ANY_SUBLINK;
-						n->subLinkId = 0;
-						n->testexpr = $1;
-						n->operName = NIL;		/* show it's IN not = ANY */
-						n->location = @2;
-						$$ = (Node *) n;
-					}
-					else
-					{
-						/* generate scalar IN expression */
-						$$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "=", $1, $3, @2);
-					}
+					n->subselect = $3;
+					n->subLinkType = ANY_SUBLINK;
+					n->subLinkId = 0;
+					n->testexpr = $1;
+					n->operName = NIL;		/* show it's IN not = ANY */
+					n->location = @2;
+					$$ = (Node *) n;
 				}
-			| a_expr NOT_LA IN_P in_expr						%prec NOT_LA
+			| a_expr IN_P '(' expr_list ')'
 				{
-					/* in_expr returns a SubLink or a list of a_exprs */
-					if (IsA($4, SubLink))
-					{
-						/* generate NOT (foo = ANY (subquery)) */
-						/* Make an = ANY node */
-						SubLink	   *n = (SubLink *) $4;
-
-						n->subLinkType = ANY_SUBLINK;
-						n->subLinkId = 0;
-						n->testexpr = $1;
-						n->operName = NIL;		/* show it's IN not = ANY */
-						n->location = @2;
-						/* Stick a NOT on top; must have same parse location */
-						$$ = makeNotExpr((Node *) n, @2);
-					}
-					else
-					{
-						/* generate scalar NOT IN expression */
-						$$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "<>", $1, $4, @2);
-					}
+					/* generate scalar IN expression */
+					A_Expr *n = makeSimpleA_Expr(AEXPR_IN, "=", $1, (Node *) $4, @2);
+
+					n->rexpr_list_start = @3;
+					n->rexpr_list_end = @5;
+					$$ = (Node *) n;
+				}
+			| a_expr NOT_LA IN_P select_with_parens			%prec NOT_LA
+				{
+					/* generate NOT (foo = ANY (subquery)) */
+					SubLink	   *n = makeNode(SubLink);
+
+					n->subselect = $4;
+					n->subLinkType = ANY_SUBLINK;
+					n->subLinkId = 0;
+					n->testexpr = $1;
+					n->operName = NIL;		/* show it's IN not = ANY */
+					n->location = @2;
+					/* Stick a NOT on top; must have same parse location */
+					$$ = makeNotExpr((Node *) n, @2);
+				}
+			| a_expr NOT_LA IN_P '(' expr_list ')'
+				{
+					/* generate scalar NOT IN expression */
+					A_Expr *n = makeSimpleA_Expr(AEXPR_IN, "<>", $1, (Node *) $5, @2);
+
+					n->rexpr_list_start = @4;
+					n->rexpr_list_end = @6;
+					$$ = (Node *) n;
 				}
 			| a_expr subquery_Op sub_type select_with_parens	%prec Op
 				{
@@ -16764,15 +16807,15 @@ type_list:	Typename								{ $$ = list_make1($1); }
 
 array_expr: '[' expr_list ']'
 				{
-					$$ = makeAArrayExpr($2, @1);
+					$$ = makeAArrayExpr($2, @1, @3);
 				}
 			| '[' array_expr_list ']'
 				{
-					$$ = makeAArrayExpr($2, @1);
+					$$ = makeAArrayExpr($2, @1, @3);
 				}
 			| '[' ']'
 				{
-					$$ = makeAArrayExpr(NIL, @1);
+					$$ = makeAArrayExpr(NIL, @1, @2);
 				}
 		;
 
@@ -16894,17 +16937,6 @@ trim_list:	a_expr FROM expr_list					{ $$ = lappend($3, $1); }
 			| expr_list								{ $$ = $1; }
 		;
 
-in_expr:	select_with_parens
-				{
-					SubLink	   *n = makeNode(SubLink);
-
-					n->subselect = $1;
-					/* other fields will be filled later */
-					$$ = (Node *) n;
-				}
-			| '(' expr_list ')'						{ $$ = (Node *) $2; }
-		;
-
 /*
  * Define SQL-style CASE clause.
  * - Full specification
@@ -18748,47 +18780,6 @@ updateRawStmtEnd(RawStmt *rs, int end_location)
 	rs->stmt_len = end_location - rs->stmt_location;
 }
 
-/*
- * Adjust a PreparableStmt to reflect that it doesn't run to the end of the
- * string.
- */
-static void
-updatePreparableStmtEnd(Node *n, int end_location)
-{
-	if (IsA(n, SelectStmt))
-	{
-		SelectStmt *stmt = (SelectStmt *) n;
-
-		stmt->stmt_len = end_location - stmt->stmt_location;
-	}
-	else if (IsA(n, InsertStmt))
-	{
-		InsertStmt *stmt = (InsertStmt *) n;
-
-		stmt->stmt_len = end_location - stmt->stmt_location;
-	}
-	else if (IsA(n, UpdateStmt))
-	{
-		UpdateStmt *stmt = (UpdateStmt *) n;
-
-		stmt->stmt_len = end_location - stmt->stmt_location;
-	}
-	else if (IsA(n, DeleteStmt))
-	{
-		DeleteStmt *stmt = (DeleteStmt *) n;
-
-		stmt->stmt_len = end_location - stmt->stmt_location;
-	}
-	else if (IsA(n, MergeStmt))
-	{
-		MergeStmt  *stmt = (MergeStmt *) n;
-
-		stmt->stmt_len = end_location - stmt->stmt_location;
-	}
-	else
-		elog(ERROR, "unexpected node type %d", (int) n->type);
-}
-
 static Node *
 makeColumnRef(char *colname, List *indirection,
 			  int location, core_yyscan_t yyscanner)
@@ -19167,14 +19158,11 @@ insertSelectOptions(SelectStmt *stmt,
 					 errmsg("multiple WITH clauses not allowed"),
 					 parser_errposition(exprLocation((Node *) withClause))));
 		stmt->withClause = withClause;
-
-		/* Update SelectStmt's location to the start of the WITH clause */
-		stmt->stmt_location = withClause->location;
 	}
 }
 
 static Node *
-makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location)
+makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg)
 {
 	SelectStmt *n = makeNode(SelectStmt);
 
@@ -19182,7 +19170,6 @@ makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location)
 	n->all = all;
 	n->larg = (SelectStmt *) larg;
 	n->rarg = (SelectStmt *) rarg;
-	n->stmt_location = location;
 	return (Node *) n;
 }
 
@@ -19300,12 +19287,14 @@ makeNotExpr(Node *expr, int location)
 }
 
 static Node *
-makeAArrayExpr(List *elements, int location)
+makeAArrayExpr(List *elements, int location, int location_end)
 {
 	A_ArrayExpr *n = makeNode(A_ArrayExpr);
 
 	n->elements = elements;
 	n->location = location;
+	n->list_start = location;
+	n->list_end = location_end;
 	return (Node *) n;
 }
 
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 1f8e2d54673..d66276801c6 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -1223,6 +1223,8 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
 			newa->element_typeid = scalar_type;
 			newa->elements = aexprs;
 			newa->multidims = false;
+			newa->list_start = a->rexpr_list_start;
+			newa->list_end = a->rexpr_list_end;
 			newa->location = -1;
 
 			result = (Node *) make_scalar_array_op(pstate,
@@ -2165,6 +2167,8 @@ transformArrayExpr(ParseState *pstate, A_ArrayExpr *a,
 	/* array_collid will be set by parse_collate.c */
 	newa->element_typeid = element_type;
 	newa->elements = newcoercedelems;
+	newa->list_start = a->list_start;
+	newa->list_end = a->list_end;
 	newa->location = a->location;
 
 	return (Node *) newa;
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index 62015431fdf..afcf54169c3 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -1279,6 +1279,28 @@ transformTableLikeClause(CreateStmtContext *cxt, TableLikeClause *table_like_cla
 		lst = RelationGetNotNullConstraints(RelationGetRelid(relation), false,
 											true);
 		cxt->nnconstraints = list_concat(cxt->nnconstraints, lst);
+
+		/* Copy comments on not-null constraints */
+		if (table_like_clause->options & CREATE_TABLE_LIKE_COMMENTS)
+		{
+			foreach_node(Constraint, nnconstr, lst)
+			{
+				if ((comment = GetComment(get_relation_constraint_oid(RelationGetRelid(relation),
+																	  nnconstr->conname, false),
+										  ConstraintRelationId,
+										  0)) != NULL)
+				{
+					CommentStmt *stmt = makeNode(CommentStmt);
+
+					stmt->objtype = OBJECT_TABCONSTRAINT;
+					stmt->object = (Node *) list_make3(makeString(cxt->relation->schemaname),
+													   makeString(cxt->relation->relname),
+													   makeString(nnconstr->conname));
+					stmt->comment = comment;
+					cxt->alist = lappend(cxt->alist, stmt);
+				}
+			}
+		}
 	}
 
 	/*
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 4d4a1a3197e..9474095f271 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -781,10 +781,6 @@ ProcessAutoVacLauncherInterrupts(void)
 	if (LogMemoryContextPending)
 		ProcessLogMemoryContextInterrupt();
 
-	/* Publish memory contexts of this process */
-	if (PublishMemoryContextPending)
-		ProcessGetMemoryContextInterrupt();
-
 	/* Process sinval catchup interrupts that happened while sleeping */
 	ProcessCatchupInterrupt();
 }
@@ -2077,6 +2073,12 @@ do_autovacuum(void)
 				}
 			}
 		}
+
+		/* Free relopts and tabentry to avoid per-relation leakage */
+		if (relopts)
+			pfree(relopts);
+		if (tabentry)
+			pfree(tabentry);
 	}
 
 	table_endscan(relScan);
@@ -2093,7 +2095,8 @@ do_autovacuum(void)
 		Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
 		PgStat_StatTabEntry *tabentry;
 		Oid			relid;
-		AutoVacOpts *relopts = NULL;
+		AutoVacOpts *relopts;
+		bool		free_relopts = false;
 		bool		dovacuum;
 		bool		doanalyze;
 		bool		wraparound;
@@ -2111,7 +2114,9 @@ do_autovacuum(void)
 		 * main rel
 		 */
 		relopts = extract_autovac_opts(tuple, pg_class_desc);
-		if (relopts == NULL)
+		if (relopts)
+			free_relopts = true;
+		else
 		{
 			av_relation *hentry;
 			bool		found;
@@ -2132,6 +2137,12 @@ do_autovacuum(void)
 		/* ignore analyze for toast tables */
 		if (dovacuum)
 			table_oids = lappend_oid(table_oids, relid);
+
+		/* Free relopts (if extracted here) and tabentry to avoid leakage */
+		if (free_relopts)
+			pfree(relopts);
+		if (tabentry)
+			pfree(tabentry);
 	}
 
 	table_endscan(relScan);
@@ -2223,6 +2234,12 @@ do_autovacuum(void)
 						get_namespace_name(classForm->relnamespace),
 						NameStr(classForm->relname))));
 
+		/*
+		 * Deletion might involve TOAST table access, so ensure we have a
+		 * valid snapshot.
+		 */
+		PushActiveSnapshot(GetTransactionSnapshot());
+
 		object.classId = RelationRelationId;
 		object.objectId = relid;
 		object.objectSubId = 0;
@@ -2235,6 +2252,7 @@ do_autovacuum(void)
 		 * To commit the deletion, end current transaction and start a new
 		 * one.  Note this also releases the locks we took.
 		 */
+		PopActiveSnapshot();
 		CommitTransactionCommand();
 		StartTransactionCommand();
 
@@ -2503,6 +2521,8 @@ deleted:
 		pg_atomic_test_set_flag(&MyWorkerInfo->wi_dobalance);
 	}
 
+	list_free(table_oids);
+
 	/*
 	 * Perform additional work items, as requested by backends.
 	 */
@@ -2684,8 +2704,8 @@ deleted2:
 /*
  * extract_autovac_opts
  *
- * Given a relation's pg_class tuple, return the AutoVacOpts portion of
- * reloptions, if set; otherwise, return NULL.
+ * Given a relation's pg_class tuple, return a palloc'd copy of the
+ * AutoVacOpts portion of reloptions, if set; otherwise, return NULL.
  *
  * Note: callers do not have a relation lock on the table at this point,
  * so the table could have been dropped, and its catalog rows gone, after
@@ -2734,6 +2754,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 	autovac_table *tab = NULL;
 	bool		wraparound;
 	AutoVacOpts *avopts;
+	bool		free_avopts = false;
 
 	/* fetch the relation's relcache entry */
 	classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
@@ -2746,8 +2767,10 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 	 * main table reloptions if the toast table itself doesn't have.
 	 */
 	avopts = extract_autovac_opts(classTup, pg_class_desc);
-	if (classForm->relkind == RELKIND_TOASTVALUE &&
-		avopts == NULL && table_toast_map != NULL)
+	if (avopts)
+		free_avopts = true;
+	else if (classForm->relkind == RELKIND_TOASTVALUE &&
+			 table_toast_map != NULL)
 	{
 		av_relation *hentry;
 		bool		found;
@@ -2856,6 +2879,8 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
 						 avopts->vacuum_cost_delay >= 0));
 	}
 
+	if (free_avopts)
+		pfree(avopts);
 	heap_freetuple(classTup);
 	return tab;
 }
@@ -2887,6 +2912,10 @@ recheck_relation_needs_vacanalyze(Oid relid,
 							  effective_multixact_freeze_max_age,
 							  dovacuum, doanalyze, wraparound);
 
+	/* Release tabentry to avoid leakage */
+	if (tabentry)
+		pfree(tabentry);
+
 	/* ignore ANALYZE for toast tables */
 	if (classForm->relkind == RELKIND_TOASTVALUE)
 		*doanalyze = false;
@@ -3144,20 +3173,24 @@ autovacuum_do_vac_analyze(autovac_table *tab, BufferAccessStrategy bstrategy)
 	VacuumRelation *rel;
 	List	   *rel_list;
 	MemoryContext vac_context;
+	MemoryContext old_context;
 
 	/* Let pgstat know what we're doing */
 	autovac_report_activity(tab);
 
+	/* Create a context that vacuum() can use as cross-transaction storage */
+	vac_context = AllocSetContextCreate(CurrentMemoryContext,
+										"Vacuum",
+										ALLOCSET_DEFAULT_SIZES);
+
 	/* Set up one VacuumRelation target, identified by OID, for vacuum() */
+	old_context = MemoryContextSwitchTo(vac_context);
 	rangevar = makeRangeVar(tab->at_nspname, tab->at_relname, -1);
 	rel = makeVacuumRelation(rangevar, tab->at_relid, NIL);
 	rel_list = list_make1(rel);
+	MemoryContextSwitchTo(old_context);
 
-	vac_context = AllocSetContextCreate(CurrentMemoryContext,
-										"Vacuum",
-										ALLOCSET_DEFAULT_SIZES);
-
-	vacuum(rel_list, &tab->at_params, bstrategy, vac_context, true);
+	vacuum(rel_list, tab->at_params, bstrategy, vac_context, true);
 
 	MemoryContextDelete(vac_context);
 }
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index d3cb3f1891c..2809e298a44 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -42,6 +42,8 @@
 #include "access/xlog.h"
 #include "access/xlog_internal.h"
 #include "access/xlogrecovery.h"
+#include "catalog/pg_authid.h"
+#include "commands/defrem.h"
 #include "libpq/pqsignal.h"
 #include "miscadmin.h"
 #include "pgstat.h"
@@ -61,6 +63,7 @@
 #include "storage/shmem.h"
 #include "storage/smgr.h"
 #include "storage/spin.h"
+#include "utils/acl.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
 #include "utils/resowner.h"
@@ -161,7 +164,7 @@ static pg_time_t last_xlog_switch_time;
 static void ProcessCheckpointerInterrupts(void);
 static void CheckArchiveTimeout(void);
 static bool IsCheckpointOnSchedule(double progress);
-static bool ImmediateCheckpointRequested(void);
+static bool FastCheckpointRequested(void);
 static bool CompactCheckpointerRequestQueue(void);
 static void UpdateSharedMemoryConfig(void);
 
@@ -663,10 +666,6 @@ ProcessCheckpointerInterrupts(void)
 	/* Perform logging of memory contexts of this process */
 	if (LogMemoryContextPending)
 		ProcessLogMemoryContextInterrupt();
-
-	/* Publish memory contexts of this process */
-	if (PublishMemoryContextPending)
-		ProcessGetMemoryContextInterrupt();
 }
 
 /*
@@ -738,12 +737,12 @@ CheckArchiveTimeout(void)
 }
 
 /*
- * Returns true if an immediate checkpoint request is pending.  (Note that
- * this does not check the *current* checkpoint's IMMEDIATE flag, but whether
- * there is one pending behind it.)
+ * Returns true if a fast checkpoint request is pending.  (Note that this does
+ * not check the *current* checkpoint's FAST flag, but whether there is one
+ * pending behind it.)
  */
 static bool
-ImmediateCheckpointRequested(void)
+FastCheckpointRequested(void)
 {
 	volatile CheckpointerShmemStruct *cps = CheckpointerShmem;
 
@@ -751,7 +750,7 @@ ImmediateCheckpointRequested(void)
 	 * We don't need to acquire the ckpt_lck in this case because we're only
 	 * looking at a single flag bit.
 	 */
-	if (cps->ckpt_flags & CHECKPOINT_IMMEDIATE)
+	if (cps->ckpt_flags & CHECKPOINT_FAST)
 		return true;
 	return false;
 }
@@ -764,7 +763,7 @@ ImmediateCheckpointRequested(void)
  * checkpoint_completion_target.
  *
  * The checkpoint request flags should be passed in; currently the only one
- * examined is CHECKPOINT_IMMEDIATE, which disables delays between writes.
+ * examined is CHECKPOINT_FAST, which disables delays between writes.
  *
  * 'progress' is an estimate of how much of the work has been done, as a
  * fraction between 0.0 meaning none, and 1.0 meaning all done.
@@ -782,10 +781,10 @@ CheckpointWriteDelay(int flags, double progress)
 	 * Perform the usual duties and take a nap, unless we're behind schedule,
 	 * in which case we just try to catch up as quickly as possible.
 	 */
-	if (!(flags & CHECKPOINT_IMMEDIATE) &&
+	if (!(flags & CHECKPOINT_FAST) &&
 		!ShutdownXLOGPending &&
 		!ShutdownRequestPending &&
-		!ImmediateCheckpointRequested() &&
+		!FastCheckpointRequested() &&
 		IsCheckpointOnSchedule(progress))
 	{
 		if (ConfigReloadPending)
@@ -981,17 +980,67 @@ CheckpointerShmemInit(void)
 }
 
 /*
+ * ExecCheckpoint
+ *		Primary entry point for manual CHECKPOINT commands
+ *
+ * This is mainly a wrapper for RequestCheckpoint().
+ */
+void
+ExecCheckpoint(ParseState *pstate, CheckPointStmt *stmt)
+{
+	bool		fast = true;
+	bool		unlogged = false;
+
+	foreach_ptr(DefElem, opt, stmt->options)
+	{
+		if (strcmp(opt->defname, "mode") == 0)
+		{
+			char	   *mode = defGetString(opt);
+
+			if (strcmp(mode, "spread") == 0)
+				fast = false;
+			else if (strcmp(mode, "fast") != 0)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("unrecognized MODE option \"%s\"", mode),
+						 parser_errposition(pstate, opt->location)));
+		}
+		else if (strcmp(opt->defname, "flush_unlogged") == 0)
+			unlogged = defGetBoolean(opt);
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_SYNTAX_ERROR),
+					 errmsg("unrecognized CHECKPOINT option \"%s\"", opt->defname),
+					 parser_errposition(pstate, opt->location)));
+	}
+
+	if (!has_privs_of_role(GetUserId(), ROLE_PG_CHECKPOINT))
+		ereport(ERROR,
+				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+		/* translator: %s is name of an SQL command (e.g., CHECKPOINT) */
+				 errmsg("permission denied to execute %s command",
+						"CHECKPOINT"),
+				 errdetail("Only roles with privileges of the \"%s\" role may execute this command.",
+						   "pg_checkpoint")));
+
+	RequestCheckpoint(CHECKPOINT_WAIT |
+					  (fast ? CHECKPOINT_FAST : 0) |
+					  (unlogged ? CHECKPOINT_FLUSH_UNLOGGED : 0) |
+					  (RecoveryInProgress() ? 0 : CHECKPOINT_FORCE));
+}
+
+/*
  * RequestCheckpoint
  *		Called in backend processes to request a checkpoint
  *
  * flags is a bitwise OR of the following:
  *	CHECKPOINT_IS_SHUTDOWN: checkpoint is for database shutdown.
  *	CHECKPOINT_END_OF_RECOVERY: checkpoint is for end of WAL recovery.
- *	CHECKPOINT_IMMEDIATE: finish the checkpoint ASAP,
+ *	CHECKPOINT_FAST: finish the checkpoint ASAP,
  *		ignoring checkpoint_completion_target parameter.
  *	CHECKPOINT_FORCE: force a checkpoint even if no XLOG activity has occurred
  *		since the last one (implied by CHECKPOINT_IS_SHUTDOWN or
- *		CHECKPOINT_END_OF_RECOVERY).
+ *		CHECKPOINT_END_OF_RECOVERY, and the CHECKPOINT command).
  *	CHECKPOINT_WAIT: wait for completion before returning (otherwise,
  *		just signal checkpointer to do it, and return).
  *	CHECKPOINT_CAUSE_XLOG: checkpoint is requested due to xlog filling.
@@ -1013,7 +1062,7 @@ RequestCheckpoint(int flags)
 		 * There's no point in doing slow checkpoints in a standalone backend,
 		 * because there's no other backends the checkpoint could disrupt.
 		 */
-		CreateCheckPoint(flags | CHECKPOINT_IMMEDIATE);
+		CreateCheckPoint(flags | CHECKPOINT_FAST);
 
 		/* Free all smgr objects, as CheckpointerMain() normally would. */
 		smgrdestroyall();
diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c
index f24f574e748..0ae9bf906ec 100644
--- a/src/backend/postmaster/interrupt.c
+++ b/src/backend/postmaster/interrupt.c
@@ -48,10 +48,6 @@ ProcessMainLoopInterrupts(void)
 	/* Perform logging of memory contexts of this process */
 	if (LogMemoryContextPending)
 		ProcessLogMemoryContextInterrupt();
-
-	/* Publish memory contexts of this process */
-	if (PublishMemoryContextPending)
-		ProcessGetMemoryContextInterrupt();
 }
 
 /*
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index cb7408acf4c..78e39e5f866 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -718,15 +718,15 @@ pgarch_readyXlog(char *xlog)
 		/*
 		 * Store the file in our max-heap if it has a high enough priority.
 		 */
-		if (arch_files->arch_heap->bh_size < NUM_FILES_PER_DIRECTORY_SCAN)
+		if (binaryheap_size(arch_files->arch_heap) < NUM_FILES_PER_DIRECTORY_SCAN)
 		{
 			/* If the heap isn't full yet, quickly add it. */
-			arch_file = arch_files->arch_filenames[arch_files->arch_heap->bh_size];
+			arch_file = arch_files->arch_filenames[binaryheap_size(arch_files->arch_heap)];
 			strcpy(arch_file, basename);
 			binaryheap_add_unordered(arch_files->arch_heap, CStringGetDatum(arch_file));
 
 			/* If we just filled the heap, make it a valid one. */
-			if (arch_files->arch_heap->bh_size == NUM_FILES_PER_DIRECTORY_SCAN)
+			if (binaryheap_size(arch_files->arch_heap) == NUM_FILES_PER_DIRECTORY_SCAN)
 				binaryheap_build(arch_files->arch_heap);
 		}
 		else if (ready_file_comparator(binaryheap_first(arch_files->arch_heap),
@@ -744,21 +744,21 @@ pgarch_readyXlog(char *xlog)
 	FreeDir(rldir);
 
 	/* If no files were found, simply return. */
-	if (arch_files->arch_heap->bh_size == 0)
+	if (binaryheap_empty(arch_files->arch_heap))
 		return false;
 
 	/*
 	 * If we didn't fill the heap, we didn't make it a valid one.  Do that
 	 * now.
 	 */
-	if (arch_files->arch_heap->bh_size < NUM_FILES_PER_DIRECTORY_SCAN)
+	if (binaryheap_size(arch_files->arch_heap) < NUM_FILES_PER_DIRECTORY_SCAN)
 		binaryheap_build(arch_files->arch_heap);
 
 	/*
 	 * Fill arch_files array with the files to archive in ascending order of
 	 * priority.
 	 */
-	arch_files->arch_files_size = arch_files->arch_heap->bh_size;
+	arch_files->arch_files_size = binaryheap_size(arch_files->arch_heap);
 	for (int i = 0; i < arch_files->arch_files_size; i++)
 		arch_files->arch_files[i] = DatumGetCString(binaryheap_remove_first(arch_files->arch_heap));
 
@@ -867,10 +867,6 @@ ProcessPgArchInterrupts(void)
 	if (LogMemoryContextPending)
 		ProcessLogMemoryContextInterrupt();
 
-	/* Publish memory contexts of this process */
-	if (PublishMemoryContextPending)
-		ProcessGetMemoryContextInterrupt();
-
 	if (ConfigReloadPending)
 	{
 		char	   *archiveLib = pstrdup(XLogArchiveLibrary);
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 490f7ce3664..cca9b946e53 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -4337,15 +4337,15 @@ maybe_start_bgworkers(void)
 static bool
 maybe_reap_io_worker(int pid)
 {
-	for (int id = 0; id < MAX_IO_WORKERS; ++id)
+	for (int i = 0; i < MAX_IO_WORKERS; ++i)
 	{
-		if (io_worker_children[id] &&
-			io_worker_children[id]->pid == pid)
+		if (io_worker_children[i] &&
+			io_worker_children[i]->pid == pid)
 		{
-			ReleasePostmasterChildSlot(io_worker_children[id]);
+			ReleasePostmasterChildSlot(io_worker_children[i]);
 
 			--io_worker_count;
-			io_worker_children[id] = NULL;
+			io_worker_children[i] = NULL;
 			return true;
 		}
 	}
@@ -4389,22 +4389,22 @@ maybe_adjust_io_workers(void)
 	while (io_worker_count < io_workers)
 	{
 		PMChild    *child;
-		int			id;
+		int			i;
 
 		/* find unused entry in io_worker_children array */
-		for (id = 0; id < MAX_IO_WORKERS; ++id)
+		for (i = 0; i < MAX_IO_WORKERS; ++i)
 		{
-			if (io_worker_children[id] == NULL)
+			if (io_worker_children[i] == NULL)
 				break;
 		}
-		if (id == MAX_IO_WORKERS)
-			elog(ERROR, "could not find a free IO worker ID");
+		if (i == MAX_IO_WORKERS)
+			elog(ERROR, "could not find a free IO worker slot");
 
 		/* Try to launch one. */
 		child = StartChildProcess(B_IO_WORKER);
 		if (child != NULL)
 		{
-			io_worker_children[id] = child;
+			io_worker_children[i] = child;
 			++io_worker_count;
 		}
 		else
@@ -4415,11 +4415,11 @@ maybe_adjust_io_workers(void)
 	if (io_worker_count > io_workers)
 	{
 		/* ask the IO worker in the highest slot to exit */
-		for (int id = MAX_IO_WORKERS - 1; id >= 0; --id)
+		for (int i = MAX_IO_WORKERS - 1; i >= 0; --i)
 		{
-			if (io_worker_children[id] != NULL)
+			if (io_worker_children[i] != NULL)
 			{
-				kill(io_worker_children[id]->pid, SIGUSR2);
+				kill(io_worker_children[i]->pid, SIGUSR2);
 				break;
 			}
 		}
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 7149a67fcbc..27e86cf393f 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -192,10 +192,6 @@ ProcessStartupProcInterrupts(void)
 	/* Perform logging of memory contexts of this process */
 	if (LogMemoryContextPending)
 		ProcessLogMemoryContextInterrupt();
-
-	/* Publish memory contexts of this process */
-	if (PublishMemoryContextPending)
-		ProcessGetMemoryContextInterrupt();
 }
 
 
diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c
index c7a76711cc5..777c9a8d555 100644
--- a/src/backend/postmaster/walsummarizer.c
+++ b/src/backend/postmaster/walsummarizer.c
@@ -385,7 +385,7 @@ WalSummarizerMain(const void *startup_data, size_t startup_data_len)
 
 			switch_lsn = tliSwitchPoint(current_tli, tles, &switch_tli);
 			ereport(DEBUG1,
-					errmsg_internal("switch point from TLI %u to TLI %u is at %X/%X",
+					errmsg_internal("switch point from TLI %u to TLI %u is at %X/%08X",
 									current_tli, switch_tli, LSN_FORMAT_ARGS(switch_lsn)));
 		}
 
@@ -741,7 +741,7 @@ WaitForWalSummarization(XLogRecPtr lsn)
 				ereport(ERROR,
 						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 						 errmsg("WAL summarization is not progressing"),
-						 errdetail("Summarization is needed through %X/%X, but is stuck at %X/%X on disk and %X/%X in memory.",
+						 errdetail("Summarization is needed through %X/%08X, but is stuck at %X/%08X on disk and %X/%08X in memory.",
 								   LSN_FORMAT_ARGS(lsn),
 								   LSN_FORMAT_ARGS(summarized_lsn),
 								   LSN_FORMAT_ARGS(pending_lsn))));
@@ -755,12 +755,12 @@ WaitForWalSummarization(XLogRecPtr lsn)
 												current_time) / 1000;
 			ereport(WARNING,
 					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-					 errmsg_plural("still waiting for WAL summarization through %X/%X after %ld second",
-								   "still waiting for WAL summarization through %X/%X after %ld seconds",
+					 errmsg_plural("still waiting for WAL summarization through %X/%08X after %ld second",
+								   "still waiting for WAL summarization through %X/%08X after %ld seconds",
 								   elapsed_seconds,
 								   LSN_FORMAT_ARGS(lsn),
 								   elapsed_seconds),
-					 errdetail("Summarization has reached %X/%X on disk and %X/%X in memory.",
+					 errdetail("Summarization has reached %X/%08X on disk and %X/%08X in memory.",
 							   LSN_FORMAT_ARGS(summarized_lsn),
 							   LSN_FORMAT_ARGS(pending_lsn))));
 		}
@@ -879,10 +879,6 @@ ProcessWalSummarizerInterrupts(void)
 	/* Perform logging of memory contexts of this process */
 	if (LogMemoryContextPending)
 		ProcessLogMemoryContextInterrupt();
-
-	/* Publish memory contexts of this process */
-	if (PublishMemoryContextPending)
-		ProcessGetMemoryContextInterrupt();
 }
 
 /*
@@ -985,7 +981,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
 			if (private_data->end_of_wal)
 			{
 				ereport(DEBUG1,
-						errmsg_internal("could not read WAL from timeline %u at %X/%X: end of WAL at %X/%X",
+						errmsg_internal("could not read WAL from timeline %u at %X/%08X: end of WAL at %X/%08X",
 										tli,
 										LSN_FORMAT_ARGS(start_lsn),
 										LSN_FORMAT_ARGS(private_data->read_upto)));
@@ -1004,8 +1000,8 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
 			}
 			else
 				ereport(ERROR,
-						(errmsg("could not find a valid record after %X/%X",
-								LSN_FORMAT_ARGS(start_lsn))));
+						errmsg("could not find a valid record after %X/%08X",
+							   LSN_FORMAT_ARGS(start_lsn)));
 		}
 
 		/* We shouldn't go backward. */
@@ -1038,7 +1034,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
 				 * able to read a complete record.
 				 */
 				ereport(DEBUG1,
-						errmsg_internal("could not read WAL from timeline %u at %X/%X: end of WAL at %X/%X",
+						errmsg_internal("could not read WAL from timeline %u at %X/%08X: end of WAL at %X/%08X",
 										tli,
 										LSN_FORMAT_ARGS(xlogreader->EndRecPtr),
 										LSN_FORMAT_ARGS(private_data->read_upto)));
@@ -1049,13 +1045,13 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
 			if (errormsg)
 				ereport(ERROR,
 						(errcode_for_file_access(),
-						 errmsg("could not read WAL from timeline %u at %X/%X: %s",
+						 errmsg("could not read WAL from timeline %u at %X/%08X: %s",
 								tli, LSN_FORMAT_ARGS(xlogreader->EndRecPtr),
 								errormsg)));
 			else
 				ereport(ERROR,
 						(errcode_for_file_access(),
-						 errmsg("could not read WAL from timeline %u at %X/%X",
+						 errmsg("could not read WAL from timeline %u at %X/%08X",
 								tli, LSN_FORMAT_ARGS(xlogreader->EndRecPtr))));
 		}
 
@@ -1226,7 +1222,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
 
 		/* Tell the user what we did. */
 		ereport(DEBUG1,
-				errmsg_internal("summarized WAL on TLI %u from %X/%X to %X/%X",
+				errmsg_internal("summarized WAL on TLI %u from %X/%08X to %X/%08X",
 								tli,
 								LSN_FORMAT_ARGS(summary_start_lsn),
 								LSN_FORMAT_ARGS(summary_end_lsn)));
@@ -1238,7 +1234,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact,
 	/* If we skipped a non-zero amount of WAL, log a debug message. */
 	if (summary_end_lsn > summary_start_lsn && fast_forward)
 		ereport(DEBUG1,
-				errmsg_internal("skipped summarizing WAL on TLI %u from %X/%X to %X/%X",
+				errmsg_internal("skipped summarizing WAL on TLI %u from %X/%08X to %X/%08X",
 								tli,
 								LSN_FORMAT_ARGS(summary_start_lsn),
 								LSN_FORMAT_ARGS(summary_end_lsn)));
@@ -1584,7 +1580,7 @@ summarizer_read_local_xlog_page(XLogReaderState *state,
 
 					/* Debugging output. */
 					ereport(DEBUG1,
-							errmsg_internal("timeline %u became historic, can read up to %X/%X",
+							errmsg_internal("timeline %u became historic, can read up to %X/%08X",
 											private_data->tli, LSN_FORMAT_ARGS(private_data->read_upto)));
 				}
 
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 78193cfb964..d9eab5357bc 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -20,58 +20,13 @@
 #include "common/unicode_category.h"
 #include "utils/pg_locale.h"
 
-/*
- * For the libc provider, to provide as much functionality as possible on a
- * variety of platforms without going so far as to implement everything from
- * scratch, we use several implementation strategies depending on the
- * situation:
- *
- * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
- * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
- * collations don't give a fig about multibyte characters.
- *
- * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
- * This assumes that every platform uses Unicode codepoints directly
- * as the wchar_t representation of Unicode.  (XXX: ICU makes this assumption
- * even for non-UTF8 encodings, which may be a problem.)  On some platforms
- * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
- *
- * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
- * values up to 255, and punt for values above that.  This is 100% correct
- * only in single-byte encodings such as LATINn.  However, non-Unicode
- * multibyte encodings are mostly Far Eastern character sets for which the
- * properties being tested here aren't very relevant for higher code values
- * anyway.  The difficulty with using the <wctype.h> functions with
- * non-Unicode multibyte encodings is that we can have no certainty that
- * the platform's wchar_t representation matches what we do in pg_wchar
- * conversions.
- *
- * As a special case, in the "default" collation, (2) and (3) force ASCII
- * letters to follow ASCII upcase/downcase rules, while in a non-default
- * collation we just let the library functions do what they will.  The case
- * where this matters is treatment of I/i in Turkish, and the behavior is
- * meant to match the upper()/lower() SQL functions.
- *
- * We store the active collation setting in static variables.  In principle
- * it could be passed down to here via the regex library's "struct vars" data
- * structure; but that would require somewhat invasive changes in the regex
- * library, and right now there's no real benefit to be gained from that.
- *
- * NB: the coding here assumes pg_wchar is an unsigned type.
- */
-
-typedef enum
-{
-	PG_REGEX_STRATEGY_C,		/* C locale (encoding independent) */
-	PG_REGEX_STRATEGY_BUILTIN,	/* built-in Unicode semantics */
-	PG_REGEX_STRATEGY_LIBC_WIDE,	/* Use locale_t <wctype.h> functions */
-	PG_REGEX_STRATEGY_LIBC_1BYTE,	/* Use locale_t <ctype.h> functions */
-	PG_REGEX_STRATEGY_ICU,		/* Use ICU uchar.h functions */
-} PG_Locale_Strategy;
-
-static PG_Locale_Strategy pg_regex_strategy;
 static pg_locale_t pg_regex_locale;
 
+static struct pg_locale_struct dummy_c_locale = {
+	.collate_is_c = true,
+	.ctype_is_c = true,
+};
+
 /*
  * Hard-wired character properties for C locale
  */
@@ -228,7 +183,6 @@ void
 pg_set_regex_collation(Oid collation)
 {
 	pg_locale_t locale = 0;
-	PG_Locale_Strategy strategy;
 
 	if (!OidIsValid(collation))
 	{
@@ -249,8 +203,7 @@ pg_set_regex_collation(Oid collation)
 		 * catalog access is available, so we can't call
 		 * pg_newlocale_from_collation().
 		 */
-		strategy = PG_REGEX_STRATEGY_C;
-		locale = 0;
+		locale = &dummy_c_locale;
 	}
 	else
 	{
@@ -267,113 +220,41 @@ pg_set_regex_collation(Oid collation)
 			 * C/POSIX collations use this path regardless of database
 			 * encoding
 			 */
-			strategy = PG_REGEX_STRATEGY_C;
-			locale = 0;
-		}
-		else if (locale->provider == COLLPROVIDER_BUILTIN)
-		{
-			Assert(GetDatabaseEncoding() == PG_UTF8);
-			strategy = PG_REGEX_STRATEGY_BUILTIN;
-		}
-#ifdef USE_ICU
-		else if (locale->provider == COLLPROVIDER_ICU)
-		{
-			strategy = PG_REGEX_STRATEGY_ICU;
-		}
-#endif
-		else
-		{
-			Assert(locale->provider == COLLPROVIDER_LIBC);
-			if (GetDatabaseEncoding() == PG_UTF8)
-				strategy = PG_REGEX_STRATEGY_LIBC_WIDE;
-			else
-				strategy = PG_REGEX_STRATEGY_LIBC_1BYTE;
+			locale = &dummy_c_locale;
 		}
 	}
 
-	pg_regex_strategy = strategy;
 	pg_regex_locale = locale;
 }
 
 static int
 pg_wc_isdigit(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISDIGIT));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isdigit(c, !pg_regex_locale->info.builtin.casemap_full);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isdigit(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISDIGIT));
+	else
+		return pg_regex_locale->ctype->wc_isdigit(c, pg_regex_locale);
 }
 
 static int
 pg_wc_isalpha(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISALPHA));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isalpha(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isalpha(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISALPHA));
+	else
+		return pg_regex_locale->ctype->wc_isalpha(c, pg_regex_locale);
 }
 
 static int
 pg_wc_isalnum(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISALNUM));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isalnum(c, !pg_regex_locale->info.builtin.casemap_full);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isalnum(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISALNUM));
+	else
+		return pg_regex_locale->ctype->wc_isalnum(c, pg_regex_locale);
 }
 
 static int
@@ -388,231 +269,87 @@ pg_wc_isword(pg_wchar c)
 static int
 pg_wc_isupper(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISUPPER));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isupper(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isupper_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isupper(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISUPPER));
+	else
+		return pg_regex_locale->ctype->wc_isupper(c, pg_regex_locale);
 }
 
 static int
 pg_wc_islower(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISLOWER));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_islower(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					islower_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_islower(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISLOWER));
+	else
+		return pg_regex_locale->ctype->wc_islower(c, pg_regex_locale);
 }
 
 static int
 pg_wc_isgraph(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISGRAPH));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isgraph(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isgraph(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISGRAPH));
+	else
+		return pg_regex_locale->ctype->wc_isgraph(c, pg_regex_locale);
 }
 
 static int
 pg_wc_isprint(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISPRINT));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isprint(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isprint_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isprint(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISPRINT));
+	else
+		return pg_regex_locale->ctype->wc_isprint(c, pg_regex_locale);
 }
 
 static int
 pg_wc_ispunct(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISPUNCT));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_ispunct(c, !pg_regex_locale->info.builtin.casemap_full);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_ispunct(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISPUNCT));
+	else
+		return pg_regex_locale->ctype->wc_ispunct(c, pg_regex_locale);
 }
 
 static int
 pg_wc_isspace(pg_wchar c)
 {
-	switch (pg_regex_strategy)
-	{
-		case PG_REGEX_STRATEGY_C:
-			return (c <= (pg_wchar) 127 &&
-					(pg_char_properties[c] & PG_ISSPACE));
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return pg_u_isspace(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			return (c <= (pg_wchar) UCHAR_MAX &&
-					isspace_l((unsigned char) c, pg_regex_locale->info.lt));
-			break;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_isspace(c);
-#endif
-			break;
-	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	if (pg_regex_locale->ctype_is_c)
+		return (c <= (pg_wchar) 127 &&
+				(pg_char_properties[c] & PG_ISSPACE));
+	else
+		return pg_regex_locale->ctype->wc_isspace(c, pg_regex_locale);
 }
 
 static pg_wchar
 pg_wc_toupper(pg_wchar c)
 {
-	switch (pg_regex_strategy)
+	if (pg_regex_locale->ctype_is_c)
 	{
-		case PG_REGEX_STRATEGY_C:
-			if (c <= (pg_wchar) 127)
-				return pg_ascii_toupper((unsigned char) c);
-			return c;
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return unicode_uppercase_simple(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			/* force C behavior for ASCII characters, per comments above */
-			if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
-				return pg_ascii_toupper((unsigned char) c);
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return towupper_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			/* force C behavior for ASCII characters, per comments above */
-			if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
-				return pg_ascii_toupper((unsigned char) c);
-			if (c <= (pg_wchar) UCHAR_MAX)
-				return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
-			return c;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_toupper(c);
-#endif
-			break;
+		if (c <= (pg_wchar) 127)
+			return pg_ascii_toupper((unsigned char) c);
+		return c;
 	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	else
+		return pg_regex_locale->ctype->wc_toupper(c, pg_regex_locale);
 }
 
 static pg_wchar
 pg_wc_tolower(pg_wchar c)
 {
-	switch (pg_regex_strategy)
+	if (pg_regex_locale->ctype_is_c)
 	{
-		case PG_REGEX_STRATEGY_C:
-			if (c <= (pg_wchar) 127)
-				return pg_ascii_tolower((unsigned char) c);
-			return c;
-		case PG_REGEX_STRATEGY_BUILTIN:
-			return unicode_lowercase_simple(c);
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			/* force C behavior for ASCII characters, per comments above */
-			if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
-				return pg_ascii_tolower((unsigned char) c);
-			if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
-				return towlower_l((wint_t) c, pg_regex_locale->info.lt);
-			/* FALL THRU */
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-			/* force C behavior for ASCII characters, per comments above */
-			if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
-				return pg_ascii_tolower((unsigned char) c);
-			if (c <= (pg_wchar) UCHAR_MAX)
-				return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
-			return c;
-		case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
-			return u_tolower(c);
-#endif
-			break;
+		if (c <= (pg_wchar) 127)
+			return pg_ascii_tolower((unsigned char) c);
+		return c;
 	}
-	return 0;					/* can't get here, but keep compiler quiet */
+	else
+		return pg_regex_locale->ctype->wc_tolower(c, pg_regex_locale);
 }
 
 
@@ -738,37 +475,25 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
 	 * would always be true for production values of MAX_SIMPLE_CHR, but it's
 	 * useful to allow it to be small for testing purposes.)
 	 */
-	switch (pg_regex_strategy)
+	if (pg_regex_locale->ctype_is_c)
 	{
-		case PG_REGEX_STRATEGY_C:
 #if MAX_SIMPLE_CHR >= 127
-			max_chr = (pg_wchar) 127;
-			pcc->cv.cclasscode = -1;
+		max_chr = (pg_wchar) 127;
+		pcc->cv.cclasscode = -1;
 #else
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+		max_chr = (pg_wchar) MAX_SIMPLE_CHR;
 #endif
-			break;
-		case PG_REGEX_STRATEGY_BUILTIN:
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-			break;
-		case PG_REGEX_STRATEGY_LIBC_WIDE:
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-			break;
-		case PG_REGEX_STRATEGY_LIBC_1BYTE:
-#if MAX_SIMPLE_CHR >= UCHAR_MAX
-			max_chr = (pg_wchar) UCHAR_MAX;
+	}
+	else
+	{
+		if (pg_regex_locale->ctype->max_chr != 0 &&
+			pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR)
+		{
+			max_chr = pg_regex_locale->ctype->max_chr;
 			pcc->cv.cclasscode = -1;
-#else
-			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-#endif
-			break;
-		case PG_REGEX_STRATEGY_ICU:
+		}
+		else
 			max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-			break;
-		default:
-			Assert(false);
-			max_chr = 0;		/* can't get here, but keep compiler quiet */
-			break;
 	}
 
 	/*
diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
index 7b4ddf7a8f5..f7b5d093681 100644
--- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
+++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c
@@ -534,7 +534,7 @@ libpqrcv_startstreaming(WalReceiverConn *conn,
 	if (options->logical)
 		appendStringInfoString(&cmd, " LOGICAL");
 
-	appendStringInfo(&cmd, " %X/%X", LSN_FORMAT_ARGS(options->startpoint));
+	appendStringInfo(&cmd, " %X/%08X", LSN_FORMAT_ARGS(options->startpoint));
 
 	/*
 	 * Additional options are different depending on if we are doing logical
diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c
index 10677da56b2..4aed0dfcebb 100644
--- a/src/backend/replication/logical/launcher.c
+++ b/src/backend/replication/logical/launcher.c
@@ -175,12 +175,14 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
 							   uint16 generation,
 							   BackgroundWorkerHandle *handle)
 {
-	BgwHandleStatus status;
-	int			rc;
+	bool		result = false;
+	bool		dropped_latch = false;
 
 	for (;;)
 	{
+		BgwHandleStatus status;
 		pid_t		pid;
+		int			rc;
 
 		CHECK_FOR_INTERRUPTS();
 
@@ -189,8 +191,9 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
 		/* Worker either died or has started. Return false if died. */
 		if (!worker->in_use || worker->proc)
 		{
+			result = worker->in_use;
 			LWLockRelease(LogicalRepWorkerLock);
-			return worker->in_use;
+			break;
 		}
 
 		LWLockRelease(LogicalRepWorkerLock);
@@ -205,7 +208,7 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
 			if (generation == worker->generation)
 				logicalrep_worker_cleanup(worker);
 			LWLockRelease(LogicalRepWorkerLock);
-			return false;
+			break;				/* result is already false */
 		}
 
 		/*
@@ -220,8 +223,18 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
 		{
 			ResetLatch(MyLatch);
 			CHECK_FOR_INTERRUPTS();
+			dropped_latch = true;
 		}
 	}
+
+	/*
+	 * If we had to clear a latch event in order to wait, be sure to restore
+	 * it before exiting.  Otherwise caller may miss events.
+	 */
+	if (dropped_latch)
+		SetLatch(MyLatch);
+
+	return result;
 }
 
 /*
@@ -328,7 +341,7 @@ logicalrep_worker_launch(LogicalRepWorkerType wtype,
 	if (max_active_replication_origins == 0)
 		ereport(ERROR,
 				(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
-				 errmsg("cannot start logical replication workers when \"max_active_replication_origins\"=0")));
+				 errmsg("cannot start logical replication workers when \"max_active_replication_origins\" is 0")));
 
 	/*
 	 * We need to do the modification of the shared memory under lock so that
@@ -1016,7 +1029,7 @@ logicalrep_launcher_attach_dshmem(void)
 		last_start_times_dsa = dsa_attach(LogicalRepCtx->last_start_dsa);
 		dsa_pin_mapping(last_start_times_dsa);
 		last_start_times = dshash_attach(last_start_times_dsa, &dsh_params,
-										 LogicalRepCtx->last_start_dsh, 0);
+										 LogicalRepCtx->last_start_dsh, NULL);
 	}
 
 	MemoryContextSwitchTo(oldcontext);
@@ -1194,10 +1207,21 @@ ApplyLauncherMain(Datum main_arg)
 				(elapsed = TimestampDifferenceMilliseconds(last_start, now)) >= wal_retrieve_retry_interval)
 			{
 				ApplyLauncherSetWorkerStartTime(sub->oid, now);
-				logicalrep_worker_launch(WORKERTYPE_APPLY,
-										 sub->dbid, sub->oid, sub->name,
-										 sub->owner, InvalidOid,
-										 DSM_HANDLE_INVALID);
+				if (!logicalrep_worker_launch(WORKERTYPE_APPLY,
+											  sub->dbid, sub->oid, sub->name,
+											  sub->owner, InvalidOid,
+											  DSM_HANDLE_INVALID))
+				{
+					/*
+					 * We get here either if we failed to launch a worker
+					 * (perhaps for resource-exhaustion reasons) or if we
+					 * launched one but it immediately quit.  Either way, it
+					 * seems appropriate to try again after
+					 * wal_retrieve_retry_interval.
+					 */
+					wait_time = Min(wait_time,
+									wal_retrieve_retry_interval);
+				}
 			}
 			else
 			{
diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c
index 1d56d0c4ef3..7e363a7c05b 100644
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -29,6 +29,7 @@
 #include "postgres.h"
 
 #include "access/xact.h"
+#include "access/xlog_internal.h"
 #include "access/xlogutils.h"
 #include "fmgr.h"
 #include "miscadmin.h"
@@ -41,6 +42,7 @@
 #include "storage/proc.h"
 #include "storage/procarray.h"
 #include "utils/builtins.h"
+#include "utils/injection_point.h"
 #include "utils/inval.h"
 #include "utils/memutils.h"
 
@@ -565,7 +567,7 @@ CreateDecodingContext(XLogRecPtr start_lsn,
 		 * kinds of client errors; so the client may wish to check that
 		 * confirmed_flush_lsn matches its expectations.
 		 */
-		elog(LOG, "%X/%X has been already streamed, forwarding to %X/%X",
+		elog(LOG, "%X/%08X has been already streamed, forwarding to %X/%08X",
 			 LSN_FORMAT_ARGS(start_lsn),
 			 LSN_FORMAT_ARGS(slot->data.confirmed_flush));
 
@@ -608,7 +610,7 @@ CreateDecodingContext(XLogRecPtr start_lsn,
 	ereport(LOG,
 			(errmsg("starting logical decoding for slot \"%s\"",
 					NameStr(slot->data.name)),
-			 errdetail("Streaming transactions committing after %X/%X, reading WAL from %X/%X.",
+			 errdetail("Streaming transactions committing after %X/%08X, reading WAL from %X/%08X.",
 					   LSN_FORMAT_ARGS(slot->data.confirmed_flush),
 					   LSN_FORMAT_ARGS(slot->data.restart_lsn))));
 
@@ -635,7 +637,7 @@ DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
 	/* Initialize from where to start reading WAL. */
 	XLogBeginRead(ctx->reader, slot->data.restart_lsn);
 
-	elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X",
+	elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%08X",
 		 LSN_FORMAT_ARGS(slot->data.restart_lsn));
 
 	/* Wait for a consistent starting point */
@@ -756,7 +758,7 @@ output_plugin_error_callback(void *arg)
 
 	/* not all callbacks have an associated LSN  */
 	if (state->report_location != InvalidXLogRecPtr)
-		errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%X",
+		errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%08X",
 				   NameStr(state->ctx->slot->data.name),
 				   NameStr(state->ctx->slot->data.plugin),
 				   state->callback_name,
@@ -1723,7 +1725,7 @@ LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
 	SpinLockRelease(&slot->mutex);
 
 	if (got_new_xmin)
-		elog(DEBUG1, "got new catalog xmin %u at %X/%X", xmin,
+		elog(DEBUG1, "got new catalog xmin %u at %X/%08X", xmin,
 			 LSN_FORMAT_ARGS(current_lsn));
 
 	/* candidate already valid with the current flush position, apply */
@@ -1783,7 +1785,7 @@ LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart
 		slot->candidate_restart_lsn = restart_lsn;
 		SpinLockRelease(&slot->mutex);
 
-		elog(DEBUG1, "got new restart lsn %X/%X at %X/%X",
+		elog(DEBUG1, "got new restart lsn %X/%08X at %X/%08X",
 			 LSN_FORMAT_ARGS(restart_lsn),
 			 LSN_FORMAT_ARGS(current_lsn));
 	}
@@ -1798,7 +1800,7 @@ LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart
 		confirmed_flush = slot->data.confirmed_flush;
 		SpinLockRelease(&slot->mutex);
 
-		elog(DEBUG1, "failed to increase restart lsn: proposed %X/%X, after %X/%X, current candidate %X/%X, current after %X/%X, flushed up to %X/%X",
+		elog(DEBUG1, "failed to increase restart lsn: proposed %X/%08X, after %X/%08X, current candidate %X/%08X, current after %X/%08X, flushed up to %X/%08X",
 			 LSN_FORMAT_ARGS(restart_lsn),
 			 LSN_FORMAT_ARGS(current_lsn),
 			 LSN_FORMAT_ARGS(candidate_restart_lsn),
@@ -1825,9 +1827,13 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn)
 	{
 		bool		updated_xmin = false;
 		bool		updated_restart = false;
+		XLogRecPtr	restart_lsn pg_attribute_unused();
 
 		SpinLockAcquire(&MyReplicationSlot->mutex);
 
+		/* remember the old restart lsn */
+		restart_lsn = MyReplicationSlot->data.restart_lsn;
+
 		/*
 		 * Prevent moving the confirmed_flush backwards, as this could lead to
 		 * data duplication issues caused by replicating already replicated
@@ -1881,6 +1887,18 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn)
 		/* first write new xmin to disk, so we know what's up after a crash */
 		if (updated_xmin || updated_restart)
 		{
+#ifdef USE_INJECTION_POINTS
+			XLogSegNo	seg1,
+						seg2;
+
+			XLByteToSeg(restart_lsn, seg1, wal_segment_size);
+			XLByteToSeg(MyReplicationSlot->data.restart_lsn, seg2, wal_segment_size);
+
+			/* trigger injection point, but only if segment changes */
+			if (seg1 != seg2)
+				INJECTION_POINT("logical-replication-slot-advance-segment", NULL);
+#endif
+
 			ReplicationSlotMarkDirty();
 			ReplicationSlotSave();
 			elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c
index a17bacf88e7..87f10e50dcc 100644
--- a/src/backend/replication/logical/origin.c
+++ b/src/backend/replication/logical/origin.c
@@ -826,9 +826,9 @@ StartupReplicationOrigin(void)
 		last_state++;
 
 		ereport(LOG,
-				(errmsg("recovered replication state of node %d to %X/%X",
-						disk_state.roident,
-						LSN_FORMAT_ARGS(disk_state.remote_lsn))));
+				errmsg("recovered replication state of node %d to %X/%08X",
+					   disk_state.roident,
+					   LSN_FORMAT_ARGS(disk_state.remote_lsn)));
 	}
 
 	/* now check checksum */
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 67655111875..7b4e8629553 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -109,10 +109,22 @@
 #include "storage/procarray.h"
 #include "storage/sinval.h"
 #include "utils/builtins.h"
+#include "utils/inval.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
 #include "utils/relfilenumbermap.h"
 
+/*
+ * Each transaction has an 8MB limit for invalidation messages distributed from
+ * other transactions. This limit is set considering scenarios with many
+ * concurrent logical decoding operations. When the distributed invalidation
+ * messages reach this threshold, the transaction is marked as
+ * RBTXN_DISTR_INVAL_OVERFLOWED to invalidate the complete cache as we have lost
+ * some inval messages and hence don't know what needs to be invalidated.
+ */
+#define MAX_DISTR_INVAL_MSG_PER_TXN \
+	((8 * 1024 * 1024) / sizeof(SharedInvalidationMessage))
+
 /* entry for a hash table we use to map from xid to our transaction state */
 typedef struct ReorderBufferTXNByIdEnt
 {
@@ -472,6 +484,12 @@ ReorderBufferFreeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
 		txn->invalidations = NULL;
 	}
 
+	if (txn->invalidations_distributed)
+	{
+		pfree(txn->invalidations_distributed);
+		txn->invalidations_distributed = NULL;
+	}
+
 	/* Reset the toast hash */
 	ReorderBufferToastReset(rb, txn);
 
@@ -1397,7 +1415,7 @@ ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state)
 	int32		off;
 
 	/* nothing there anymore */
-	if (state->heap->bh_size == 0)
+	if (binaryheap_empty(state->heap))
 		return NULL;
 
 	off = DatumGetInt32(binaryheap_first(state->heap));
@@ -2661,7 +2679,17 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
 		AbortCurrentTransaction();
 
 		/* make sure there's no cache pollution */
-		ReorderBufferExecuteInvalidations(txn->ninvalidations, txn->invalidations);
+		if (rbtxn_distr_inval_overflowed(txn))
+		{
+			Assert(txn->ninvalidations_distributed == 0);
+			InvalidateSystemCaches();
+		}
+		else
+		{
+			ReorderBufferExecuteInvalidations(txn->ninvalidations, txn->invalidations);
+			ReorderBufferExecuteInvalidations(txn->ninvalidations_distributed,
+											  txn->invalidations_distributed);
+		}
 
 		if (using_subtxn)
 			RollbackAndReleaseCurrentSubTransaction();
@@ -2710,8 +2738,17 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
 		AbortCurrentTransaction();
 
 		/* make sure there's no cache pollution */
-		ReorderBufferExecuteInvalidations(txn->ninvalidations,
-										  txn->invalidations);
+		if (rbtxn_distr_inval_overflowed(txn))
+		{
+			Assert(txn->ninvalidations_distributed == 0);
+			InvalidateSystemCaches();
+		}
+		else
+		{
+			ReorderBufferExecuteInvalidations(txn->ninvalidations, txn->invalidations);
+			ReorderBufferExecuteInvalidations(txn->ninvalidations_distributed,
+											  txn->invalidations_distributed);
+		}
 
 		if (using_subtxn)
 			RollbackAndReleaseCurrentSubTransaction();
@@ -3060,7 +3097,8 @@ ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn,
 		 * We might have decoded changes for this transaction that could load
 		 * the cache as per the current transaction's view (consider DDL's
 		 * happened in this transaction). We don't want the decoding of future
-		 * transactions to use those cache entries so execute invalidations.
+		 * transactions to use those cache entries so execute only the inval
+		 * messages in this transaction.
 		 */
 		if (txn->ninvalidations > 0)
 			ReorderBufferImmediateInvalidation(rb, txn->ninvalidations,
@@ -3147,9 +3185,10 @@ ReorderBufferForget(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
 	txn->final_lsn = lsn;
 
 	/*
-	 * Process cache invalidation messages if there are any. Even if we're not
-	 * interested in the transaction's contents, it could have manipulated the
-	 * catalog and we need to update the caches according to that.
+	 * Process only cache invalidation messages in this transaction if there
+	 * are any. Even if we're not interested in the transaction's contents, it
+	 * could have manipulated the catalog and we need to update the caches
+	 * according to that.
 	 */
 	if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
 		ReorderBufferImmediateInvalidation(rb, txn->ninvalidations,
@@ -3422,6 +3461,57 @@ ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid,
 }
 
 /*
+ * Add new invalidation messages to the reorder buffer queue.
+ */
+static void
+ReorderBufferQueueInvalidations(ReorderBuffer *rb, TransactionId xid,
+								XLogRecPtr lsn, Size nmsgs,
+								SharedInvalidationMessage *msgs)
+{
+	ReorderBufferChange *change;
+
+	change = ReorderBufferAllocChange(rb);
+	change->action = REORDER_BUFFER_CHANGE_INVALIDATION;
+	change->data.inval.ninvalidations = nmsgs;
+	change->data.inval.invalidations = (SharedInvalidationMessage *)
+		palloc(sizeof(SharedInvalidationMessage) * nmsgs);
+	memcpy(change->data.inval.invalidations, msgs,
+		   sizeof(SharedInvalidationMessage) * nmsgs);
+
+	ReorderBufferQueueChange(rb, xid, lsn, change, false);
+}
+
+/*
+ * Helper for ReorderBufferAddInvalidations() and
+ * ReorderBufferAddDistributedInvalidations(): append the nmsgs_new messages
+ * in msgs_new to the array *invals_out, updating the count *ninvals_out.
+ */
+static void
+ReorderBufferAccumulateInvalidations(SharedInvalidationMessage **invals_out,
+									 uint32 *ninvals_out,
+									 SharedInvalidationMessage *msgs_new,
+									 Size nmsgs_new)
+{
+	if (*ninvals_out == 0)
+	{
+		*ninvals_out = nmsgs_new;
+		*invals_out = (SharedInvalidationMessage *)
+			palloc(sizeof(SharedInvalidationMessage) * nmsgs_new);
+		memcpy(*invals_out, msgs_new, sizeof(SharedInvalidationMessage) * nmsgs_new);
+	}
+	else
+	{
+		/* Enlarge the array of inval messages */
+		*invals_out = (SharedInvalidationMessage *)
+			repalloc(*invals_out, sizeof(SharedInvalidationMessage) *
+					 (*ninvals_out + nmsgs_new));
+		memcpy(*invals_out + *ninvals_out, msgs_new,
+			   nmsgs_new * sizeof(SharedInvalidationMessage));
+		*ninvals_out += nmsgs_new;
+	}
+}
+
+/*
  * Accumulate the invalidations for executing them later.
  *
  * This needs to be called for each XLOG_XACT_INVALIDATIONS message and
@@ -3441,7 +3531,6 @@ ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid,
 {
 	ReorderBufferTXN *txn;
 	MemoryContext oldcontext;
-	ReorderBufferChange *change;
 
 	txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
 
@@ -3456,35 +3545,76 @@ ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid,
 
 	Assert(nmsgs > 0);
 
-	/* Accumulate invalidations. */
-	if (txn->ninvalidations == 0)
-	{
-		txn->ninvalidations = nmsgs;
-		txn->invalidations = (SharedInvalidationMessage *)
-			palloc(sizeof(SharedInvalidationMessage) * nmsgs);
-		memcpy(txn->invalidations, msgs,
-			   sizeof(SharedInvalidationMessage) * nmsgs);
-	}
-	else
+	ReorderBufferAccumulateInvalidations(&txn->invalidations,
+										 &txn->ninvalidations,
+										 msgs, nmsgs);
+
+	ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Accumulate the invalidations distributed by other committed transactions
+ * for executing them later.
+ *
+ * This function is similar to ReorderBufferAddInvalidations() but stores
+ * the given inval messages in txn->invalidations_distributed, subject to
+ * an overflow check.
+ *
+ * This needs to be called by committed transactions to distribute their
+ * inval messages to in-progress transactions.
+ */
+void
+ReorderBufferAddDistributedInvalidations(ReorderBuffer *rb, TransactionId xid,
+										 XLogRecPtr lsn, Size nmsgs,
+										 SharedInvalidationMessage *msgs)
+{
+	ReorderBufferTXN *txn;
+	MemoryContext oldcontext;
+
+	txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
+
+	oldcontext = MemoryContextSwitchTo(rb->context);
+
+	/*
+	 * Collect all the invalidations under the top transaction, if available,
+	 * so that we can execute them all together.  See the comments for
+	 * ReorderBufferAddInvalidations().
+	 */
+	txn = rbtxn_get_toptxn(txn);
+
+	Assert(nmsgs > 0);
+
+	if (!rbtxn_distr_inval_overflowed(txn))
 	{
-		txn->invalidations = (SharedInvalidationMessage *)
-			repalloc(txn->invalidations, sizeof(SharedInvalidationMessage) *
-					 (txn->ninvalidations + nmsgs));
+		/*
+		 * Check the transaction has enough space for storing distributed
+		 * invalidation messages.
+		 */
+		if (txn->ninvalidations_distributed + nmsgs >= MAX_DISTR_INVAL_MSG_PER_TXN)
+		{
+			/*
+			 * Mark the transaction's distributed invalidations as overflowed
+			 * and free up the messages accumulated so far.
+			 */
+			txn->txn_flags |= RBTXN_DISTR_INVAL_OVERFLOWED;
 
-		memcpy(txn->invalidations + txn->ninvalidations, msgs,
-			   nmsgs * sizeof(SharedInvalidationMessage));
-		txn->ninvalidations += nmsgs;
+			if (txn->invalidations_distributed)
+			{
+				pfree(txn->invalidations_distributed);
+				txn->invalidations_distributed = NULL;
+				txn->ninvalidations_distributed = 0;
+			}
+		}
+		else
+			ReorderBufferAccumulateInvalidations(&txn->invalidations_distributed,
+												 &txn->ninvalidations_distributed,
+												 msgs, nmsgs);
 	}
 
-	change = ReorderBufferAllocChange(rb);
-	change->action = REORDER_BUFFER_CHANGE_INVALIDATION;
-	change->data.inval.ninvalidations = nmsgs;
-	change->data.inval.invalidations = (SharedInvalidationMessage *)
-		palloc(sizeof(SharedInvalidationMessage) * nmsgs);
-	memcpy(change->data.inval.invalidations, msgs,
-		   sizeof(SharedInvalidationMessage) * nmsgs);
-
-	ReorderBufferQueueChange(rb, xid, lsn, change, false);
+	/* Queue the invalidation messages into the transaction */
+	ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
 
 	MemoryContextSwitchTo(oldcontext);
 }
diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index 656e66e0ae0..2f0c08b8fbd 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -211,9 +211,9 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
 		 * impact the users, so we used DEBUG1 level to log the message.
 		 */
 		ereport(slot->data.persistency == RS_TEMPORARY ? LOG : DEBUG1,
-				errmsg("could not synchronize replication slot \"%s\" because remote slot precedes local slot",
+				errmsg("could not synchronize replication slot \"%s\"",
 					   remote_slot->name),
-				errdetail("The remote slot has LSN %X/%X and catalog xmin %u, but the local slot has LSN %X/%X and catalog xmin %u.",
+				errdetail("Synchronization could lead to data loss, because the remote slot needs WAL at LSN %X/%08X and catalog xmin %u, but the standby has LSN %X/%08X and catalog xmin %u.",
 						  LSN_FORMAT_ARGS(remote_slot->restart_lsn),
 						  remote_slot->catalog_xmin,
 						  LSN_FORMAT_ARGS(slot->data.restart_lsn),
@@ -275,7 +275,7 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
 				ereport(ERROR,
 						errmsg_internal("synchronized confirmed_flush for slot \"%s\" differs from remote slot",
 										remote_slot->name),
-						errdetail_internal("Remote slot has LSN %X/%X but local slot has LSN %X/%X.",
+						errdetail_internal("Remote slot has LSN %X/%08X but local slot has LSN %X/%08X.",
 										   LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
 										   LSN_FORMAT_ARGS(slot->data.confirmed_flush)));
 		}
@@ -593,7 +593,7 @@ update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
 	{
 		ereport(LOG,
 				errmsg("could not synchronize replication slot \"%s\"", remote_slot->name),
-				errdetail("Logical decoding could not find consistent point from local slot's LSN %X/%X.",
+				errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%08X.",
 						  LSN_FORMAT_ARGS(slot->data.restart_lsn)));
 
 		return false;
@@ -642,7 +642,7 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
 		ereport(AmLogicalSlotSyncWorkerProcess() ? LOG : ERROR,
 				errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 				errmsg("skipping slot synchronization because the received slot sync"
-					   " LSN %X/%X for slot \"%s\" is ahead of the standby position %X/%X",
+					   " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
 					   LSN_FORMAT_ARGS(remote_slot->confirmed_lsn),
 					   remote_slot->name,
 					   LSN_FORMAT_ARGS(latestFlushPtr)));
@@ -733,7 +733,7 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid)
 				ereport(ERROR,
 						errmsg_internal("cannot synchronize local slot \"%s\"",
 										remote_slot->name),
-						errdetail_internal("Local slot's start streaming location LSN(%X/%X) is ahead of remote slot's LSN(%X/%X).",
+						errdetail_internal("Local slot's start streaming location LSN(%X/%08X) is ahead of remote slot's LSN(%X/%08X).",
 										   LSN_FORMAT_ARGS(slot->data.confirmed_flush),
 										   LSN_FORMAT_ARGS(remote_slot->confirmed_lsn)));
 
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 0d7bddbe4ed..8532bfd27e5 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -774,7 +774,7 @@ SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, Transact
 		if (rbtxn_is_prepared(txn))
 			continue;
 
-		elog(DEBUG2, "adding a new snapshot and invalidations to %u at %X/%X",
+		elog(DEBUG2, "adding a new snapshot and invalidations to %u at %X/%08X",
 			 txn->xid, LSN_FORMAT_ARGS(lsn));
 
 		/*
@@ -794,6 +794,13 @@ SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, Transact
 		 * contents built by the current transaction even after its decoding,
 		 * which should have been invalidated due to concurrent catalog
 		 * changing transaction.
+		 *
+		 * Distribute only the invalidation messages generated by the current
+		 * committed transaction. Invalidation messages received from other
+		 * transactions would have already been propagated to the relevant
+		 * in-progress transactions. This transaction would have processed
+		 * those invalidations, ensuring that subsequent transactions observe
+		 * a consistent cache state.
 		 */
 		if (txn->xid != xid)
 		{
@@ -807,8 +814,9 @@ SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, Transact
 			{
 				Assert(msgs != NULL);
 
-				ReorderBufferAddInvalidations(builder->reorder, txn->xid, lsn,
-											  ninvalidations, msgs);
+				ReorderBufferAddDistributedInvalidations(builder->reorder,
+														 txn->xid, lsn,
+														 ninvalidations, msgs);
 			}
 		}
 	}
@@ -1263,10 +1271,10 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
 									builder->initial_xmin_horizon))
 	{
 		ereport(DEBUG1,
-				(errmsg_internal("skipping snapshot at %X/%X while building logical decoding snapshot, xmin horizon too low",
-								 LSN_FORMAT_ARGS(lsn)),
-				 errdetail_internal("initial xmin horizon of %u vs the snapshot's %u",
-									builder->initial_xmin_horizon, running->oldestRunningXid)));
+				errmsg_internal("skipping snapshot at %X/%08X while building logical decoding snapshot, xmin horizon too low",
+								LSN_FORMAT_ARGS(lsn)),
+				errdetail_internal("initial xmin horizon of %u vs the snapshot's %u",
+								   builder->initial_xmin_horizon, running->oldestRunningXid));
 
 
 		SnapBuildWaitSnapshot(running, builder->initial_xmin_horizon);
@@ -1302,9 +1310,9 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
 		builder->next_phase_at = InvalidTransactionId;
 
 		ereport(LOG,
-				(errmsg("logical decoding found consistent point at %X/%X",
-						LSN_FORMAT_ARGS(lsn)),
-				 errdetail("There are no running transactions.")));
+				errmsg("logical decoding found consistent point at %X/%08X",
+					   LSN_FORMAT_ARGS(lsn)),
+				errdetail("There are no running transactions."));
 
 		return false;
 	}
@@ -1351,10 +1359,10 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
 		Assert(TransactionIdIsNormal(builder->xmax));
 
 		ereport(LOG,
-				(errmsg("logical decoding found initial starting point at %X/%X",
-						LSN_FORMAT_ARGS(lsn)),
-				 errdetail("Waiting for transactions (approximately %d) older than %u to end.",
-						   running->xcnt, running->nextXid)));
+				errmsg("logical decoding found initial starting point at %X/%08X",
+					   LSN_FORMAT_ARGS(lsn)),
+				errdetail("Waiting for transactions (approximately %d) older than %u to end.",
+						  running->xcnt, running->nextXid));
 
 		SnapBuildWaitSnapshot(running, running->nextXid);
 	}
@@ -1375,10 +1383,10 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
 		builder->next_phase_at = running->nextXid;
 
 		ereport(LOG,
-				(errmsg("logical decoding found initial consistent point at %X/%X",
-						LSN_FORMAT_ARGS(lsn)),
-				 errdetail("Waiting for transactions (approximately %d) older than %u to end.",
-						   running->xcnt, running->nextXid)));
+				errmsg("logical decoding found initial consistent point at %X/%08X",
+					   LSN_FORMAT_ARGS(lsn)),
+				errdetail("Waiting for transactions (approximately %d) older than %u to end.",
+						  running->xcnt, running->nextXid));
 
 		SnapBuildWaitSnapshot(running, running->nextXid);
 	}
@@ -1399,9 +1407,9 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn
 		builder->next_phase_at = InvalidTransactionId;
 
 		ereport(LOG,
-				(errmsg("logical decoding found consistent point at %X/%X",
-						LSN_FORMAT_ARGS(lsn)),
-				 errdetail("There are no old transactions anymore.")));
+				errmsg("logical decoding found consistent point at %X/%08X",
+					   LSN_FORMAT_ARGS(lsn)),
+				errdetail("There are no old transactions anymore."));
 	}
 
 	/*
@@ -1905,9 +1913,9 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
 	Assert(builder->state == SNAPBUILD_CONSISTENT);
 
 	ereport(LOG,
-			(errmsg("logical decoding found consistent point at %X/%X",
-					LSN_FORMAT_ARGS(lsn)),
-			 errdetail("Logical decoding will begin using saved snapshot.")));
+			errmsg("logical decoding found consistent point at %X/%08X",
+				   LSN_FORMAT_ARGS(lsn)),
+			errdetail("Logical decoding will begin using saved snapshot."));
 	return true;
 
 snapshot_not_interesting:
@@ -2053,7 +2061,7 @@ SnapBuildSnapshotExists(XLogRecPtr lsn)
 	int			ret;
 	struct stat stat_buf;
 
-	sprintf(path, "%s/%X-%X.snap",
+	sprintf(path, "%s/%08X-%08X.snap",
 			PG_LOGICAL_SNAPSHOTS_DIR,
 			LSN_FORMAT_ARGS(lsn));
 
diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c
index 8e1e8762f62..e4fd6347fd1 100644
--- a/src/backend/replication/logical/tablesync.c
+++ b/src/backend/replication/logical/tablesync.c
@@ -603,14 +603,19 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
 						TimestampDifferenceExceeds(hentry->last_start_time, now,
 												   wal_retrieve_retry_interval))
 					{
-						logicalrep_worker_launch(WORKERTYPE_TABLESYNC,
-												 MyLogicalRepWorker->dbid,
-												 MySubscription->oid,
-												 MySubscription->name,
-												 MyLogicalRepWorker->userid,
-												 rstate->relid,
-												 DSM_HANDLE_INVALID);
+						/*
+						 * Set the last_start_time even if we fail to start
+						 * the worker, so that we won't retry until
+						 * wal_retrieve_retry_interval has elapsed.
+						 */
 						hentry->last_start_time = now;
+						(void) logicalrep_worker_launch(WORKERTYPE_TABLESYNC,
+														MyLogicalRepWorker->dbid,
+														MySubscription->oid,
+														MySubscription->name,
+														MyLogicalRepWorker->userid,
+														rstate->relid,
+														DSM_HANDLE_INVALID);
 					}
 				}
 			}
@@ -1548,7 +1553,7 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos)
 copy_table_done:
 
 	elog(DEBUG1,
-		 "LogicalRepSyncTableStart: '%s' origin_startpos lsn %X/%X",
+		 "LogicalRepSyncTableStart: '%s' origin_startpos lsn %X/%08X",
 		 originname, LSN_FORMAT_ARGS(*origin_startpos));
 
 	/*
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 4151a4b2a96..c5fb627aa56 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -109,13 +109,6 @@
  * If ever a user needs to be aware of the tri-state value, they can fetch it
  * from the pg_subscription catalog (see column subtwophasestate).
  *
- * We don't allow to toggle two_phase option of a subscription because it can
- * lead to an inconsistent replica. Consider, initially, it was on and we have
- * received some prepare then we turn it off, now at commit time the server
- * will send the entire transaction data along with the commit. With some more
- * analysis, we can allow changing this option from off to on but not sure if
- * that alone would be useful.
- *
  * Finally, to avoid problems mentioned in previous paragraphs from any
  * subsequent (not READY) tablesyncs (need to toggle two_phase option from 'on'
  * to 'off' and then again back to 'on') there is a restriction for
@@ -1023,7 +1016,7 @@ apply_handle_commit(StringInfo s)
 	if (commit_data.commit_lsn != remote_final_lsn)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROTOCOL_VIOLATION),
-				 errmsg_internal("incorrect commit LSN %X/%X in commit message (expected %X/%X)",
+				 errmsg_internal("incorrect commit LSN %X/%08X in commit message (expected %X/%08X)",
 								 LSN_FORMAT_ARGS(commit_data.commit_lsn),
 								 LSN_FORMAT_ARGS(remote_final_lsn))));
 
@@ -1115,7 +1108,7 @@ apply_handle_prepare(StringInfo s)
 	if (prepare_data.prepare_lsn != remote_final_lsn)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROTOCOL_VIOLATION),
-				 errmsg_internal("incorrect prepare LSN %X/%X in prepare message (expected %X/%X)",
+				 errmsg_internal("incorrect prepare LSN %X/%08X in prepare message (expected %X/%08X)",
 								 LSN_FORMAT_ARGS(prepare_data.prepare_lsn),
 								 LSN_FORMAT_ARGS(remote_final_lsn))));
 
@@ -3910,7 +3903,7 @@ send_feedback(XLogRecPtr recvpos, bool force, bool requestReply)
 	pq_sendint64(reply_message, now);	/* sendTime */
 	pq_sendbyte(reply_message, requestReply);	/* replyRequested */
 
-	elog(DEBUG2, "sending feedback (force %d) to recv %X/%X, write %X/%X, flush %X/%X",
+	elog(DEBUG2, "sending feedback (force %d) to recv %X/%08X, write %X/%08X, flush %X/%08X",
 		 force,
 		 LSN_FORMAT_ARGS(recvpos),
 		 LSN_FORMAT_ARGS(writepos),
@@ -4626,8 +4619,16 @@ run_apply_worker()
 		walrcv_startstreaming(LogRepWorkerWalRcvConn, &options);
 
 		StartTransactionCommand();
+
+		/*
+		 * Updating pg_subscription might involve TOAST table access, so
+		 * ensure we have a valid snapshot.
+		 */
+		PushActiveSnapshot(GetTransactionSnapshot());
+
 		UpdateTwoPhaseState(MySubscription->oid, LOGICALREP_TWOPHASE_STATE_ENABLED);
 		MySubscription->twophasestate = LOGICALREP_TWOPHASE_STATE_ENABLED;
+		PopActiveSnapshot();
 		CommitTransactionCommand();
 	}
 	else
@@ -4843,7 +4844,15 @@ DisableSubscriptionAndExit(void)
 
 	/* Disable the subscription */
 	StartTransactionCommand();
+
+	/*
+	 * Updating pg_subscription might involve TOAST table access, so ensure we
+	 * have a valid snapshot.
+	 */
+	PushActiveSnapshot(GetTransactionSnapshot());
+
 	DisableSubscription(MySubscription->oid);
+	PopActiveSnapshot();
 	CommitTransactionCommand();
 
 	/* Ensure we remove no-longer-useful entry for worker's start time */
@@ -4900,7 +4909,7 @@ maybe_start_skipping_changes(XLogRecPtr finish_lsn)
 	skip_xact_finish_lsn = finish_lsn;
 
 	ereport(LOG,
-			errmsg("logical replication starts skipping transaction at LSN %X/%X",
+			errmsg("logical replication starts skipping transaction at LSN %X/%08X",
 				   LSN_FORMAT_ARGS(skip_xact_finish_lsn)));
 }
 
@@ -4914,8 +4923,8 @@ stop_skipping_changes(void)
 		return;
 
 	ereport(LOG,
-			(errmsg("logical replication completed skipping transaction at LSN %X/%X",
-					LSN_FORMAT_ARGS(skip_xact_finish_lsn))));
+			errmsg("logical replication completed skipping transaction at LSN %X/%08X",
+				   LSN_FORMAT_ARGS(skip_xact_finish_lsn)));
 
 	/* Stop skipping changes */
 	skip_xact_finish_lsn = InvalidXLogRecPtr;
@@ -4948,6 +4957,12 @@ clear_subscription_skip_lsn(XLogRecPtr finish_lsn)
 	}
 
 	/*
+	 * Updating pg_subscription might involve TOAST table access, so ensure we
+	 * have a valid snapshot.
+	 */
+	PushActiveSnapshot(GetTransactionSnapshot());
+
+	/*
 	 * Protect subskiplsn of pg_subscription from being concurrently updated
 	 * while clearing it.
 	 */
@@ -4997,7 +5012,7 @@ clear_subscription_skip_lsn(XLogRecPtr finish_lsn)
 		if (myskiplsn != finish_lsn)
 			ereport(WARNING,
 					errmsg("skip-LSN of subscription \"%s\" cleared", MySubscription->name),
-					errdetail("Remote transaction's finish WAL location (LSN) %X/%X did not match skip-LSN %X/%X.",
+					errdetail("Remote transaction's finish WAL location (LSN) %X/%08X did not match skip-LSN %X/%08X.",
 							  LSN_FORMAT_ARGS(finish_lsn),
 							  LSN_FORMAT_ARGS(myskiplsn)));
 	}
@@ -5005,6 +5020,8 @@ clear_subscription_skip_lsn(XLogRecPtr finish_lsn)
 	heap_freetuple(tup);
 	table_close(rel, NoLock);
 
+	PopActiveSnapshot();
+
 	if (started_tx)
 		CommitTransactionCommand();
 }
@@ -5032,7 +5049,7 @@ apply_error_callback(void *arg)
 					   logicalrep_message_type(errarg->command),
 					   errarg->remote_xid);
 		else
-			errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" in transaction %u, finished at %X/%X",
+			errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" in transaction %u, finished at %X/%08X",
 					   errarg->origin_name,
 					   logicalrep_message_type(errarg->command),
 					   errarg->remote_xid,
@@ -5050,7 +5067,7 @@ apply_error_callback(void *arg)
 						   errarg->rel->remoterel.relname,
 						   errarg->remote_xid);
 			else
-				errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" for replication target relation \"%s.%s\" in transaction %u, finished at %X/%X",
+				errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" for replication target relation \"%s.%s\" in transaction %u, finished at %X/%08X",
 						   errarg->origin_name,
 						   logicalrep_message_type(errarg->command),
 						   errarg->rel->remoterel.nspname,
@@ -5069,7 +5086,7 @@ apply_error_callback(void *arg)
 						   errarg->rel->remoterel.attnames[errarg->remote_attnum],
 						   errarg->remote_xid);
 			else
-				errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" for replication target relation \"%s.%s\" column \"%s\" in transaction %u, finished at %X/%X",
+				errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" for replication target relation \"%s.%s\" column \"%s\" in transaction %u, finished at %X/%08X",
 						   errarg->origin_name,
 						   logicalrep_message_type(errarg->command),
 						   errarg->rel->remoterel.nspname,
diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
index 693a766e6d7..f4c977262c5 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -297,10 +297,12 @@ parse_output_parameters(List *options, PGOutputData *data)
 	bool		two_phase_option_given = false;
 	bool		origin_option_given = false;
 
+	/* Initialize optional parameters to defaults */
 	data->binary = false;
 	data->streaming = LOGICALREP_STREAM_OFF;
 	data->messages = false;
 	data->two_phase = false;
+	data->publish_no_origin = false;
 
 	foreach(lc, options)
 	{
@@ -1789,7 +1791,7 @@ LoadPublications(List *pubnames)
 		else
 			ereport(WARNING,
 					errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-					errmsg("skipped loading publication: %s", pubname),
+					errmsg("skipped loading publication \"%s\"", pubname),
 					errdetail("The publication does not exist at this point in the WAL."),
 					errhint("Create the publication if it does not exist."));
 	}
diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y
index 7440aae5a1a..8a649199ec6 100644
--- a/src/backend/replication/repl_gram.y
+++ b/src/backend/replication/repl_gram.y
@@ -279,7 +279,7 @@ alter_replication_slot:
 			;
 
 /*
- * START_REPLICATION [SLOT slot] [PHYSICAL] %X/%X [TIMELINE %u]
+ * START_REPLICATION [SLOT slot] [PHYSICAL] %X/%08X [TIMELINE %u]
  */
 start_replication:
 			K_START_REPLICATION opt_slot opt_physical RECPTR opt_timeline
@@ -295,7 +295,7 @@ start_replication:
 				}
 			;
 
-/* START_REPLICATION SLOT slot LOGICAL %X/%X options */
+/* START_REPLICATION SLOT slot LOGICAL %X/%08X options */
 start_logical_replication:
 			K_START_REPLICATION K_SLOT IDENT K_LOGICAL RECPTR plugin_options
 				{
diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l
index 014ea8d25c6..b6930e28659 100644
--- a/src/backend/replication/repl_scanner.l
+++ b/src/backend/replication/repl_scanner.l
@@ -155,7 +155,7 @@ UPLOAD_MANIFEST		{ return K_UPLOAD_MANIFEST; }
 {hexdigit}+\/{hexdigit}+		{
 					uint32	hi,
 							lo;
-					if (sscanf(yytext, "%X/%X", &hi, &lo) != 2)
+					if (sscanf(yytext, "%X/%08X", &hi, &lo) != 2)
 						replication_yyerror(NULL, yyscanner, "invalid streaming start location");
 					yylval->recptr = ((uint64) hi) << 32 | lo;
 					return RECPTR;
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 600b87fa9cb..e44ad576bc7 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -154,7 +154,7 @@ int			max_replication_slots = 10; /* the maximum number of replication
  * Invalidate replication slots that have remained idle longer than this
  * duration; '0' disables it.
  */
-int			idle_replication_slot_timeout_mins = 0;
+int			idle_replication_slot_timeout_secs = 0;
 
 /*
  * This GUC lists streaming replication standby server slot names that
@@ -424,6 +424,7 @@ ReplicationSlotCreate(const char *name, bool db_specific,
 	slot->candidate_restart_valid = InvalidXLogRecPtr;
 	slot->candidate_restart_lsn = InvalidXLogRecPtr;
 	slot->last_saved_confirmed_flush = InvalidXLogRecPtr;
+	slot->last_saved_restart_lsn = InvalidXLogRecPtr;
 	slot->inactive_since = 0;
 
 	/*
@@ -1165,20 +1166,41 @@ ReplicationSlotsComputeRequiredLSN(void)
 	{
 		ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
 		XLogRecPtr	restart_lsn;
+		XLogRecPtr	last_saved_restart_lsn;
 		bool		invalidated;
+		ReplicationSlotPersistency persistency;
 
 		if (!s->in_use)
 			continue;
 
 		SpinLockAcquire(&s->mutex);
+		persistency = s->data.persistency;
 		restart_lsn = s->data.restart_lsn;
 		invalidated = s->data.invalidated != RS_INVAL_NONE;
+		last_saved_restart_lsn = s->last_saved_restart_lsn;
 		SpinLockRelease(&s->mutex);
 
 		/* invalidated slots need not apply */
 		if (invalidated)
 			continue;
 
+		/*
+		 * For persistent slots, use last_saved_restart_lsn to compute the
+		 * oldest LSN for removal of WAL segments.  The segments between
+		 * last_saved_restart_lsn and restart_lsn might be needed by a
+		 * persistent slot in the case of a database crash.  Non-persistent
+		 * slots can't survive a database crash, so we don't care about
+		 * last_saved_restart_lsn for them.
+		 */
+		if (persistency == RS_PERSISTENT)
+		{
+			if (last_saved_restart_lsn != InvalidXLogRecPtr &&
+				restart_lsn > last_saved_restart_lsn)
+			{
+				restart_lsn = last_saved_restart_lsn;
+			}
+		}
+
 		if (restart_lsn != InvalidXLogRecPtr &&
 			(min_required == InvalidXLogRecPtr ||
 			 restart_lsn < min_required))
@@ -1216,7 +1238,9 @@ ReplicationSlotsComputeLogicalRestartLSN(void)
 	{
 		ReplicationSlot *s;
 		XLogRecPtr	restart_lsn;
+		XLogRecPtr	last_saved_restart_lsn;
 		bool		invalidated;
+		ReplicationSlotPersistency persistency;
 
 		s = &ReplicationSlotCtl->replication_slots[i];
 
@@ -1230,14 +1254,33 @@ ReplicationSlotsComputeLogicalRestartLSN(void)
 
 		/* read once, it's ok if it increases while we're checking */
 		SpinLockAcquire(&s->mutex);
+		persistency = s->data.persistency;
 		restart_lsn = s->data.restart_lsn;
 		invalidated = s->data.invalidated != RS_INVAL_NONE;
+		last_saved_restart_lsn = s->last_saved_restart_lsn;
 		SpinLockRelease(&s->mutex);
 
 		/* invalidated slots need not apply */
 		if (invalidated)
 			continue;
 
+		/*
+		 * For persistent slots, use last_saved_restart_lsn to compute the
+		 * oldest LSN for removal of WAL segments.  The segments between
+		 * last_saved_restart_lsn and restart_lsn might be needed by a
+		 * persistent slot in the case of a database crash.  Non-persistent
+		 * slots can't survive a database crash, so we don't care about
+		 * last_saved_restart_lsn for them.
+		 */
+		if (persistency == RS_PERSISTENT)
+		{
+			if (last_saved_restart_lsn != InvalidXLogRecPtr &&
+				restart_lsn > last_saved_restart_lsn)
+			{
+				restart_lsn = last_saved_restart_lsn;
+			}
+		}
+
 		if (restart_lsn == InvalidXLogRecPtr)
 			continue;
 
@@ -1455,6 +1498,7 @@ ReplicationSlotReserveWal(void)
 
 	Assert(slot != NULL);
 	Assert(slot->data.restart_lsn == InvalidXLogRecPtr);
+	Assert(slot->last_saved_restart_lsn == InvalidXLogRecPtr);
 
 	/*
 	 * The replication slot mechanism is used to prevent removal of required
@@ -1547,8 +1591,8 @@ ReportSlotInvalidation(ReplicationSlotInvalidationCause cause,
 				uint64		ex = oldestLSN - restart_lsn;
 
 				appendStringInfo(&err_detail,
-								 ngettext("The slot's restart_lsn %X/%X exceeds the limit by %" PRIu64 " byte.",
-										  "The slot's restart_lsn %X/%X exceeds the limit by %" PRIu64 " bytes.",
+								 ngettext("The slot's restart_lsn %X/%08X exceeds the limit by %" PRIu64 " byte.",
+										  "The slot's restart_lsn %X/%08X exceeds the limit by %" PRIu64 " bytes.",
 										  ex),
 								 LSN_FORMAT_ARGS(restart_lsn),
 								 ex);
@@ -1568,13 +1612,10 @@ ReportSlotInvalidation(ReplicationSlotInvalidationCause cause,
 
 		case RS_INVAL_IDLE_TIMEOUT:
 			{
-				int			minutes = slot_idle_seconds / SECS_PER_MINUTE;
-				int			secs = slot_idle_seconds % SECS_PER_MINUTE;
-
 				/* translator: %s is a GUC variable name */
-				appendStringInfo(&err_detail, _("The slot's idle time of %dmin %02ds exceeds the configured \"%s\" duration of %dmin."),
-								 minutes, secs, "idle_replication_slot_timeout",
-								 idle_replication_slot_timeout_mins);
+				appendStringInfo(&err_detail, _("The slot's idle time of %lds exceeds the configured \"%s\" duration of %ds."),
+								 slot_idle_seconds, "idle_replication_slot_timeout",
+								 idle_replication_slot_timeout_secs);
 				/* translator: %s is a GUC variable name */
 				appendStringInfo(&err_hint, _("You might need to increase \"%s\"."),
 								 "idle_replication_slot_timeout");
@@ -1612,7 +1653,7 @@ ReportSlotInvalidation(ReplicationSlotInvalidationCause cause,
 static inline bool
 CanInvalidateIdleSlot(ReplicationSlot *s)
 {
-	return (idle_replication_slot_timeout_mins != 0 &&
+	return (idle_replication_slot_timeout_secs != 0 &&
 			!XLogRecPtrIsInvalid(s->data.restart_lsn) &&
 			s->inactive_since > 0 &&
 			!(RecoveryInProgress() && s->data.synced));
@@ -1673,9 +1714,9 @@ DetermineSlotInvalidationCause(uint32 possible_causes, ReplicationSlot *s,
 		if (CanInvalidateIdleSlot(s))
 		{
 			/*
-			 * We simulate the invalidation due to idle_timeout as the minimum
-			 * time idle time is one minute which makes tests take a long
-			 * time.
+			 * Simulate the invalidation due to idle_timeout to test the
+			 * timeout behavior promptly, without waiting for it to trigger
+			 * naturally.
 			 */
 #ifdef USE_INJECTION_POINTS
 			if (IS_INJECTION_POINT_ATTACHED("slot-timeout-inval"))
@@ -1690,7 +1731,7 @@ DetermineSlotInvalidationCause(uint32 possible_causes, ReplicationSlot *s,
 			 * idle_replication_slot_timeout GUC.
 			 */
 			if (TimestampDifferenceExceedsSeconds(s->inactive_since, now,
-												  idle_replication_slot_timeout_mins * SECS_PER_MINUTE))
+												  idle_replication_slot_timeout_secs))
 			{
 				*inactive_since = s->inactive_since;
 				return RS_INVAL_IDLE_TIMEOUT;
@@ -1835,7 +1876,10 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes,
 			 * just rely on .invalidated.
 			 */
 			if (invalidation_cause == RS_INVAL_WAL_REMOVED)
+			{
 				s->data.restart_lsn = InvalidXLogRecPtr;
+				s->last_saved_restart_lsn = InvalidXLogRecPtr;
+			}
 
 			/* Let caller know */
 			*invalidated = true;
@@ -1844,15 +1888,6 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes,
 		SpinLockRelease(&s->mutex);
 
 		/*
-		 * The logical replication slots shouldn't be invalidated as GUC
-		 * max_slot_wal_keep_size is set to -1 and
-		 * idle_replication_slot_timeout is set to 0 during the binary
-		 * upgrade. See check_old_cluster_for_valid_slots() where we ensure
-		 * that no invalidated before the upgrade.
-		 */
-		Assert(!(*invalidated && SlotIsLogical(s) && IsBinaryUpgrade));
-
-		/*
 		 * Calculate the idle time duration of the slot if slot is marked
 		 * invalidated with RS_INVAL_IDLE_TIMEOUT.
 		 */
@@ -1998,6 +2033,10 @@ restart:
 		if (!s->in_use)
 			continue;
 
+		/* Prevent invalidation of logical slots during binary upgrade */
+		if (SlotIsLogical(s) && IsBinaryUpgrade)
+			continue;
+
 		if (InvalidatePossiblyObsoleteSlot(possible_causes, s, oldestLSN, dboid,
 										   snapshotConflictHorizon,
 										   &invalidated))
@@ -2032,6 +2071,7 @@ void
 CheckPointReplicationSlots(bool is_shutdown)
 {
 	int			i;
+	bool		last_saved_restart_lsn_updated = false;
 
 	elog(DEBUG1, "performing replication slot checkpoint");
 
@@ -2076,9 +2116,23 @@ CheckPointReplicationSlots(bool is_shutdown)
 			SpinLockRelease(&s->mutex);
 		}
 
+		/*
+		 * Track whether we will update the slot's last_saved_restart_lsn.
+		 * We need this to know whether to recompute the required LSN.
+		 */
+		if (s->last_saved_restart_lsn != s->data.restart_lsn)
+			last_saved_restart_lsn_updated = true;
+
 		SaveSlotToPath(s, path, LOG);
 	}
 	LWLockRelease(ReplicationSlotAllocationLock);
+
+	/*
+	 * Recompute the required LSN if SaveSlotToPath() updated
+	 * last_saved_restart_lsn for any slot.
+	 */
+	if (last_saved_restart_lsn_updated)
+		ReplicationSlotsComputeRequiredLSN();
 }
 
 /*
@@ -2354,6 +2408,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
 	if (!slot->just_dirtied)
 		slot->dirty = false;
 	slot->last_saved_confirmed_flush = cp.slotdata.confirmed_flush;
+	slot->last_saved_restart_lsn = cp.slotdata.restart_lsn;
 	SpinLockRelease(&slot->mutex);
 
 	LWLockRelease(&slot->io_in_progress_lock);
@@ -2569,6 +2624,7 @@ RestoreSlotFromDisk(const char *name)
 		slot->effective_xmin = cp.slotdata.xmin;
 		slot->effective_catalog_xmin = cp.slotdata.catalog_xmin;
 		slot->last_saved_confirmed_flush = cp.slotdata.confirmed_flush;
+		slot->last_saved_restart_lsn = cp.slotdata.restart_lsn;
 
 		slot->candidate_catalog_xmin = InvalidTransactionId;
 		slot->candidate_xmin_lsn = InvalidXLogRecPtr;
@@ -2993,22 +3049,3 @@ WaitForStandbyConfirmation(XLogRecPtr wait_for_lsn)
 
 	ConditionVariableCancelSleep();
 }
-
-/*
- * GUC check_hook for idle_replication_slot_timeout
- *
- * The value of idle_replication_slot_timeout must be set to 0 during
- * a binary upgrade. See start_postmaster() in pg_upgrade for more details.
- */
-bool
-check_idle_replication_slot_timeout(int *newval, void **extra, GucSource source)
-{
-	if (IsBinaryUpgrade && *newval != 0)
-	{
-		GUC_check_errdetail("\"%s\" must be set to 0 during binary upgrade mode.",
-							"idle_replication_slot_timeout");
-		return false;
-	}
-
-	return true;
-}
diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c
index 36cc2ed4e44..69f4c6157c5 100644
--- a/src/backend/replication/slotfuncs.c
+++ b/src/backend/replication/slotfuncs.c
@@ -566,7 +566,7 @@ pg_replication_slot_advance(PG_FUNCTION_ARGS)
 	if (moveto < minlsn)
 		ereport(ERROR,
 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-				 errmsg("cannot advance replication slot to %X/%X, minimum is %X/%X",
+				 errmsg("cannot advance replication slot to %X/%08X, minimum is %X/%08X",
 						LSN_FORMAT_ARGS(moveto), LSN_FORMAT_ARGS(minlsn))));
 
 	/* Do the actual slot update, depending on the slot type */
diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c
index cc35984ad00..32cf3a48b89 100644
--- a/src/backend/replication/syncrep.c
+++ b/src/backend/replication/syncrep.c
@@ -258,7 +258,7 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
 	{
 		char		buffer[32];
 
-		sprintf(buffer, "waiting for %X/%X", LSN_FORMAT_ARGS(lsn));
+		sprintf(buffer, "waiting for %X/%08X", LSN_FORMAT_ARGS(lsn));
 		set_ps_display_suffix(buffer);
 	}
 
@@ -566,7 +566,7 @@ SyncRepReleaseWaiters(void)
 
 	LWLockRelease(SyncRepLock);
 
-	elog(DEBUG3, "released %d procs up to write %X/%X, %d procs up to flush %X/%X, %d procs up to apply %X/%X",
+	elog(DEBUG3, "released %d procs up to write %X/%08X, %d procs up to flush %X/%08X, %d procs up to apply %X/%08X",
 		 numwrite, LSN_FORMAT_ARGS(writePtr),
 		 numflush, LSN_FORMAT_ARGS(flushPtr),
 		 numapply, LSN_FORMAT_ARGS(applyPtr));
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 8c4d0fd9aed..b6281101711 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -386,12 +386,12 @@ WalReceiverMain(const void *startup_data, size_t startup_data_len)
 		{
 			if (first_stream)
 				ereport(LOG,
-						(errmsg("started streaming WAL from primary at %X/%X on timeline %u",
-								LSN_FORMAT_ARGS(startpoint), startpointTLI)));
+						errmsg("started streaming WAL from primary at %X/%08X on timeline %u",
+							   LSN_FORMAT_ARGS(startpoint), startpointTLI));
 			else
 				ereport(LOG,
-						(errmsg("restarted WAL streaming at %X/%X on timeline %u",
-								LSN_FORMAT_ARGS(startpoint), startpointTLI)));
+						errmsg("restarted WAL streaming at %X/%08X on timeline %u",
+							   LSN_FORMAT_ARGS(startpoint), startpointTLI));
 			first_stream = false;
 
 			/* Initialize LogstreamResult and buffers for processing messages */
@@ -470,7 +470,7 @@ WalReceiverMain(const void *startup_data, size_t startup_data_len)
 						{
 							ereport(LOG,
 									(errmsg("replication terminated by primary server"),
-									 errdetail("End of WAL reached on timeline %u at %X/%X.",
+									 errdetail("End of WAL reached on timeline %u at %X/%08X.",
 											   startpointTLI,
 											   LSN_FORMAT_ARGS(LogstreamResult.Write))));
 							endofwal = true;
@@ -711,7 +711,7 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
 	{
 		char		activitymsg[50];
 
-		snprintf(activitymsg, sizeof(activitymsg), "restarting at %X/%X",
+		snprintf(activitymsg, sizeof(activitymsg), "restarting at %X/%08X",
 				 LSN_FORMAT_ARGS(*startpoint));
 		set_ps_display(activitymsg);
 	}
@@ -1014,7 +1014,7 @@ XLogWalRcvFlush(bool dying, TimeLineID tli)
 		{
 			char		activitymsg[50];
 
-			snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X",
+			snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%08X",
 					 LSN_FORMAT_ARGS(LogstreamResult.Write));
 			set_ps_display(activitymsg);
 		}
@@ -1138,7 +1138,7 @@ XLogWalRcvSendReply(bool force, bool requestReply)
 	pq_sendbyte(&reply_message, requestReply ? 1 : 0);
 
 	/* Send it */
-	elog(DEBUG2, "sending write %X/%X flush %X/%X apply %X/%X%s",
+	elog(DEBUG2, "sending write %X/%08X flush %X/%08X apply %X/%08X%s",
 		 LSN_FORMAT_ARGS(writePtr),
 		 LSN_FORMAT_ARGS(flushPtr),
 		 LSN_FORMAT_ARGS(applyPtr),
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 9fa8beb6103..28b8591efa5 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -408,7 +408,7 @@ IdentifySystem(void)
 	else
 		logptr = GetFlushRecPtr(&currTLI);
 
-	snprintf(xloc, sizeof(xloc), "%X/%X", LSN_FORMAT_ARGS(logptr));
+	snprintf(xloc, sizeof(xloc), "%X/%08X", LSN_FORMAT_ARGS(logptr));
 
 	if (MyDatabaseId != InvalidOid)
 	{
@@ -515,7 +515,7 @@ ReadReplicationSlot(ReadReplicationSlotCmd *cmd)
 		{
 			char		xloc[64];
 
-			snprintf(xloc, sizeof(xloc), "%X/%X",
+			snprintf(xloc, sizeof(xloc), "%X/%08X",
 					 LSN_FORMAT_ARGS(slot_contents.data.restart_lsn));
 			values[i] = CStringGetTextDatum(xloc);
 			nulls[i] = false;
@@ -892,12 +892,12 @@ StartReplication(StartReplicationCmd *cmd)
 				switchpoint < cmd->startpoint)
 			{
 				ereport(ERROR,
-						(errmsg("requested starting point %X/%X on timeline %u is not in this server's history",
-								LSN_FORMAT_ARGS(cmd->startpoint),
-								cmd->timeline),
-						 errdetail("This server's history forked from timeline %u at %X/%X.",
-								   cmd->timeline,
-								   LSN_FORMAT_ARGS(switchpoint))));
+						errmsg("requested starting point %X/%08X on timeline %u is not in this server's history",
+							   LSN_FORMAT_ARGS(cmd->startpoint),
+							   cmd->timeline),
+						errdetail("This server's history forked from timeline %u at %X/%08X.",
+								  cmd->timeline,
+								  LSN_FORMAT_ARGS(switchpoint)));
 			}
 			sendTimeLineValidUpto = switchpoint;
 		}
@@ -939,9 +939,9 @@ StartReplication(StartReplicationCmd *cmd)
 		if (FlushPtr < cmd->startpoint)
 		{
 			ereport(ERROR,
-					(errmsg("requested starting point %X/%X is ahead of the WAL flush position of this server %X/%X",
-							LSN_FORMAT_ARGS(cmd->startpoint),
-							LSN_FORMAT_ARGS(FlushPtr))));
+					errmsg("requested starting point %X/%08X is ahead of the WAL flush position of this server %X/%08X",
+						   LSN_FORMAT_ARGS(cmd->startpoint),
+						   LSN_FORMAT_ARGS(FlushPtr)));
 		}
 
 		/* Start streaming from the requested point */
@@ -983,7 +983,7 @@ StartReplication(StartReplicationCmd *cmd)
 		Datum		values[2];
 		bool		nulls[2] = {0};
 
-		snprintf(startpos_str, sizeof(startpos_str), "%X/%X",
+		snprintf(startpos_str, sizeof(startpos_str), "%X/%08X",
 				 LSN_FORMAT_ARGS(sendTimeLineValidUpto));
 
 		dest = CreateDestReceiver(DestRemoteSimple);
@@ -1324,7 +1324,7 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
 			ReplicationSlotPersist();
 	}
 
-	snprintf(xloc, sizeof(xloc), "%X/%X",
+	snprintf(xloc, sizeof(xloc), "%X/%08X",
 			 LSN_FORMAT_ARGS(MyReplicationSlot->data.confirmed_flush));
 
 	dest = CreateDestReceiver(DestRemoteSimple);
@@ -2429,7 +2429,7 @@ ProcessStandbyReplyMessage(void)
 		/* Copy because timestamptz_to_str returns a static buffer */
 		replyTimeStr = pstrdup(timestamptz_to_str(replyTime));
 
-		elog(DEBUG2, "write %X/%X flush %X/%X apply %X/%X%s reply_time %s",
+		elog(DEBUG2, "write %X/%08X flush %X/%08X apply %X/%08X%s reply_time %s",
 			 LSN_FORMAT_ARGS(writePtr),
 			 LSN_FORMAT_ARGS(flushPtr),
 			 LSN_FORMAT_ARGS(applyPtr),
@@ -3251,7 +3251,7 @@ XLogSendPhysical(void)
 
 		WalSndCaughtUp = true;
 
-		elog(DEBUG1, "walsender reached end of timeline at %X/%X (sent up to %X/%X)",
+		elog(DEBUG1, "walsender reached end of timeline at %X/%08X (sent up to %X/%08X)",
 			 LSN_FORMAT_ARGS(sendTimeLineValidUpto),
 			 LSN_FORMAT_ARGS(sentPtr));
 		return;
@@ -3392,7 +3392,7 @@ retry:
 	{
 		char		activitymsg[50];
 
-		snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X",
+		snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%08X",
 				 LSN_FORMAT_ARGS(sentPtr));
 		set_ps_display(activitymsg);
 	}
@@ -3449,8 +3449,16 @@ XLogSendLogical(void)
 	if (flushPtr == InvalidXLogRecPtr ||
 		logical_decoding_ctx->reader->EndRecPtr >= flushPtr)
 	{
+		/*
+		 * For cascading logical WAL senders, we use the replay LSN instead of
+		 * the flush LSN, since logical decoding on a standby only processes
+		 * WAL that has been replayed.  This distinction becomes particularly
+		 * important during shutdown, as new WAL is no longer replayed and the
+		 * last replayed LSN marks the furthest point up to which decoding can
+		 * proceed.
+		 */
 		if (am_cascading_walsender)
-			flushPtr = GetStandbyFlushRecPtr(NULL);
+			flushPtr = GetXLogReplayRecPtr(NULL);
 		else
 			flushPtr = GetFlushRecPtr(NULL);
 	}
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index f0bce5f9ed9..adc9e7600e1 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -923,8 +923,9 @@ rewriteTargetListIU(List *targetList,
 				apply_default = true;
 
 			/*
-			 * Can only insert DEFAULT into generated columns, regardless of
-			 * any OVERRIDING clauses.
+			 * Can only insert DEFAULT into generated columns.  (The
+			 * OVERRIDING clause does not apply to generated columns, so we
+			 * don't consider it here.)
 			 */
 			if (att_tup->attgenerated && !apply_default)
 			{
@@ -4544,7 +4545,7 @@ build_generation_expression(Relation rel, int attrno)
 List *
 QueryRewrite(Query *parsetree)
 {
-	uint64		input_query_id = parsetree->queryId;
+	int64		input_query_id = parsetree->queryId;
 	List	   *querylist;
 	List	   *results;
 	ListCell   *l;
diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c
index c64d815ebd1..3643f27ad6e 100644
--- a/src/backend/storage/aio/aio.c
+++ b/src/backend/storage/aio/aio.c
@@ -556,6 +556,13 @@ bool
 pgaio_io_was_recycled(PgAioHandle *ioh, uint64 ref_generation, PgAioHandleState *state)
 {
 	*state = ioh->state;
+
+	/*
+	 * Ensure that we don't see an earlier state of the handle than ioh->state
+	 * due to compiler or CPU reordering. This protects both ->generation as
+	 * directly used here, and other fields in the handle accessed in the
+	 * caller if the handle was not reused.
+	 */
 	pg_read_barrier();
 
 	return ioh->generation != ref_generation;
@@ -752,7 +759,7 @@ pgaio_io_wait_for_free(void)
 {
 	int			reclaimed = 0;
 
-	pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %d in-flight, %d idle IOs",
+	pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %u in-flight, %u idle IOs",
 				pgaio_my_backend->num_staged_ios,
 				dclist_count(&pgaio_my_backend->in_flight_ios),
 				dclist_count(&pgaio_my_backend->idle_ios));
@@ -773,7 +780,12 @@ pgaio_io_wait_for_free(void)
 			 * Note that no interrupts are processed between the state check
 			 * and the call to reclaim - that's important as otherwise an
 			 * interrupt could have already reclaimed the handle.
+			 *
+			 * Need to ensure that there's no reordering.  In the more common
+			 * paths, where we wait for IO, that's done by
+			 * pgaio_io_was_recycled().
 			 */
+			pg_read_barrier();
 			pgaio_io_reclaim(ioh);
 			reclaimed++;
 		}
@@ -797,7 +809,7 @@ pgaio_io_wait_for_free(void)
 	if (dclist_count(&pgaio_my_backend->in_flight_ios) == 0)
 		ereport(ERROR,
 				errmsg_internal("no free IOs despite no in-flight IOs"),
-				errdetail_internal("%d pending, %d in-flight, %d idle IOs",
+				errdetail_internal("%d pending, %u in-flight, %u idle IOs",
 								   pgaio_my_backend->num_staged_ios,
 								   dclist_count(&pgaio_my_backend->in_flight_ios),
 								   dclist_count(&pgaio_my_backend->idle_ios)));
@@ -828,7 +840,7 @@ pgaio_io_wait_for_free(void)
 			case PGAIO_HS_COMPLETED_IO:
 			case PGAIO_HS_SUBMITTED:
 				pgaio_debug_io(DEBUG2, ioh,
-							   "waiting for free io with %d in flight",
+							   "waiting for free io with %u in flight",
 							   dclist_count(&pgaio_my_backend->in_flight_ios));
 
 				/*
@@ -852,7 +864,12 @@ pgaio_io_wait_for_free(void)
 				 * check and the call to reclaim - that's important as
 				 * otherwise an interrupt could have already reclaimed the
 				 * handle.
+				 *
+				 * Need to ensure that there's no reordering.  In the more
+				 * common paths, where we wait for IO, that's done by
+				 * pgaio_io_was_recycled().
 				 */
+				pg_read_barrier();
 				pgaio_io_reclaim(ioh);
 				break;
 		}
@@ -1252,7 +1269,7 @@ pgaio_closing_fd(int fd)
 				break;
 
 			pgaio_debug_io(DEBUG2, ioh,
-						   "waiting for IO before FD %d gets closed, %d in-flight IOs",
+						   "waiting for IO before FD %d gets closed, %u in-flight IOs",
 						   fd, dclist_count(&pgaio_my_backend->in_flight_ios));
 
 			/* see comment in pgaio_io_wait_for_free() about raciness */
@@ -1288,7 +1305,7 @@ pgaio_shutdown(int code, Datum arg)
 		uint64		generation = ioh->generation;
 
 		pgaio_debug_io(DEBUG2, ioh,
-					   "waiting for IO to complete during shutdown, %d in-flight IOs",
+					   "waiting for IO to complete during shutdown, %u in-flight IOs",
 					   dclist_count(&pgaio_my_backend->in_flight_ios));
 
 		/* see comment in pgaio_io_wait_for_free() about raciness */
diff --git a/src/backend/storage/aio/aio_callback.c b/src/backend/storage/aio/aio_callback.c
index 0ad9795bb7e..03c9bba0802 100644
--- a/src/backend/storage/aio/aio_callback.c
+++ b/src/backend/storage/aio/aio_callback.c
@@ -256,6 +256,9 @@ pgaio_io_call_complete_shared(PgAioHandle *ioh)
 					   pgaio_result_status_string(result.status),
 					   result.id, result.error_data, result.result);
 		result = ce->cb->complete_shared(ioh, result, cb_data);
+
+		/* the callback should never transition to unknown */
+		Assert(result.status != PGAIO_RS_UNKNOWN);
 	}
 
 	ioh->distilled_result = result;
@@ -290,6 +293,7 @@ pgaio_io_call_complete_local(PgAioHandle *ioh)
 
 	/* start with distilled result from shared callback */
 	result = ioh->distilled_result;
+	Assert(result.status != PGAIO_RS_UNKNOWN);
 
 	for (int i = ioh->num_callbacks; i > 0; i--)
 	{
@@ -306,6 +310,9 @@ pgaio_io_call_complete_local(PgAioHandle *ioh)
 					   pgaio_result_status_string(result.status),
 					   result.id, result.error_data, result.result);
 		result = ce->cb->complete_local(ioh, result, cb_data);
+
+		/* the callback should never transition to unknown */
+		Assert(result.status != PGAIO_RS_UNKNOWN);
 	}
 
 	/*
diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c
index c719ba2727a..0a8c054162f 100644
--- a/src/backend/storage/aio/method_io_uring.c
+++ b/src/backend/storage/aio/method_io_uring.c
@@ -29,6 +29,9 @@
 
 #ifdef IOMETHOD_IO_URING_ENABLED
 
+#include <sys/mman.h>
+#include <unistd.h>
+
 #include <liburing.h>
 
 #include "miscadmin.h"
@@ -94,12 +97,32 @@ PgAioUringContext
 	struct io_uring io_uring_ring;
 } PgAioUringContext;
 
+/*
+ * Information about the capabilities that io_uring has.
+ *
+ * Depending on liburing and kernel version different features are
+ * supported. At least for the kernel a kernel version check does not suffice
+ * as various vendors do backport features to older kernels :(.
+ */
+typedef struct PgAioUringCaps
+{
+	bool		checked;
+	/* -1 if io_uring_queue_init_mem() is unsupported */
+	int			mem_init_size;
+} PgAioUringCaps;
+
+
 /* PgAioUringContexts for all backends */
 static PgAioUringContext *pgaio_uring_contexts;
 
 /* the current backend's context */
 static PgAioUringContext *pgaio_my_uring_context;
 
+static PgAioUringCaps pgaio_uring_caps =
+{
+	.checked = false,
+	.mem_init_size = -1,
+};
 
 static uint32
 pgaio_uring_procs(void)
@@ -111,30 +134,184 @@ pgaio_uring_procs(void)
 	return MaxBackends + NUM_AUXILIARY_PROCS - MAX_IO_WORKERS;
 }
 
-static Size
+/*
+ * Initializes pgaio_uring_caps, unless that's already done.
+ */
+static void
+pgaio_uring_check_capabilities(void)
+{
+	if (pgaio_uring_caps.checked)
+		return;
+
+	/*
+	 * By default io_uring creates a shared memory mapping for each io_uring
+	 * instance, leading to a large number of memory mappings. Unfortunately a
+	 * large number of memory mappings slows things down, backend exit is
+	 * particularly affected.  To address that, newer kernels (6.5) support
+	 * using user-provided memory for the rings. By putting the relevant
+	 * memory into shared memory we don't need any additional mappings.
+	 *
+	 * To know whether this is supported, we unfortunately need to probe the
+	 * kernel by trying to create a ring with userspace-provided memory. This
+	 * also has a secondary benefit: We can determine precisely how much
+	 * memory we need for each io_uring instance.
+	 */
+#if defined(HAVE_LIBURING_QUEUE_INIT_MEM) && defined(IORING_SETUP_NO_MMAP)
+	{
+		struct io_uring test_ring;
+		size_t		ring_size;
+		void	   *ring_ptr;
+		struct io_uring_params p = {0};
+		int			ret;
+
+		/*
+		 * Liburing does not yet provide an API to query how much memory a
+		 * ring will need. So we over-estimate it here. As the memory is freed
+		 * just below, that's a small temporary waste of memory.
+		 *
+		 * 1MB is more than enough for rings within io_max_concurrency's
+		 * range.
+		 */
+		ring_size = 1024 * 1024;
+
+		/*
+		 * Hard to believe a system exists where 1MB would not be a multiple
+		 * of the page size. But it's cheap to ensure...
+		 */
+		ring_size -= ring_size % sysconf(_SC_PAGESIZE);
+
+		ring_ptr = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+		if (ring_ptr == MAP_FAILED)
+			elog(ERROR,
+				 "mmap(%zu) to determine io_uring_queue_init_mem() support failed: %m",
+				 ring_size);
+
+		ret = io_uring_queue_init_mem(io_max_concurrency, &test_ring, &p, ring_ptr, ring_size);
+		if (ret > 0)
+		{
+			pgaio_uring_caps.mem_init_size = ret;
+
+			elog(DEBUG1,
+				 "can use combined memory mapping for io_uring, each ring needs %d bytes",
+				 ret);
+
+			/* clean up the created ring, it was just for a test */
+			io_uring_queue_exit(&test_ring);
+		}
+		else
+		{
+			/*
+			 * There are different reasons for ring creation to fail, but it's
+			 * ok to treat that just as io_uring_queue_init_mem() not being
+			 * supported. We'll report a more detailed error in
+			 * pgaio_uring_shmem_init().
+			 */
+			errno = -ret;
+			elog(DEBUG1,
+				 "cannot use combined memory mapping for io_uring, ring creation failed: %m");
+
+		}
+
+		if (munmap(ring_ptr, ring_size) != 0)
+			elog(ERROR, "munmap() failed: %m");
+	}
+#else
+	{
+		elog(DEBUG1,
+			 "can't use combined memory mapping for io_uring, kernel or liburing too old");
+	}
+#endif
+
+	pgaio_uring_caps.checked = true;
+}
+
+/*
+ * Memory for all PgAioUringContext instances
+ */
+static size_t
 pgaio_uring_context_shmem_size(void)
 {
 	return mul_size(pgaio_uring_procs(), sizeof(PgAioUringContext));
 }
 
+/*
+ * Memory for the combined memory used by io_uring instances. Returns 0 if
+ * that is not supported by kernel/liburing.
+ */
+static size_t
+pgaio_uring_ring_shmem_size(void)
+{
+	size_t		sz = 0;
+
+	if (pgaio_uring_caps.mem_init_size > 0)
+	{
+		/*
+		 * Memory for rings needs to be aligned to a page boundary, so
+		 * reserve space for that. Luckily it does not need to be aligned to
+		 * hugepage boundaries, even if huge pages are used.
+		 */
+		sz = add_size(sz, sysconf(_SC_PAGESIZE));
+		sz = add_size(sz, mul_size(pgaio_uring_procs(),
+								   pgaio_uring_caps.mem_init_size));
+	}
+
+	return sz;
+}
+
 static size_t
 pgaio_uring_shmem_size(void)
 {
-	return pgaio_uring_context_shmem_size();
+	size_t		sz;
+
+	/*
+	 * Kernel and liburing support for various features influences how much
+	 * shmem we need, perform the necessary checks.
+	 */
+	pgaio_uring_check_capabilities();
+
+	sz = pgaio_uring_context_shmem_size();
+	sz = add_size(sz, pgaio_uring_ring_shmem_size());
+
+	return sz;
 }
 
 static void
 pgaio_uring_shmem_init(bool first_time)
 {
-	int			TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS - MAX_IO_WORKERS;
+	int			TotalProcs = pgaio_uring_procs();
 	bool		found;
+	char	   *shmem;
+	size_t		ring_mem_remain = 0;
+	char	   *ring_mem_next = 0;
 
-	pgaio_uring_contexts = (PgAioUringContext *)
-		ShmemInitStruct("AioUring", pgaio_uring_shmem_size(), &found);
-
+	/*
+	 * We allocate memory for all PgAioUringContext instances and, if
+	 * supported, the memory required for each of the io_uring instances, in
+	 * one ShmemInitStruct().
+	 */
+	shmem = ShmemInitStruct("AioUringContext", pgaio_uring_shmem_size(), &found);
 	if (found)
 		return;
 
+	pgaio_uring_contexts = (PgAioUringContext *) shmem;
+	shmem += pgaio_uring_context_shmem_size();
+
+	/* if supported, handle memory alignment / sizing for io_uring memory */
+	if (pgaio_uring_caps.mem_init_size > 0)
+	{
+		ring_mem_remain = pgaio_uring_ring_shmem_size();
+		ring_mem_next = (char *) shmem;
+
+		/* align to page boundary, see also pgaio_uring_ring_shmem_size() */
+		ring_mem_next = (char *) TYPEALIGN(sysconf(_SC_PAGESIZE), ring_mem_next);
+
+		/* account for alignment */
+		ring_mem_remain -= ring_mem_next - shmem;
+		shmem += ring_mem_next - shmem;
+
+		shmem += ring_mem_remain;
+	}
+
 	for (int contextno = 0; contextno < TotalProcs; contextno++)
 	{
 		PgAioUringContext *context = &pgaio_uring_contexts[contextno];
@@ -158,7 +335,28 @@ pgaio_uring_shmem_init(bool first_time)
 		 * be worth using that - also need to evaluate if that causes
 		 * noticeable additional contention?
 		 */
-		ret = io_uring_queue_init(io_max_concurrency, &context->io_uring_ring, 0);
+
+		/*
+		 * If supported (c.f. pgaio_uring_check_capabilities()), create ring
+		 * with its data in shared memory. Otherwise fall back to io_uring
+		 * creating a memory mapping for each ring.
+		 */
+#if defined(HAVE_LIBURING_QUEUE_INIT_MEM) && defined(IORING_SETUP_NO_MMAP)
+		if (pgaio_uring_caps.mem_init_size > 0)
+		{
+			struct io_uring_params p = {0};
+
+			ret = io_uring_queue_init_mem(io_max_concurrency, &context->io_uring_ring, &p, ring_mem_next, ring_mem_remain);
+
+			ring_mem_remain -= ret;
+			ring_mem_next += ret;
+		}
+		else
+#endif
+		{
+			ret = io_uring_queue_init(io_max_concurrency, &context->io_uring_ring, 0);
+		}
+
 		if (ret < 0)
 		{
 			char	   *hint = NULL;
@@ -400,9 +598,9 @@ pgaio_uring_wait_one(PgAioHandle *ioh, uint64 ref_generation)
 	while (true)
 	{
 		pgaio_debug_io(DEBUG3, ioh,
-					   "wait_one io_gen: %llu, ref_gen: %llu, cycle %d",
-					   (long long unsigned) ioh->generation,
-					   (long long unsigned) ref_generation,
+					   "wait_one io_gen: %" PRIu64 ", ref_gen: %" PRIu64 ", cycle %d",
+					   ioh->generation,
+					   ref_generation,
 					   waited);
 
 		if (pgaio_io_was_recycled(ioh, ref_generation, &state) ||
diff --git a/src/backend/storage/aio/method_worker.c b/src/backend/storage/aio/method_worker.c
index 743cccc2acd..bf8f77e6ff6 100644
--- a/src/backend/storage/aio/method_worker.c
+++ b/src/backend/storage/aio/method_worker.c
@@ -52,26 +52,26 @@
 #define IO_WORKER_WAKEUP_FANOUT 2
 
 
-typedef struct AioWorkerSubmissionQueue
+typedef struct PgAioWorkerSubmissionQueue
 {
 	uint32		size;
 	uint32		mask;
 	uint32		head;
 	uint32		tail;
-	uint32		ios[FLEXIBLE_ARRAY_MEMBER];
-} AioWorkerSubmissionQueue;
+	uint32		sqes[FLEXIBLE_ARRAY_MEMBER];
+} PgAioWorkerSubmissionQueue;
 
-typedef struct AioWorkerSlot
+typedef struct PgAioWorkerSlot
 {
 	Latch	   *latch;
 	bool		in_use;
-} AioWorkerSlot;
+} PgAioWorkerSlot;
 
-typedef struct AioWorkerControl
+typedef struct PgAioWorkerControl
 {
 	uint64		idle_worker_mask;
-	AioWorkerSlot workers[FLEXIBLE_ARRAY_MEMBER];
-} AioWorkerControl;
+	PgAioWorkerSlot workers[FLEXIBLE_ARRAY_MEMBER];
+} PgAioWorkerControl;
 
 
 static size_t pgaio_worker_shmem_size(void);
@@ -96,8 +96,8 @@ int			io_workers = 3;
 
 static int	io_worker_queue_size = 64;
 static int	MyIoWorkerId;
-static AioWorkerSubmissionQueue *io_worker_submission_queue;
-static AioWorkerControl *io_worker_control;
+static PgAioWorkerSubmissionQueue *io_worker_submission_queue;
+static PgAioWorkerControl *io_worker_control;
 
 
 static size_t
@@ -106,15 +106,15 @@ pgaio_worker_queue_shmem_size(int *queue_size)
 	/* Round size up to next power of two so we can make a mask. */
 	*queue_size = pg_nextpower2_32(io_worker_queue_size);
 
-	return offsetof(AioWorkerSubmissionQueue, ios) +
+	return offsetof(PgAioWorkerSubmissionQueue, sqes) +
 		sizeof(uint32) * *queue_size;
 }
 
 static size_t
 pgaio_worker_control_shmem_size(void)
 {
-	return offsetof(AioWorkerControl, workers) +
-		sizeof(AioWorkerSlot) * MAX_IO_WORKERS;
+	return offsetof(PgAioWorkerControl, workers) +
+		sizeof(PgAioWorkerSlot) * MAX_IO_WORKERS;
 }
 
 static size_t
@@ -162,7 +162,7 @@ pgaio_worker_shmem_init(bool first_time)
 }
 
 static int
-pgaio_choose_idle_worker(void)
+pgaio_worker_choose_idle(void)
 {
 	int			worker;
 
@@ -172,6 +172,7 @@ pgaio_choose_idle_worker(void)
 	/* Find the lowest bit position, and clear it. */
 	worker = pg_rightmost_one_pos64(io_worker_control->idle_worker_mask);
 	io_worker_control->idle_worker_mask &= ~(UINT64_C(1) << worker);
+	Assert(io_worker_control->workers[worker].in_use);
 
 	return worker;
 }
@@ -179,7 +180,7 @@ pgaio_choose_idle_worker(void)
 static bool
 pgaio_worker_submission_queue_insert(PgAioHandle *ioh)
 {
-	AioWorkerSubmissionQueue *queue;
+	PgAioWorkerSubmissionQueue *queue;
 	uint32		new_head;
 
 	queue = io_worker_submission_queue;
@@ -191,7 +192,7 @@ pgaio_worker_submission_queue_insert(PgAioHandle *ioh)
 		return false;			/* full */
 	}
 
-	queue->ios[queue->head] = pgaio_io_get_id(ioh);
+	queue->sqes[queue->head] = pgaio_io_get_id(ioh);
 	queue->head = new_head;
 
 	return true;
@@ -200,14 +201,14 @@ pgaio_worker_submission_queue_insert(PgAioHandle *ioh)
 static uint32
 pgaio_worker_submission_queue_consume(void)
 {
-	AioWorkerSubmissionQueue *queue;
+	PgAioWorkerSubmissionQueue *queue;
 	uint32		result;
 
 	queue = io_worker_submission_queue;
 	if (queue->tail == queue->head)
 		return UINT32_MAX;		/* empty */
 
-	result = queue->ios[queue->tail];
+	result = queue->sqes[queue->tail];
 	queue->tail = (queue->tail + 1) & (queue->size - 1);
 
 	return result;
@@ -240,37 +241,37 @@ pgaio_worker_needs_synchronous_execution(PgAioHandle *ioh)
 }
 
 static void
-pgaio_worker_submit_internal(int nios, PgAioHandle *ios[])
+pgaio_worker_submit_internal(int num_staged_ios, PgAioHandle **staged_ios)
 {
 	PgAioHandle *synchronous_ios[PGAIO_SUBMIT_BATCH_SIZE];
 	int			nsync = 0;
 	Latch	   *wakeup = NULL;
 	int			worker;
 
-	Assert(nios <= PGAIO_SUBMIT_BATCH_SIZE);
+	Assert(num_staged_ios <= PGAIO_SUBMIT_BATCH_SIZE);
 
 	LWLockAcquire(AioWorkerSubmissionQueueLock, LW_EXCLUSIVE);
-	for (int i = 0; i < nios; ++i)
+	for (int i = 0; i < num_staged_ios; ++i)
 	{
-		Assert(!pgaio_worker_needs_synchronous_execution(ios[i]));
-		if (!pgaio_worker_submission_queue_insert(ios[i]))
+		Assert(!pgaio_worker_needs_synchronous_execution(staged_ios[i]));
+		if (!pgaio_worker_submission_queue_insert(staged_ios[i]))
 		{
 			/*
 			 * We'll do it synchronously, but only after we've sent as many as
 			 * we can to workers, to maximize concurrency.
 			 */
-			synchronous_ios[nsync++] = ios[i];
+			synchronous_ios[nsync++] = staged_ios[i];
 			continue;
 		}
 
 		if (wakeup == NULL)
 		{
 			/* Choose an idle worker to wake up if we haven't already. */
-			worker = pgaio_choose_idle_worker();
+			worker = pgaio_worker_choose_idle();
 			if (worker >= 0)
 				wakeup = io_worker_control->workers[worker].latch;
 
-			pgaio_debug_io(DEBUG4, ios[i],
+			pgaio_debug_io(DEBUG4, staged_ios[i],
 						   "choosing worker %d",
 						   worker);
 		}
@@ -316,6 +317,7 @@ pgaio_worker_die(int code, Datum arg)
 	Assert(io_worker_control->workers[MyIoWorkerId].in_use);
 	Assert(io_worker_control->workers[MyIoWorkerId].latch == MyLatch);
 
+	io_worker_control->idle_worker_mask &= ~(UINT64_C(1) << MyIoWorkerId);
 	io_worker_control->workers[MyIoWorkerId].in_use = false;
 	io_worker_control->workers[MyIoWorkerId].latch = NULL;
 	LWLockRelease(AioWorkerSubmissionQueueLock);
@@ -461,7 +463,12 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
 		int			nwakeups = 0;
 		int			worker;
 
-		/* Try to get a job to do. */
+		/*
+		 * Try to get a job to do.
+		 *
+		 * The lwlock acquisition also provides the necessary memory barrier
+		 * to ensure that we don't see outdated data in the handle.
+		 */
 		LWLockAcquire(AioWorkerSubmissionQueueLock, LW_EXCLUSIVE);
 		if ((io_index = pgaio_worker_submission_queue_consume()) == UINT32_MAX)
 		{
@@ -483,7 +490,7 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
 						   IO_WORKER_WAKEUP_FANOUT);
 			for (int i = 0; i < nwakeups; ++i)
 			{
-				if ((worker = pgaio_choose_idle_worker()) < 0)
+				if ((worker = pgaio_worker_choose_idle()) < 0)
 					break;
 				latches[nlatches++] = io_worker_control->workers[worker].latch;
 			}
@@ -568,6 +575,12 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
 		}
 
 		CHECK_FOR_INTERRUPTS();
+
+		if (ConfigReloadPending)
+		{
+			ConfigReloadPending = false;
+			ProcessConfigFile(PGC_SIGHUP);
+		}
 	}
 
 	error_context_stack = errcallback.previous;
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f93131a645e..6afdd28dba6 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -3339,10 +3339,10 @@ UnpinBufferNoOwner(BufferDesc *buf)
  * BufferSync -- Write out all dirty buffers in the pool.
  *
  * This is called at checkpoint time to write out all dirty shared buffers.
- * The checkpoint request flags should be passed in.  If CHECKPOINT_IMMEDIATE
- * is set, we disable delays between writes; if CHECKPOINT_IS_SHUTDOWN,
- * CHECKPOINT_END_OF_RECOVERY or CHECKPOINT_FLUSH_ALL is set, we write even
- * unlogged buffers, which are otherwise skipped.  The remaining flags
+ * The checkpoint request flags should be passed in.  If CHECKPOINT_FAST is
+ * set, we disable delays between writes; if CHECKPOINT_IS_SHUTDOWN,
+ * CHECKPOINT_END_OF_RECOVERY or CHECKPOINT_FLUSH_UNLOGGED is set, we write
+ * even unlogged buffers, which are otherwise skipped.  The remaining flags
  * currently have no effect here.
  */
 static void
@@ -3367,7 +3367,7 @@ BufferSync(int flags)
 	 * recovery, we write all dirty buffers.
 	 */
 	if (!((flags & (CHECKPOINT_IS_SHUTDOWN | CHECKPOINT_END_OF_RECOVERY |
-					CHECKPOINT_FLUSH_ALL))))
+					CHECKPOINT_FLUSH_UNLOGGED))))
 		mask |= BM_PERMANENT;
 
 	/*
@@ -4550,11 +4550,9 @@ DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
 	if (RelFileLocatorBackendIsTemp(rlocator))
 	{
 		if (rlocator.backend == MyProcNumber)
-		{
-			for (j = 0; j < nforks; j++)
-				DropRelationLocalBuffers(rlocator.locator, forkNum[j],
-										 firstDelBlock[j]);
-		}
+			DropRelationLocalBuffers(rlocator.locator, forkNum, nforks,
+									 firstDelBlock);
+
 		return;
 	}
 
@@ -7320,7 +7318,7 @@ buffer_readv_report(PgAioResult result, const PgAioTargetData *td,
 				affected_count > 1 ?
 				errdetail("Block %u held first zeroed page.",
 						  first + first_off) : 0,
-				errhint("See server log for details about the other %u invalid block(s).",
+				errhint("See server log for details about the other %d invalid block(s).",
 						affected_count + checkfail_count - 1));
 		return;
 	}
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 63101d56a07..3da9c41ee1d 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -629,7 +629,7 @@ InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced)
 	 */
 	if (check_unreferenced &&
 		(LocalRefCount[bufid] != 0 || BUF_STATE_GET_REFCOUNT(buf_state) != 0))
-		elog(ERROR, "block %u of %s is still referenced (local %u)",
+		elog(ERROR, "block %u of %s is still referenced (local %d)",
 			 bufHdr->tag.blockNum,
 			 relpathbackend(BufTagGetRelFileLocator(&bufHdr->tag),
 							MyProcNumber,
@@ -660,10 +660,11 @@ InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced)
  *		See DropRelationBuffers in bufmgr.c for more notes.
  */
 void
-DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum,
-						 BlockNumber firstDelBlock)
+DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber *forkNum,
+						 int nforks, BlockNumber *firstDelBlock)
 {
 	int			i;
+	int			j;
 
 	for (i = 0; i < NLocBuffer; i++)
 	{
@@ -672,12 +673,18 @@ DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum,
 
 		buf_state = pg_atomic_read_u32(&bufHdr->state);
 
-		if ((buf_state & BM_TAG_VALID) &&
-			BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
-			BufTagGetForkNum(&bufHdr->tag) == forkNum &&
-			bufHdr->tag.blockNum >= firstDelBlock)
+		if (!(buf_state & BM_TAG_VALID) ||
+			!BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator))
+			continue;
+
+		for (j = 0; j < nforks; j++)
 		{
-			InvalidateLocalBuffer(bufHdr, true);
+			if (BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
+				bufHdr->tag.blockNum >= firstDelBlock[j])
+			{
+				InvalidateLocalBuffer(bufHdr, true);
+				break;
+			}
 		}
 	}
 }
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 0e8299dd556..a4ec7959f31 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -400,25 +400,22 @@ pg_fsync(int fd)
 	 * portable, even if it runs ok on the current system.
 	 *
 	 * We assert here that a descriptor for a file was opened with write
-	 * permissions (either O_RDWR or O_WRONLY) and for a directory without
-	 * write permissions (O_RDONLY).
+	 * permissions (i.e., not O_RDONLY) and for a directory without write
+	 * permissions (O_RDONLY).  Notice that the assertion check is made even
+	 * if fsync() is disabled.
 	 *
-	 * Ignore any fstat errors and let the follow-up fsync() do its work.
-	 * Doing this sanity check here counts for the case where fsync() is
-	 * disabled.
+	 * If fstat() fails, ignore it and let the follow-up fsync() complain.
 	 */
 	if (fstat(fd, &st) == 0)
 	{
 		int			desc_flags = fcntl(fd, F_GETFL);
 
-		/*
-		 * O_RDONLY is historically 0, so just make sure that for directories
-		 * no write flags are used.
-		 */
+		desc_flags &= O_ACCMODE;
+
 		if (S_ISDIR(st.st_mode))
-			Assert((desc_flags & (O_RDWR | O_WRONLY)) == 0);
+			Assert(desc_flags == O_RDONLY);
 		else
-			Assert((desc_flags & (O_RDWR | O_WRONLY)) != 0);
+			Assert(desc_flags != O_RDONLY);
 	}
 	errno = 0;
 #endif
diff --git a/src/backend/storage/ipc/dsm_registry.c b/src/backend/storage/ipc/dsm_registry.c
index 1d4fd31ffed..1682cc6d34c 100644
--- a/src/backend/storage/ipc/dsm_registry.c
+++ b/src/backend/storage/ipc/dsm_registry.c
@@ -15,6 +15,20 @@
  * current backend.  This function guarantees that only one backend
  * initializes the segment and that all other backends just attach it.
  *
+ * A DSA can be created in or retrieved from the registry by calling
+ * GetNamedDSA().  As with GetNamedDSMSegment(), if a DSA with the provided
+ * name does not yet exist, it is created.  Otherwise, GetNamedDSA()
+ * ensures the DSA is attached to the current backend.  This function
+ * guarantees that only one backend initializes the DSA and that all other
+ * backends just attach it.
+ *
+ * A dshash table can be created in or retrieved from the registry by
+ * calling GetNamedDSHash().  As with GetNamedDSMSegment(), if a hash
+ * table with the provided name does not yet exist, it is created.
+ * Otherwise, GetNamedDSHash() ensures the hash table is attached to the
+ * current backend.  This function guarantees that only one backend
+ * initializes the table and that all other backends just attach it.
+ *
  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
@@ -26,12 +40,20 @@
 
 #include "postgres.h"
 
+#include "funcapi.h"
 #include "lib/dshash.h"
 #include "storage/dsm_registry.h"
 #include "storage/lwlock.h"
 #include "storage/shmem.h"
+#include "utils/builtins.h"
 #include "utils/memutils.h"
 
+#define DSMR_NAME_LEN				128
+
+#define DSMR_DSA_TRANCHE_SUFFIX		" DSA"
+#define DSMR_DSA_TRANCHE_SUFFIX_LEN (sizeof(DSMR_DSA_TRANCHE_SUFFIX) - 1)
+#define DSMR_DSA_TRANCHE_NAME_LEN	(DSMR_NAME_LEN + DSMR_DSA_TRANCHE_SUFFIX_LEN)
+
 typedef struct DSMRegistryCtxStruct
 {
 	dsa_handle	dsah;
@@ -40,15 +62,55 @@ typedef struct DSMRegistryCtxStruct
 
 static DSMRegistryCtxStruct *DSMRegistryCtx;
 
-typedef struct DSMRegistryEntry
+typedef struct NamedDSMState
 {
-	char		name[64];
 	dsm_handle	handle;
 	size_t		size;
+} NamedDSMState;
+
+typedef struct NamedDSAState
+{
+	dsa_handle	handle;
+	int			tranche;
+	char		tranche_name[DSMR_DSA_TRANCHE_NAME_LEN];
+} NamedDSAState;
+
+typedef struct NamedDSHState
+{
+	NamedDSAState dsa;
+	dshash_table_handle handle;
+	int			tranche;
+	char		tranche_name[DSMR_NAME_LEN];
+} NamedDSHState;
+
+typedef enum DSMREntryType
+{
+	DSMR_ENTRY_TYPE_DSM,
+	DSMR_ENTRY_TYPE_DSA,
+	DSMR_ENTRY_TYPE_DSH,
+} DSMREntryType;
+
+static const char *const DSMREntryTypeNames[] =
+{
+	[DSMR_ENTRY_TYPE_DSM] = "segment",
+	[DSMR_ENTRY_TYPE_DSA] = "area",
+	[DSMR_ENTRY_TYPE_DSH] = "hash",
+};
+
+typedef struct DSMRegistryEntry
+{
+	char		name[DSMR_NAME_LEN];
+	DSMREntryType type;
+	union
+	{
+		NamedDSMState dsm;
+		NamedDSAState dsa;
+		NamedDSHState dsh;
+	}			data;
 } DSMRegistryEntry;
 
 static const dshash_parameters dsh_params = {
-	offsetof(DSMRegistryEntry, handle),
+	offsetof(DSMRegistryEntry, type),
 	sizeof(DSMRegistryEntry),
 	dshash_strcmp,
 	dshash_strhash,
@@ -141,7 +203,7 @@ GetNamedDSMSegment(const char *name, size_t size,
 		ereport(ERROR,
 				(errmsg("DSM segment name cannot be empty")));
 
-	if (strlen(name) >= offsetof(DSMRegistryEntry, handle))
+	if (strlen(name) >= offsetof(DSMRegistryEntry, type))
 		ereport(ERROR,
 				(errmsg("DSM segment name too long")));
 
@@ -158,32 +220,39 @@ GetNamedDSMSegment(const char *name, size_t size,
 	entry = dshash_find_or_insert(dsm_registry_table, name, found);
 	if (!(*found))
 	{
+		NamedDSMState *state = &entry->data.dsm;
+		dsm_segment *seg;
+
+		entry->type = DSMR_ENTRY_TYPE_DSM;
+
 		/* Initialize the segment. */
-		dsm_segment *seg = dsm_create(size, 0);
+		seg = dsm_create(size, 0);
 
 		dsm_pin_segment(seg);
 		dsm_pin_mapping(seg);
-		entry->handle = dsm_segment_handle(seg);
-		entry->size = size;
+		state->handle = dsm_segment_handle(seg);
+		state->size = size;
 		ret = dsm_segment_address(seg);
 
 		if (init_callback)
 			(*init_callback) (ret);
 	}
-	else if (entry->size != size)
-	{
+	else if (entry->type != DSMR_ENTRY_TYPE_DSM)
 		ereport(ERROR,
-				(errmsg("requested DSM segment size does not match size of "
-						"existing segment")));
-	}
+				(errmsg("requested DSM segment does not match type of existing entry")));
+	else if (entry->data.dsm.size != size)
+		ereport(ERROR,
+				(errmsg("requested DSM segment size does not match size of existing segment")));
 	else
 	{
-		dsm_segment *seg = dsm_find_mapping(entry->handle);
+		NamedDSMState *state = &entry->data.dsm;
+		dsm_segment *seg;
 
 		/* If the existing segment is not already attached, attach it now. */
+		seg = dsm_find_mapping(state->handle);
 		if (seg == NULL)
 		{
-			seg = dsm_attach(entry->handle);
+			seg = dsm_attach(state->handle);
 			if (seg == NULL)
 				elog(ERROR, "could not map dynamic shared memory segment");
 
@@ -198,3 +267,220 @@ GetNamedDSMSegment(const char *name, size_t size,
 
 	return ret;
 }
+
+/*
+ * Initialize or attach a named DSA.
+ *
+ * This routine returns a pointer to the DSA.  A new LWLock tranche ID will be
+ * generated if needed.  Note that the lock tranche will be registered with the
+ * provided name.  Also note that this must be called at most once for a
+ * given DSA in each backend; a repeat call raises an ERROR.
+ */
+dsa_area *
+GetNamedDSA(const char *name, bool *found)
+{
+	DSMRegistryEntry *entry;
+	MemoryContext oldcontext;
+	dsa_area   *ret;
+
+	Assert(found);
+
+	if (!name || *name == '\0')
+		ereport(ERROR,
+				(errmsg("DSA name cannot be empty")));
+
+	if (strlen(name) >= offsetof(DSMRegistryEntry, type))
+		ereport(ERROR,
+				(errmsg("DSA name too long")));
+
+	/* Be sure any local memory allocated by DSM/DSA routines is persistent. */
+	oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+
+	/* Connect to the registry. */
+	init_dsm_registry();
+
+	entry = dshash_find_or_insert(dsm_registry_table, name, found);
+	if (!(*found))
+	{
+		NamedDSAState *state = &entry->data.dsa;
+
+		entry->type = DSMR_ENTRY_TYPE_DSA;
+
+		/* Initialize the LWLock tranche for the DSA. */
+		state->tranche = LWLockNewTrancheId();
+		strcpy(state->tranche_name, name);
+		LWLockRegisterTranche(state->tranche, state->tranche_name);
+
+		/* Initialize the DSA. */
+		ret = dsa_create(state->tranche);
+		dsa_pin(ret);
+		dsa_pin_mapping(ret);
+
+		/* Store handle for other backends to use. */
+		state->handle = dsa_get_handle(ret);
+	}
+	else if (entry->type != DSMR_ENTRY_TYPE_DSA)
+		ereport(ERROR,
+				(errmsg("requested DSA does not match type of existing entry")));
+	else
+	{
+		NamedDSAState *state = &entry->data.dsa;
+
+		if (dsa_is_attached(state->handle))
+			ereport(ERROR,
+					(errmsg("requested DSA already attached to current process")));
+
+		/* Register the existing LWLock tranche for the DSA. */
+		LWLockRegisterTranche(state->tranche, state->tranche_name);
+
+		/* Attach to existing DSA. */
+		ret = dsa_attach(state->handle);
+		dsa_pin_mapping(ret);
+	}
+
+	dshash_release_lock(dsm_registry_table, entry);
+	MemoryContextSwitchTo(oldcontext);
+
+	return ret;
+}
+
+/*
+ * Initialize or attach a named dshash table.
+ *
+ * This routine returns the address of the table.  The tranche_id member of
+ * params is ignored; new tranche IDs will be generated if needed.  Note that
+ * the DSA lock tranche will be registered with the provided name with " DSA"
+ * appended.  The dshash lock tranche will be registered with the provided
+ * name.  Also note that this must be called at most once for a given table
+ * in each backend; a repeat call raises an ERROR.
+ */
+dshash_table *
+GetNamedDSHash(const char *name, const dshash_parameters *params, bool *found)
+{
+	DSMRegistryEntry *entry;
+	MemoryContext oldcontext;
+	dshash_table *ret;
+
+	Assert(params);
+	Assert(found);
+
+	if (!name || *name == '\0')
+		ereport(ERROR,
+				(errmsg("DSHash name cannot be empty")));
+
+	if (strlen(name) >= offsetof(DSMRegistryEntry, type))
+		ereport(ERROR,
+				(errmsg("DSHash name too long")));
+
+	/* Be sure any local memory allocated by DSM/DSA routines is persistent. */
+	oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+
+	/* Connect to the registry. */
+	init_dsm_registry();
+
+	entry = dshash_find_or_insert(dsm_registry_table, name, found);
+	if (!(*found))
+	{
+		NamedDSAState *dsa_state = &entry->data.dsh.dsa;
+		NamedDSHState *dsh_state = &entry->data.dsh;
+		dshash_parameters params_copy;
+		dsa_area   *dsa;
+
+		entry->type = DSMR_ENTRY_TYPE_DSH;
+
+		/* Initialize the LWLock tranche for the DSA. */
+		dsa_state->tranche = LWLockNewTrancheId();
+		sprintf(dsa_state->tranche_name, "%s%s", name, DSMR_DSA_TRANCHE_SUFFIX);
+		LWLockRegisterTranche(dsa_state->tranche, dsa_state->tranche_name);
+
+		/* Initialize the LWLock tranche for the dshash table. */
+		dsh_state->tranche = LWLockNewTrancheId();
+		strcpy(dsh_state->tranche_name, name);
+		LWLockRegisterTranche(dsh_state->tranche, dsh_state->tranche_name);
+
+		/* Initialize the DSA for the hash table. */
+		dsa = dsa_create(dsa_state->tranche);
+		dsa_pin(dsa);
+		dsa_pin_mapping(dsa);
+
+		/* Initialize the dshash table. */
+		memcpy(&params_copy, params, sizeof(dshash_parameters));
+		params_copy.tranche_id = dsh_state->tranche;
+		ret = dshash_create(dsa, &params_copy, NULL);
+
+		/* Store handles for other backends to use. */
+		dsa_state->handle = dsa_get_handle(dsa);
+		dsh_state->handle = dshash_get_hash_table_handle(ret);
+	}
+	else if (entry->type != DSMR_ENTRY_TYPE_DSH)
+		ereport(ERROR,
+				(errmsg("requested DSHash does not match type of existing entry")));
+	else
+	{
+		NamedDSAState *dsa_state = &entry->data.dsh.dsa;
+		NamedDSHState *dsh_state = &entry->data.dsh;
+		dsa_area   *dsa;
+
+		/* XXX: Should we verify that params match what the table was created with? */
+
+		if (dsa_is_attached(dsa_state->handle))
+			ereport(ERROR,
+					(errmsg("requested DSHash already attached to current process")));
+
+		/* Register the existing LWLock tranches for the DSA and dshash table. */
+		LWLockRegisterTranche(dsa_state->tranche, dsa_state->tranche_name);
+		LWLockRegisterTranche(dsh_state->tranche, dsh_state->tranche_name);
+
+		/* Attach to existing DSA for the hash table. */
+		dsa = dsa_attach(dsa_state->handle);
+		dsa_pin_mapping(dsa);
+
+		/* Attach to existing dshash table. */
+		ret = dshash_attach(dsa, params, dsh_state->handle, NULL);
+	}
+
+	dshash_release_lock(dsm_registry_table, entry);
+	MemoryContextSwitchTo(oldcontext);
+
+	return ret;
+}
+
+Datum
+pg_get_dsm_registry_allocations(PG_FUNCTION_ARGS)
+{
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	DSMRegistryEntry *entry;
+	MemoryContext oldcontext;
+	dshash_seq_status status;
+
+	InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
+
+	/* Be sure any local memory allocated by DSM/DSA routines is persistent. */
+	oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+	init_dsm_registry();
+	MemoryContextSwitchTo(oldcontext);
+
+	dshash_seq_init(&status, dsm_registry_table, false);
+	while ((entry = dshash_seq_next(&status)) != NULL)
+	{
+		Datum		vals[3];
+		bool		nulls[3] = {0};
+
+		vals[0] = CStringGetTextDatum(entry->name);
+		vals[1] = CStringGetTextDatum(DSMREntryTypeNames[entry->type]);
+
+		/*
+		 * Since we can't know the size of DSA/dshash entries without first
+		 * attaching to them, return NULL for those.
+		 */
+		if (entry->type == DSMR_ENTRY_TYPE_DSM)
+			vals[2] = Int64GetDatum(entry->data.dsm.size);
+		else
+			nulls[2] = true;
+
+		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, vals, nulls);
+	}
+	dshash_seq_term(&status);
+
+	return (Datum) 0;
+}
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 00c76d05356..2fa045e6b0f 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -51,7 +51,6 @@
 #include "storage/sinvaladt.h"
 #include "utils/guc.h"
 #include "utils/injection_point.h"
-#include "utils/memutils.h"
 
 /* GUCs */
 int			shared_memory_type = DEFAULT_SHARED_MEMORY_TYPE;
@@ -151,7 +150,6 @@ CalculateShmemSize(int *num_semaphores)
 	size = add_size(size, InjectionPointShmemSize());
 	size = add_size(size, SlotSyncShmemSize());
 	size = add_size(size, AioShmemSize());
-	size = add_size(size, MemoryContextReportingShmemSize());
 
 	/* include additional requested shmem from preload libraries */
 	size = add_size(size, total_addin_request);
@@ -345,7 +343,6 @@ CreateOrAttachShmemStructs(void)
 	WaitEventCustomShmemInit();
 	InjectionPointShmemInit();
 	AioShmemInit();
-	MemoryContextReportingShmemInit();
 }
 
 /*
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index e5b945a9ee3..2418967def6 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -1622,58 +1622,6 @@ TransactionIdIsInProgress(TransactionId xid)
 	return false;
 }
 
-/*
- * TransactionIdIsActive -- is xid the top-level XID of an active backend?
- *
- * This differs from TransactionIdIsInProgress in that it ignores prepared
- * transactions, as well as transactions running on the primary if we're in
- * hot standby.  Also, we ignore subtransactions since that's not needed
- * for current uses.
- */
-bool
-TransactionIdIsActive(TransactionId xid)
-{
-	bool		result = false;
-	ProcArrayStruct *arrayP = procArray;
-	TransactionId *other_xids = ProcGlobal->xids;
-	int			i;
-
-	/*
-	 * Don't bother checking a transaction older than RecentXmin; it could not
-	 * possibly still be running.
-	 */
-	if (TransactionIdPrecedes(xid, RecentXmin))
-		return false;
-
-	LWLockAcquire(ProcArrayLock, LW_SHARED);
-
-	for (i = 0; i < arrayP->numProcs; i++)
-	{
-		int			pgprocno = arrayP->pgprocnos[i];
-		PGPROC	   *proc = &allProcs[pgprocno];
-		TransactionId pxid;
-
-		/* Fetch xid just once - see GetNewTransactionId */
-		pxid = UINT32_ACCESS_ONCE(other_xids[i]);
-
-		if (!TransactionIdIsValid(pxid))
-			continue;
-
-		if (proc->pid == 0)
-			continue;			/* ignore prepared transactions */
-
-		if (TransactionIdEquals(pxid, xid))
-		{
-			result = true;
-			break;
-		}
-	}
-
-	LWLockRelease(ProcArrayLock);
-
-	return result;
-}
-
 
 /*
  * Determine XID horizons.
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index ce69e26d720..a9bb540b55a 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -691,9 +691,6 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
 	if (CheckProcSignal(PROCSIG_LOG_MEMORY_CONTEXT))
 		HandleLogMemoryContextInterrupt();
 
-	if (CheckProcSignal(PROCSIG_GET_MEMORY_CONTEXT))
-		HandleGetMemoryContextInterrupt();
-
 	if (CheckProcSignal(PROCSIG_PARALLEL_APPLY_MESSAGE))
 		HandleParallelApplyMessageInterrupt();
 
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index c9ae3b45b76..ca3656fc76f 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -679,12 +679,10 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
 		 */
 		for (i = 0; i < shm_ent_page_count; i++)
 		{
-			volatile uint64 touch pg_attribute_unused();
-
 			page_ptrs[i] = startptr + (i * os_page_size);
 
 			if (firstNumaTouch)
-				pg_numa_touch_mem_if_required(touch, page_ptrs[i]);
+				pg_numa_touch_mem_if_required(page_ptrs[i]);
 
 			CHECK_FOR_INTERRUPTS();
 		}
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index 7fa8d9247e0..4222bdab078 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -1376,7 +1376,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
 
 	if (xlrec.subxid_overflow)
 		elog(DEBUG2,
-			 "snapshot of %d running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
+			 "snapshot of %d running transactions overflowed (lsn %X/%08X oldest xid %u latest complete %u next xid %u)",
 			 CurrRunningXacts->xcnt,
 			 LSN_FORMAT_ARGS(recptr),
 			 CurrRunningXacts->oldestRunningXid,
@@ -1384,7 +1384,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts)
 			 CurrRunningXacts->nextXid);
 	else
 		elog(DEBUG2,
-			 "snapshot of %d+%d running transaction ids (lsn %X/%X oldest xid %u latest complete %u next xid %u)",
+			 "snapshot of %d+%d running transaction ids (lsn %X/%08X oldest xid %u latest complete %u next xid %u)",
 			 CurrRunningXacts->xcnt, CurrRunningXacts->subxcnt,
 			 LSN_FORMAT_ARGS(recptr),
 			 CurrRunningXacts->oldestRunningXid,
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index f50962983c3..3f6bf70bd3c 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -717,7 +717,10 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid,
 		 * through, to avoid slowing down the normal case.)
 		 */
 		if (!first)
+		{
+			CHECK_FOR_INTERRUPTS();
 			pg_usleep(1000L);
+		}
 		first = false;
 		xid = SubTransGetTopmostTransaction(xid);
 	}
@@ -757,7 +760,10 @@ ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
 
 		/* See XactLockTableWait about this case */
 		if (!first)
+		{
+			CHECK_FOR_INTERRUPTS();
 			pg_usleep(1000L);
+		}
 		first = false;
 		xid = SubTransGetTopmostTransaction(xid);
 	}
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 86b06b9223f..62f3471448e 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -51,7 +51,7 @@
 
 /* GUC variables */
 int			max_locks_per_xact; /* used to set the lock table size */
-bool		log_lock_failure = false;
+bool		log_lock_failures = false;
 
 #define NLOCKENTS() \
 	mul_size(max_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))
@@ -3539,9 +3539,9 @@ AtPrepare_Locks(void)
  * but that probably costs more cycles.
  */
 void
-PostPrepare_Locks(TransactionId xid)
+PostPrepare_Locks(FullTransactionId fxid)
 {
-	PGPROC	   *newproc = TwoPhaseGetDummyProc(xid, false);
+	PGPROC	   *newproc = TwoPhaseGetDummyProc(fxid, false);
 	HASH_SEQ_STATUS status;
 	LOCALLOCK  *locallock;
 	LOCK	   *lock;
@@ -4324,11 +4324,11 @@ DumpAllLocks(void)
  * and PANIC anyway.
  */
 void
-lock_twophase_recover(TransactionId xid, uint16 info,
+lock_twophase_recover(FullTransactionId fxid, uint16 info,
 					  void *recdata, uint32 len)
 {
 	TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata;
-	PGPROC	   *proc = TwoPhaseGetDummyProc(xid, false);
+	PGPROC	   *proc = TwoPhaseGetDummyProc(fxid, false);
 	LOCKTAG    *locktag;
 	LOCKMODE	lockmode;
 	LOCKMETHODID lockmethodid;
@@ -4505,7 +4505,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
  * starting up into hot standby mode.
  */
 void
-lock_twophase_standby_recover(TransactionId xid, uint16 info,
+lock_twophase_standby_recover(FullTransactionId fxid, uint16 info,
 							  void *recdata, uint32 len)
 {
 	TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata;
@@ -4524,7 +4524,7 @@ lock_twophase_standby_recover(TransactionId xid, uint16 info,
 	if (lockmode == AccessExclusiveLock &&
 		locktag->locktag_type == LOCKTAG_RELATION)
 	{
-		StandbyAcquireAccessExclusiveLock(xid,
+		StandbyAcquireAccessExclusiveLock(XidFromFullTransactionId(fxid),
 										  locktag->locktag_field1 /* dboid */ ,
 										  locktag->locktag_field2 /* reloid */ );
 	}
@@ -4537,11 +4537,11 @@ lock_twophase_standby_recover(TransactionId xid, uint16 info,
  * Find and release the lock indicated by the 2PC record.
  */
 void
-lock_twophase_postcommit(TransactionId xid, uint16 info,
+lock_twophase_postcommit(FullTransactionId fxid, uint16 info,
 						 void *recdata, uint32 len)
 {
 	TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata;
-	PGPROC	   *proc = TwoPhaseGetDummyProc(xid, true);
+	PGPROC	   *proc = TwoPhaseGetDummyProc(fxid, true);
 	LOCKTAG    *locktag;
 	LOCKMETHODID lockmethodid;
 	LockMethod	lockMethodTable;
@@ -4563,10 +4563,10 @@ lock_twophase_postcommit(TransactionId xid, uint16 info,
  * This is actually just the same as the COMMIT case.
  */
 void
-lock_twophase_postabort(TransactionId xid, uint16 info,
+lock_twophase_postabort(FullTransactionId fxid, uint16 info,
 						void *recdata, uint32 len)
 {
-	lock_twophase_postcommit(xid, info, recdata, len);
+	lock_twophase_postcommit(fxid, info, recdata, len);
 }
 
 /*
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 5148ef982e3..2d43bf2cc13 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -170,16 +170,14 @@ static const char *const BuiltinTrancheNames[] = {
 	[LWTRANCHE_DSM_REGISTRY_DSA] = "DSMRegistryDSA",
 	[LWTRANCHE_DSM_REGISTRY_HASH] = "DSMRegistryHash",
 	[LWTRANCHE_COMMITTS_SLRU] = "CommitTsSLRU",
-	[LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultixactOffsetSLRU",
-	[LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultixactMemberSLRU",
+	[LWTRANCHE_MULTIXACTOFFSET_SLRU] = "MultiXactOffsetSLRU",
+	[LWTRANCHE_MULTIXACTMEMBER_SLRU] = "MultiXactMemberSLRU",
 	[LWTRANCHE_NOTIFY_SLRU] = "NotifySLRU",
 	[LWTRANCHE_SERIAL_SLRU] = "SerialSLRU",
 	[LWTRANCHE_SUBTRANS_SLRU] = "SubtransSLRU",
 	[LWTRANCHE_XACT_SLRU] = "XactSLRU",
 	[LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA",
 	[LWTRANCHE_AIO_URING_COMPLETION] = "AioUringCompletion",
-	[LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE] = "MemoryContextReportingState",
-	[LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC] = "MemoryContextReportingPerProcess",
 };
 
 StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index d82114ffca1..c07fb588355 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -191,7 +191,7 @@
  *		AtPrepare_PredicateLocks(void);
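- *		PostPrepare_PredicateLocks(TransactionId xid);
- *		PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit);
- *		predicatelock_twophase_recover(TransactionId xid, uint16 info,
+ *		PostPrepare_PredicateLocks(FullTransactionId fxid);
+ *		PredicateLockTwoPhaseFinish(FullTransactionId fxid, bool isCommit);
+ *		predicatelock_twophase_recover(FullTransactionId fxid, uint16 info,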
  *									   void *recdata, uint32 len);
  */
 
@@ -4856,7 +4856,7 @@ AtPrepare_PredicateLocks(void)
  *		anyway. We only need to clean up our local state.
  */
 void
-PostPrepare_PredicateLocks(TransactionId xid)
+PostPrepare_PredicateLocks(FullTransactionId fxid)
 {
 	if (MySerializableXact == InvalidSerializableXact)
 		return;
@@ -4879,12 +4879,12 @@ PostPrepare_PredicateLocks(TransactionId xid)
  *		commits or aborts.
  */
 void
-PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit)
+PredicateLockTwoPhaseFinish(FullTransactionId fxid, bool isCommit)
 {
 	SERIALIZABLEXID *sxid;
 	SERIALIZABLEXIDTAG sxidtag;
 
-	sxidtag.xid = xid;
+	sxidtag.xid = XidFromFullTransactionId(fxid);
 
 	LWLockAcquire(SerializableXactHashLock, LW_SHARED);
 	sxid = (SERIALIZABLEXID *)
@@ -4906,10 +4906,11 @@ PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit)
  * Re-acquire a predicate lock belonging to a transaction that was prepared.
  */
 void
-predicatelock_twophase_recover(TransactionId xid, uint16 info,
+predicatelock_twophase_recover(FullTransactionId fxid, uint16 info,
 							   void *recdata, uint32 len)
 {
 	TwoPhasePredicateRecord *record;
+	TransactionId xid = XidFromFullTransactionId(fxid);
 
 	Assert(len == sizeof(TwoPhasePredicateRecord));
 
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index f194e6b3dcc..e9ef0fbfe32 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -50,7 +50,6 @@
 #include "storage/procsignal.h"
 #include "storage/spin.h"
 #include "storage/standby.h"
-#include "utils/memutils.h"
 #include "utils/timeout.h"
 #include "utils/timestamp.h"
 
diff --git a/src/backend/tcop/backend_startup.c b/src/backend/tcop/backend_startup.c
index a7d1fec981f..ad0af5edc1f 100644
--- a/src/backend/tcop/backend_startup.c
+++ b/src/backend/tcop/backend_startup.c
@@ -881,7 +881,7 @@ ProcessCancelRequestPacket(Port *port, void *pkt, int pktlen)
 	{
 		ereport(COMMERROR,
 				(errcode(ERRCODE_PROTOCOL_VIOLATION),
-				 errmsg("invalid length of query cancel packet")));
+				 errmsg("invalid length of cancel request packet")));
 		return;
 	}
 	len = pktlen - offsetof(CancelRequestPacket, cancelAuthCode);
@@ -889,7 +889,7 @@ ProcessCancelRequestPacket(Port *port, void *pkt, int pktlen)
 	{
 		ereport(COMMERROR,
 				(errcode(ERRCODE_PROTOCOL_VIOLATION),
-				 errmsg("invalid length of query cancel key")));
+				 errmsg("invalid length of cancel key in cancel request packet")));
 		return;
 	}
 
@@ -1077,7 +1077,7 @@ check_log_connections(char **newval, void **extra, GucSource source)
 
 	if (!SplitIdentifierString(rawstring, ',', &elemlist))
 	{
-		GUC_check_errdetail("Invalid list syntax in parameter \"log_connections\".");
+		GUC_check_errdetail("Invalid list syntax in parameter \"%s\".", "log_connections");
 		pfree(rawstring);
 		list_free(elemlist);
 		return false;
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 37784b7816d..2f8c3d5f918 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -1682,7 +1682,7 @@ exec_bind_message(StringInfo input_message)
 	{
 		Query	   *query = lfirst_node(Query, lc);
 
-		if (query->queryId != UINT64CONST(0))
+		if (query->queryId != INT64CONST(0))
 		{
 			pgstat_report_query_id(query->queryId, false);
 			break;
@@ -2034,7 +2034,7 @@ exec_bind_message(StringInfo input_message)
 	{
 		PlannedStmt *plan = lfirst_node(PlannedStmt, lc);
 
-		if (plan->planId != UINT64CONST(0))
+		if (plan->planId != INT64CONST(0))
 		{
 			pgstat_report_plan_id(plan->planId, false);
 			break;
@@ -2174,7 +2174,7 @@ exec_execute_message(const char *portal_name, long max_rows)
 	{
 		PlannedStmt *stmt = lfirst_node(PlannedStmt, lc);
 
-		if (stmt->queryId != UINT64CONST(0))
+		if (stmt->queryId != INT64CONST(0))
 		{
 			pgstat_report_query_id(stmt->queryId, false);
 			break;
@@ -2185,7 +2185,7 @@ exec_execute_message(const char *portal_name, long max_rows)
 	{
 		PlannedStmt *stmt = lfirst_node(PlannedStmt, lc);
 
-		if (stmt->planId != UINT64CONST(0))
+		if (stmt->planId != INT64CONST(0))
 		{
 			pgstat_report_plan_id(stmt->planId, false);
 			break;
@@ -3533,9 +3533,6 @@ ProcessInterrupts(void)
 	if (LogMemoryContextPending)
 		ProcessLogMemoryContextInterrupt();
 
-	if (PublishMemoryContextPending)
-		ProcessGetMemoryContextInterrupt();
-
 	if (ParallelApplyMessagePending)
 		ProcessParallelApplyMessages();
 }
diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c
index d1593f38b35..08791b8f75e 100644
--- a/src/backend/tcop/pquery.c
+++ b/src/backend/tcop/pquery.c
@@ -1350,24 +1350,15 @@ PortalRunMulti(Portal portal,
 		PopActiveSnapshot();
 
 	/*
-	 * If a query completion data was supplied, use it.  Otherwise use the
-	 * portal's query completion data.
-	 *
-	 * Exception: Clients expect INSERT/UPDATE/DELETE tags to have counts, so
-	 * fake them with zeros.  This can happen with DO INSTEAD rules if there
-	 * is no replacement query of the same type as the original.  We print "0
-	 * 0" here because technically there is no query of the matching tag type,
-	 * and printing a non-zero count for a different query type seems wrong,
-	 * e.g.  an INSERT that does an UPDATE instead should not print "0 1" if
-	 * one row was updated.  See QueryRewrite(), step 3, for details.
+	 * If a command tag was requested and we did not fill in a run-time-
+	 * determined tag above, copy the parse-time tag from the Portal.  (There
+	 * might not be any tag there either, in edge cases such as empty prepared
+	 * statements.  That's OK.)
 	 */
-	if (qc && qc->commandTag == CMDTAG_UNKNOWN)
-	{
-		if (portal->qc.commandTag != CMDTAG_UNKNOWN)
-			CopyQueryCompletion(qc, &portal->qc);
-		/* If the caller supplied a qc, we should have set it by now. */
-		Assert(qc->commandTag != CMDTAG_UNKNOWN);
-	}
+	if (qc &&
+		qc->commandTag == CMDTAG_UNKNOWN &&
+		portal->qc.commandTag != CMDTAG_UNKNOWN)
+		CopyQueryCompletion(qc, &portal->qc);
 }
 
 /*
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 25fe3d58016..4c1faf5575c 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -943,17 +943,7 @@ standard_ProcessUtility(PlannedStmt *pstmt,
 			break;
 
 		case T_CheckPointStmt:
-			if (!has_privs_of_role(GetUserId(), ROLE_PG_CHECKPOINT))
-				ereport(ERROR,
-						(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
-				/* translator: %s is name of a SQL command, eg CHECKPOINT */
-						 errmsg("permission denied to execute %s command",
-								"CHECKPOINT"),
-						 errdetail("Only roles with privileges of the \"%s\" role may execute this command.",
-								   "pg_checkpoint")));
-
-			RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_WAIT |
-							  (RecoveryInProgress() ? 0 : CHECKPOINT_FORCE));
+			ExecCheckpoint(pstate, (CheckPointStmt *) parsetree);
 			break;
 
 			/*
@@ -1343,7 +1333,7 @@ ProcessUtilitySlow(ParseState *pstate,
 					 */
 					switch (stmt->subtype)
 					{
-						case 'T':	/* ALTER DOMAIN DEFAULT */
+						case AD_AlterDefault:
 
 							/*
 							 * Recursively alter column default for table and,
@@ -1353,30 +1343,30 @@ ProcessUtilitySlow(ParseState *pstate,
 								AlterDomainDefault(stmt->typeName,
 												   stmt->def);
 							break;
-						case 'N':	/* ALTER DOMAIN DROP NOT NULL */
+						case AD_DropNotNull:
 							address =
 								AlterDomainNotNull(stmt->typeName,
 												   false);
 							break;
-						case 'O':	/* ALTER DOMAIN SET NOT NULL */
+						case AD_SetNotNull:
 							address =
 								AlterDomainNotNull(stmt->typeName,
 												   true);
 							break;
-						case 'C':	/* ADD CONSTRAINT */
+						case AD_AddConstraint:
 							address =
 								AlterDomainAddConstraint(stmt->typeName,
 														 stmt->def,
 														 &secondaryObject);
 							break;
-						case 'X':	/* DROP CONSTRAINT */
+						case AD_DropConstraint:
 							address =
 								AlterDomainDropConstraint(stmt->typeName,
 														  stmt->name,
 														  stmt->behavior,
 														  stmt->missing_ok);
 							break;
-						case 'V':	/* VALIDATE CONSTRAINT */
+						case AD_ValidateConstraint:
 							address =
 								AlterDomainValidateConstraint(stmt->typeName,
 															  stmt->name);
diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c
index b77d8c23d36..4801fe90089 100644
--- a/src/backend/tsearch/ts_locale.c
+++ b/src/backend/tsearch/ts_locale.c
@@ -36,7 +36,7 @@ t_isalpha(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
+	locale_t	mylocale = 0;	/* TODO */
 
 	if (clen == 1 || database_ctype_is_c)
 		return isalpha(TOUCHAR(ptr));
@@ -51,7 +51,7 @@ t_isalnum(const char *ptr)
 {
 	int			clen = pg_mblen(ptr);
 	wchar_t		character[WC_BUF_LEN];
-	pg_locale_t mylocale = 0;	/* TODO */
+	locale_t	mylocale = 0;	/* TODO */
 
 	if (clen == 1 || database_ctype_is_c)
 		return isalnum(TOUCHAR(ptr));
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c
index 79bcd32a063..e2dd3da3aa3 100644
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -299,7 +299,7 @@ TParserInit(char *str, int len)
 	 */
 	if (prs->charmaxlen > 1)
 	{
-		pg_locale_t mylocale = 0;	/* TODO */
+		locale_t	mylocale = 0;	/* TODO */
 
 		prs->usewide = true;
 		if (database_ctype_is_c)
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
index e1576e64b6d..a290cc4c975 100644
--- a/src/backend/utils/activity/backend_status.c
+++ b/src/backend/utils/activity/backend_status.c
@@ -320,8 +320,8 @@ pgstat_bestart_initial(void)
 	lbeentry.st_state = STATE_STARTING;
 	lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID;
 	lbeentry.st_progress_command_target = InvalidOid;
-	lbeentry.st_query_id = UINT64CONST(0);
-	lbeentry.st_plan_id = UINT64CONST(0);
+	lbeentry.st_query_id = INT64CONST(0);
+	lbeentry.st_plan_id = INT64CONST(0);
 
 	/*
 	 * we don't zero st_progress_param here to save cycles; nobody should
@@ -599,8 +599,8 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
 			beentry->st_activity_start_timestamp = 0;
 			/* st_xact_start_timestamp and wait_event_info are also disabled */
 			beentry->st_xact_start_timestamp = 0;
-			beentry->st_query_id = UINT64CONST(0);
-			beentry->st_plan_id = UINT64CONST(0);
+			beentry->st_query_id = INT64CONST(0);
+			beentry->st_plan_id = INT64CONST(0);
 			proc->wait_event_info = 0;
 			PGSTAT_END_WRITE_ACTIVITY(beentry);
 		}
@@ -662,8 +662,8 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
 	 */
 	if (state == STATE_RUNNING)
 	{
-		beentry->st_query_id = UINT64CONST(0);
-		beentry->st_plan_id = UINT64CONST(0);
+		beentry->st_query_id = INT64CONST(0);
+		beentry->st_plan_id = INT64CONST(0);
 	}
 
 	if (cmd_str != NULL)
@@ -683,7 +683,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
  * --------
  */
 void
-pgstat_report_query_id(uint64 query_id, bool force)
+pgstat_report_query_id(int64 query_id, bool force)
 {
 	volatile PgBackendStatus *beentry = MyBEEntry;
 
@@ -702,7 +702,7 @@ pgstat_report_query_id(uint64 query_id, bool force)
 	 * command, so ignore the one provided unless it's an explicit call to
 	 * reset the identifier.
 	 */
-	if (beentry->st_query_id != 0 && !force)
+	if (beentry->st_query_id != INT64CONST(0) && !force)
 		return;
 
 	/*
@@ -722,7 +722,7 @@ pgstat_report_query_id(uint64 query_id, bool force)
  * --------
  */
 void
-pgstat_report_plan_id(uint64 plan_id, bool force)
+pgstat_report_plan_id(int64 plan_id, bool force)
 {
 	volatile PgBackendStatus *beentry = MyBEEntry;
 
@@ -1134,7 +1134,7 @@ pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen)
  *
  * Return current backend's query identifier.
  */
-uint64
+int64
 pgstat_get_my_query_id(void)
 {
 	if (!MyBEEntry)
@@ -1154,7 +1154,7 @@ pgstat_get_my_query_id(void)
  *
  * Return current backend's plan identifier.
  */
-uint64
+int64
 pgstat_get_my_plan_id(void)
 {
 	if (!MyBEEntry)
diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c
index 28587e2916b..69df741cbf6 100644
--- a/src/backend/utils/activity/pgstat_relation.c
+++ b/src/backend/utils/activity/pgstat_relation.c
@@ -744,7 +744,7 @@ PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state)
  * Load the saved counts into our local pgstats state.
  */
 void
-pgstat_twophase_postcommit(TransactionId xid, uint16 info,
+pgstat_twophase_postcommit(FullTransactionId fxid, uint16 info,
 						   void *recdata, uint32 len)
 {
 	TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
@@ -780,7 +780,7 @@ pgstat_twophase_postcommit(TransactionId xid, uint16 info,
  * as aborted.
  */
 void
-pgstat_twophase_postabort(TransactionId xid, uint16 info,
+pgstat_twophase_postabort(FullTransactionId fxid, uint16 info,
 						  void *recdata, uint32 len)
 {
 	TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c
index 2e33293b000..53e7d534270 100644
--- a/src/backend/utils/activity/pgstat_shmem.c
+++ b/src/backend/utils/activity/pgstat_shmem.c
@@ -183,7 +183,7 @@ StatsShmemInit(void)
 		p += MAXALIGN(pgstat_dsa_init_size());
 		dsa = dsa_create_in_place(ctl->raw_dsa_area,
 								  pgstat_dsa_init_size(),
-								  LWTRANCHE_PGSTATS_DSA, 0);
+								  LWTRANCHE_PGSTATS_DSA, NULL);
 		dsa_pin(dsa);
 
 		/*
@@ -255,7 +255,8 @@ pgstat_attach_shmem(void)
 	dsa_pin_mapping(pgStatLocal.dsa);
 
 	pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
-											pgStatLocal.shmem->hash_handle, 0);
+											pgStatLocal.shmem->hash_handle,
+											NULL);
 
 	MemoryContextSwitchTo(oldcontext);
 }
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 930321905f1..4da68312b5f 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -161,7 +161,6 @@ WAL_RECEIVER_EXIT	"Waiting for the WAL receiver to exit."
 WAL_RECEIVER_WAIT_START	"Waiting for startup process to send initial data for streaming replication."
 WAL_SUMMARY_READY	"Waiting for a new WAL summary to be generated."
 XACT_GROUP_UPDATE	"Waiting for the group leader to update transaction status at transaction end."
-MEM_CXT_PUBLISH	"Waiting for a process to publish memory information."
 
 ABI_compatibility:
 
@@ -402,6 +401,7 @@ SerialSLRU	"Waiting to access the serializable transaction conflict SLRU cache."
 SubtransSLRU	"Waiting to access the sub-transaction SLRU cache."
 XactSLRU	"Waiting to access the transaction status SLRU cache."
 ParallelVacuumDSA	"Waiting for parallel vacuum dynamic shared memory allocation."
+AioUringCompletion	"Waiting for another process to complete IO via io_uring."
 
 # No "ABI_compatibility" region here as WaitEventLWLock has its own C code.
 
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index 4a233b63c32..ffeacf2b819 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -23,6 +23,7 @@ OBJS = \
 	arrayutils.o \
 	ascii.o \
 	bool.o \
+	bytea.o \
 	cash.o \
 	char.o \
 	cryptohashfuncs.o \
diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c
index ca3c5ee3df3..1213f9106d5 100644
--- a/src/backend/utils/adt/acl.c
+++ b/src/backend/utils/adt/acl.c
@@ -135,6 +135,22 @@ static void RoleMembershipCacheCallback(Datum arg, int cacheid, uint32 hashvalue
 
 
 /*
+ * Test whether an identifier char can be left unquoted in ACLs.
+ *
+ * Formerly, we used isalnum() even on non-ASCII characters, resulting in
+ * unportable behavior.  To ensure dump compatibility with old versions,
+ * we now treat high-bit-set characters as always requiring quoting during
+ * putid(), but getid() will always accept them without quotes.
+ */
+static inline bool
+is_safe_acl_char(unsigned char c, bool is_getid)
+{
+	if (IS_HIGHBIT_SET(c))
+		return is_getid;		/* safe only when caller passes is_getid = true */
+	return isalnum(c) || c == '_';	/* otherwise: alphanumeric or underscore */
+}
+
+/*
  * getid
  *		Consumes the first alphanumeric string (identifier) found in string
  *		's', ignoring any leading white space.  If it finds a double quote
@@ -159,21 +175,22 @@ getid(const char *s, char *n, Node *escontext)
 
 	while (isspace((unsigned char) *s))
 		s++;
-	/* This code had better match what putid() does, below */
 	for (;
 		 *s != '\0' &&
-		 (isalnum((unsigned char) *s) ||
-		  *s == '_' ||
-		  *s == '"' ||
-		  in_quotes);
+		 (in_quotes || *s == '"' || is_safe_acl_char(*s, true));
 		 s++)
 	{
 		if (*s == '"')
 		{
+			if (!in_quotes)
+			{
+				in_quotes = true;
+				continue;
+			}
 			/* safe to look at next char (could be '\0' though) */
 			if (*(s + 1) != '"')
 			{
-				in_quotes = !in_quotes;
+				in_quotes = false;
 				continue;
 			}
 			/* it's an escaped double quote; skip the escaping char */
@@ -207,10 +224,10 @@ putid(char *p, const char *s)
 	const char *src;
 	bool		safe = true;
 
+	/* Detect whether we need to use double quotes */
 	for (src = s; *src; src++)
 	{
-		/* This test had better match what getid() does, above */
-		if (!isalnum((unsigned char) *src) && *src != '_')
+		if (!is_safe_acl_char(*src, false))
 		{
 			safe = false;
 			break;
diff --git a/src/backend/utils/adt/bytea.c b/src/backend/utils/adt/bytea.c
new file mode 100644
index 00000000000..2e539c2504e
--- /dev/null
+++ b/src/backend/utils/adt/bytea.c
@@ -0,0 +1,1143 @@
+/*-------------------------------------------------------------------------
+ *
+ * bytea.c
+ *	  Functions for the bytea type.
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/adt/bytea.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/detoast.h"
+#include "catalog/pg_collation_d.h"
+#include "catalog/pg_type_d.h"
+#include "common/int.h"
+#include "fmgr.h"
+#include "libpq/pqformat.h"
+#include "port/pg_bitutils.h"
+#include "utils/builtins.h"
+#include "utils/bytea.h"
+#include "utils/fmgrprotos.h"
+#include "utils/memutils.h"
+#include "utils/sortsupport.h"
+#include "utils/varlena.h"
+#include "varatt.h"
+
+/* GUC variable */
+int			bytea_output = BYTEA_OUTPUT_HEX;
+
+static bytea *bytea_catenate(bytea *t1, bytea *t2);
+static bytea *bytea_substring(Datum str, int S, int L,
+							  bool length_not_specified);
+static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
+
+/*
+ * bytea_catenate
+ *	Guts of byteacat(), broken out so it can be used by other functions
+ *
+ * Arguments can be in short-header form, but not compressed or out-of-line
+ */
+static bytea *
+bytea_catenate(bytea *t1, bytea *t2)
+{
+	bytea	   *result;
+	int			len1,
+				len2,
+				len;
+	char	   *ptr;
+
+	len1 = VARSIZE_ANY_EXHDR(t1);
+	len2 = VARSIZE_ANY_EXHDR(t2);
+
+	/* paranoia ... probably should throw error instead? */
+	if (len1 < 0)
+		len1 = 0;
+	if (len2 < 0)
+		len2 = 0;
+
+	len = len1 + len2 + VARHDRSZ;	/* both payloads plus one header */
+	result = (bytea *) palloc(len);
+
+	/* Set size of result string... */
+	SET_VARSIZE(result, len);
+
+	/* Fill data field of result string: t1's bytes first, then t2's */
+	ptr = VARDATA(result);
+	if (len1 > 0)
+		memcpy(ptr, VARDATA_ANY(t1), len1);
+	if (len2 > 0)
+		memcpy(ptr + len1, VARDATA_ANY(t2), len2);
+
+	return result;
+}
+
+#define PG_STR_GET_BYTEA(str_) \
+	DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
+
+static bytea *
+bytea_substring(Datum str,		/* bytea datum to slice */
+				int S,			/* one-based start position; may be < 1 */
+				int L,			/* length; not read if length_not_specified */
+				bool length_not_specified)
+{
+	int32		S1;				/* adjusted start position */
+	int32		L1;				/* adjusted substring length */
+	int32		E;				/* end position */
+
+	/*
+	 * The logic here should generally match text_substring().
+	 */
+	S1 = Max(S, 1);
+
+	if (length_not_specified)
+	{
+		/*
+		 * Not passed a length - DatumGetByteaPSlice() grabs everything to the
+		 * end of the string if we pass it a negative value for length.
+		 */
+		L1 = -1;
+	}
+	else if (L < 0)
+	{
+		/* SQL99 says to throw an error for E < S, i.e., negative length */
+		ereport(ERROR,
+				(errcode(ERRCODE_SUBSTRING_ERROR),
+				 errmsg("negative substring length not allowed")));
+		L1 = -1;				/* silence stupider compilers */
+	}
+	else if (pg_add_s32_overflow(S, L, &E))
+	{
+		/*
+		 * L could be large enough for S + L to overflow, in which case the
+		 * substring must run to end of string.
+		 */
+		L1 = -1;
+	}
+	else
+	{
+		/*
+		 * A zero or negative value for the end position can happen if the
+		 * start was negative or one. SQL99 says to return a zero-length
+		 * string.
+		 */
+		if (E < 1)
+			return PG_STR_GET_BYTEA("");
+
+		L1 = E - S1;			/* length measured from the clamped start */
+	}
+
+	/*
+	 * If the start position is past the end of the string, SQL99 says to
+	 * return a zero-length string -- DatumGetByteaPSlice() will do that for
+	 * us.  We need only convert S1 to zero-based starting position.
+	 */
+	return DatumGetByteaPSlice(str, S1 - 1, L1);
+}
+
+static bytea *
+bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
+{
+	bytea	   *result;
+	bytea	   *s1;				/* part of t1 before position sp */
+	bytea	   *s2;				/* part of t1 from position sp + sl onward */
+	int			sp_pl_sl;
+
+	/*
+	 * Check for possible integer-overflow cases.  For zero or negative sp,
+	 * throw a "substring length" error because that's what should be
+	 * expected according to the spec's definition of OVERLAY().
+	 */
+	if (sp <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_SUBSTRING_ERROR),
+				 errmsg("negative substring length not allowed")));
+	if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
+		ereport(ERROR,
+				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+				 errmsg("integer out of range")));
+
+	s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
+	s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
+	result = bytea_catenate(s1, t2);	/* s1 || t2 */
+	result = bytea_catenate(result, s2);	/* s1 || t2 || s2 */
+
+	return result;
+}
+
+/*****************************************************************************
+ *	 USER I/O ROUTINES														 *
+ *****************************************************************************/
+
+#define VAL(CH)			((CH) - '0')
+#define DIG(VAL)		((VAL) + '0')
+
+/*
+ *		byteain			- converts from printable representation of byte array
+ *
+ *		Non-printable characters must be passed as '\nnn' (octal) and are
+ *		converted to internal form.  '\' must be passed as '\\'.
+ *		ereport(ERROR, ...) if bad form.
+ *
+ *		BUGS:
+ *				The input is scanned twice.
+ *				The error checking of input is minimal.
+ */
+Datum
+byteain(PG_FUNCTION_ARGS)
+{
+	char	   *inputText = PG_GETARG_CSTRING(0);
+	Node	   *escontext = fcinfo->context;
+	char	   *tp;
+	char	   *rp;
+	int			bc;				/* byte count; later reused as octal scratch */
+	bytea	   *result;
+
+	/* Recognize hex input, i.e. text beginning with backslash-x */
+	if (inputText[0] == '\\' && inputText[1] == 'x')
+	{
+		size_t		len = strlen(inputText);
+
+		bc = (len - 2) / 2 + VARHDRSZ;	/* maximum possible length */
+		result = palloc(bc);
+		bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
+							 escontext);
+		SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
+
+		PG_RETURN_BYTEA_P(result);
+	}
+
+	/* Else, it's the traditional escaped style; pass 1 validates and counts */
+	for (bc = 0, tp = inputText; *tp != '\0'; bc++)
+	{
+		if (tp[0] != '\\')
+			tp++;
+		else if ((tp[0] == '\\') &&
+				 (tp[1] >= '0' && tp[1] <= '3') &&
+				 (tp[2] >= '0' && tp[2] <= '7') &&
+				 (tp[3] >= '0' && tp[3] <= '7'))
+			tp += 4;
+		else if ((tp[0] == '\\') &&
+				 (tp[1] == '\\'))
+			tp += 2;
+		else
+		{
+			/*
+			 * one backslash, not followed by another or ### valid octal
+			 */
+			ereturn(escontext, (Datum) 0,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type %s", "bytea")));
+		}
+	}
+
+	bc += VARHDRSZ;
+
+	result = (bytea *) palloc(bc);
+	SET_VARSIZE(result, bc);
+
+	tp = inputText;				/* pass 2: decode into the result buffer */
+	rp = VARDATA(result);
+	while (*tp != '\0')
+	{
+		if (tp[0] != '\\')
+			*rp++ = *tp++;
+		else if ((tp[0] == '\\') &&
+				 (tp[1] >= '0' && tp[1] <= '3') &&
+				 (tp[2] >= '0' && tp[2] <= '7') &&
+				 (tp[3] >= '0' && tp[3] <= '7'))
+		{
+			bc = VAL(tp[1]);
+			bc <<= 3;
+			bc += VAL(tp[2]);
+			bc <<= 3;
+			*rp++ = bc + VAL(tp[3]);
+
+			tp += 4;
+		}
+		else if ((tp[0] == '\\') &&
+				 (tp[1] == '\\'))
+		{
+			*rp++ = '\\';
+			tp += 2;
+		}
+		else
+		{
+			/*
+			 * We should never get here. The first pass should not allow it.
+			 */
+			ereturn(escontext, (Datum) 0,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type %s", "bytea")));
+		}
+	}
+
+	PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ *		byteaout		- converts to printable representation of byte array
+ *
+ *		In the traditional escaped format, non-printable characters are
+ *		printed as '\nnn' (octal) and '\' as '\\'.
+ */
+Datum
+byteaout(PG_FUNCTION_ARGS)
+{
+	bytea	   *vlena = PG_GETARG_BYTEA_PP(0);
+	char	   *result;
+	char	   *rp;				/* write cursor into result */
+
+	if (bytea_output == BYTEA_OUTPUT_HEX)
+	{
+		/* Print hex format: 2-char prefix, 2 digits per byte, terminator */
+		rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
+		*rp++ = '\\';
+		*rp++ = 'x';
+		rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
+	}
+	else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
+	{
+		/* Print traditional escaped format; first compute the output size */
+		char	   *vp;
+		uint64		len;
+		int			i;
+
+		len = 1;				/* empty string has 1 char */
+		vp = VARDATA_ANY(vlena);
+		for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
+		{
+			if (*vp == '\\')
+				len += 2;
+			else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
+				len += 4;
+			else
+				len++;
+		}
+
+		/*
+		 * In principle len can't overflow uint32 if the input fit in 1GB, but
+		 * for safety let's check rather than relying on palloc's internal
+		 * check.
+		 */
+		if (len > MaxAllocSize)
+			ereport(ERROR,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg_internal("result of bytea output conversion is too large")));
+		rp = result = (char *) palloc(len);
+
+		vp = VARDATA_ANY(vlena);	/* second pass: emit the characters */
+		for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
+		{
+			if (*vp == '\\')
+			{
+				*rp++ = '\\';
+				*rp++ = '\\';
+			}
+			else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
+			{
+				int			val;	/* holds unprintable chars */
+
+				val = *vp;
+				rp[0] = '\\';
+				rp[3] = DIG(val & 07);	/* lowest octal digit goes last */
+				val >>= 3;
+				rp[2] = DIG(val & 07);
+				val >>= 3;
+				rp[1] = DIG(val & 03);
+				rp += 4;
+			}
+			else
+				*rp++ = *vp;
+		}
+	}
+	else
+	{
+		elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
+			 bytea_output);
+		rp = result = NULL;		/* keep compiler quiet */
+	}
+	*rp = '\0';
+	PG_RETURN_CSTRING(result);
+}
+
+/*
+ *		bytearecv			- converts external binary format to bytea
+ */
+Datum
+bytearecv(PG_FUNCTION_ARGS)
+{
+	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
+	bytea	   *result;
+	int			nbytes;
+
+	nbytes = buf->len - buf->cursor;	/* bytes from cursor to end of buf */
+	result = (bytea *) palloc(nbytes + VARHDRSZ);
+	SET_VARSIZE(result, nbytes + VARHDRSZ);
+	pq_copymsgbytes(buf, VARDATA(result), nbytes);
+	PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ *		byteasend			- converts bytea to binary format
+ *
+ * This is a special case: just copy the input and return the copy.
+ */
+Datum
+byteasend(PG_FUNCTION_ARGS)
+{
+	bytea	   *vlena = PG_GETARG_BYTEA_P_COPY(0);
+
+	PG_RETURN_BYTEA_P(vlena);
+}
+
+Datum
+bytea_string_agg_transfn(PG_FUNCTION_ARGS)
+{
+	StringInfo	state;			/* accumulated bytes; NULL until first value */
+
+	state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
+
+	/* Append the value unless null, preceding it with the delimiter. */
+	if (!PG_ARGISNULL(1))
+	{
+		bytea	   *value = PG_GETARG_BYTEA_PP(1);
+		bool		isfirst = false;	/* set when this call creates state */
+
+		/*
+		 * You might think we can just throw away the first delimiter, however
+		 * we must keep it as we may be a parallel worker doing partial
+		 * aggregation building a state to send to the main process.  We need
+		 * to keep the delimiter of every aggregation so that the combine
+		 * function can properly join up the strings of two separately
+		 * partially aggregated results.  The first delimiter is only stripped
+		 * off in the final function.  To know how much to strip off the front
+		 * of the string, we store the length of the first delimiter in the
+		 * StringInfo's cursor field, which we don't otherwise need here.
+		 */
+		if (state == NULL)
+		{
+			MemoryContext aggcontext;
+			MemoryContext oldcontext;
+
+			if (!AggCheckCallContext(fcinfo, &aggcontext))
+			{
+				/* cannot be called directly because of internal-type argument */
+				elog(ERROR, "bytea_string_agg_transfn called in non-aggregate context");
+			}
+
+			/*
+			 * Create state in aggregate context.  It'll stay there across
+			 * subsequent calls.
+			 */
+			oldcontext = MemoryContextSwitchTo(aggcontext);
+			state = makeStringInfo();
+			MemoryContextSwitchTo(oldcontext);
+
+			isfirst = true;
+		}
+
+		if (!PG_ARGISNULL(2))	/* a null delimiter appends nothing */
+		{
+			bytea	   *delim = PG_GETARG_BYTEA_PP(2);
+
+			appendBinaryStringInfo(state, VARDATA_ANY(delim),
+								   VARSIZE_ANY_EXHDR(delim));
+			if (isfirst)
+				state->cursor = VARSIZE_ANY_EXHDR(delim);
+		}
+
+		appendBinaryStringInfo(state, VARDATA_ANY(value),
+							   VARSIZE_ANY_EXHDR(value));
+	}
+
+	/*
+	 * The transition type for string_agg() is declared to be "internal",
+	 * which is a pass-by-value type the same size as a pointer.
+	 */
+	if (state)
+		PG_RETURN_POINTER(state);
+	PG_RETURN_NULL();
+}
+
+Datum
+bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
+{
+	StringInfo	state;
+
+	/* cannot be called directly because of internal-type argument */
+	Assert(AggCheckCallContext(fcinfo, NULL));
+
+	state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
+
+	if (state != NULL)
+	{
+		/* As per comment in transfn, strip data before the cursor position */
+		bytea	   *result;
+		int			strippedlen = state->len - state->cursor;
+
+		result = (bytea *) palloc(strippedlen + VARHDRSZ);
+		SET_VARSIZE(result, strippedlen + VARHDRSZ);
+		memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
+		PG_RETURN_BYTEA_P(result);
+	}
+	else
+		PG_RETURN_NULL();		/* null state argument yields a null result */
+}
+
+/*-------------------------------------------------------------
+ * byteaoctetlen
+ *
+ * get the number of bytes contained in an instance of type 'bytea'
+ *-------------------------------------------------------------
+ */
+Datum
+byteaoctetlen(PG_FUNCTION_ARGS)
+{
+	Datum		str = PG_GETARG_DATUM(0);
+
+	/* We need not detoast the input at all; just subtract the header size */
+	PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
+}
+
+/*
+ * byteacat -
+ *	  takes two bytea* and returns a bytea* that is the concatenation of
+ *	  the two.
+ *
+ * Cloned from textcat and modified as required.
+ */
+Datum
+byteacat(PG_FUNCTION_ARGS)
+{
+	bytea	   *t1 = PG_GETARG_BYTEA_PP(0);
+	bytea	   *t2 = PG_GETARG_BYTEA_PP(1);
+
+	PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));	/* t1's bytes, then t2's */
+}
+
+/*
+ * byteaoverlay
+ *	Replace specified substring of first string with second
+ *
+ * The SQL standard defines OVERLAY() in terms of substring and concatenation.
+ * This code is a direct implementation of what the standard says.
+ */
+Datum
+byteaoverlay(PG_FUNCTION_ARGS)
+{
+	bytea	   *t1 = PG_GETARG_BYTEA_PP(0); /* string being overlaid */
+	bytea	   *t2 = PG_GETARG_BYTEA_PP(1); /* replacement bytes */
+	int			sp = PG_GETARG_INT32(2);	/* substring start position */
+	int			sl = PG_GETARG_INT32(3);	/* substring length */
+
+	PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
+}
+
+Datum
+byteaoverlay_no_len(PG_FUNCTION_ARGS)
+{
+	bytea	   *t1 = PG_GETARG_BYTEA_PP(0); /* string being overlaid */
+	bytea	   *t2 = PG_GETARG_BYTEA_PP(1); /* replacement bytes */
+	int			sp = PG_GETARG_INT32(2);	/* substring start position */
+	int			sl;				/* substring length, not supplied by caller */
+
+	sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
+	PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
+}
+
+/*
+ * bytea_substr()
+ * Return a substring starting at the specified position.
+ * Cloned from text_substr and modified as required.
+ *
+ * Input:
+ *	- string
+ *	- starting position (is one-based)
+ *	- string length (optional)
+ *
+ * If the starting position is zero or less, then return from the start of the string
+ * adjusting the length to be consistent with the "negative start" per SQL.
+ * If the length is less than zero, an ERROR is thrown. If no third argument
+ * (length) is provided, the length to the end of the string is assumed.
+ */
+Datum
+bytea_substr(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
+									  PG_GETARG_INT32(1),
+									  PG_GETARG_INT32(2),
+									  false));	/* length was specified */
+}
+
+/*
+ * bytea_substr_no_len -
+ *	  Wrapper to avoid opr_sanity failure due to
+ *	  one function accepting a different number of args.
+ */
+Datum
+bytea_substr_no_len(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
+									  PG_GETARG_INT32(1),
+									  -1,	/* not read by bytea_substring */
+									  true));	/* length_not_specified */
+}
+
+/*
+ * bit_count - returns pg_popcount() over the bytea's data bytes, as int64
+ */
+Datum
+bytea_bit_count(PG_FUNCTION_ARGS)
+{
+	bytea	   *t1 = PG_GETARG_BYTEA_PP(0);
+
+	PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
+}
+
+/*
+ * byteapos -
+ *	  Return the position of the specified substring.
+ *	  Implements the SQL POSITION() function.
+ * Cloned from textpos and modified as required.
+ */
+Datum
+byteapos(PG_FUNCTION_ARGS)
+{
+	bytea	   *t1 = PG_GETARG_BYTEA_PP(0); /* string searched in */
+	bytea	   *t2 = PG_GETARG_BYTEA_PP(1); /* pattern searched for */
+	int			pos;
+	int			px,
+				p;
+	int			len1,
+				len2;
+	char	   *p1,
+			   *p2;
+
+	len1 = VARSIZE_ANY_EXHDR(t1);
+	len2 = VARSIZE_ANY_EXHDR(t2);
+
+	if (len2 <= 0)
+		PG_RETURN_INT32(1);		/* result for empty pattern */
+
+	p1 = VARDATA_ANY(t1);
+	p2 = VARDATA_ANY(t2);
+
+	pos = 0;					/* stays 0 if no match is found */
+	px = (len1 - len2);			/* last offset at which t2 still fits in t1 */
+	for (p = 0; p <= px; p++)
+	{
+		if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
+		{
+			pos = p + 1;		/* report a one-based position */
+			break;
+		};
+		p1++;
+	};
+
+	PG_RETURN_INT32(pos);
+}
+
+/*-------------------------------------------------------------
+ * byteaGetByte
+ *
+ * this routine treats "bytea" as an array of bytes.
+ * It returns the Nth byte (a number between 0 and 255).
+ *-------------------------------------------------------------
+ */
+Datum
+byteaGetByte(PG_FUNCTION_ARGS)
+{
+	bytea	   *v = PG_GETARG_BYTEA_PP(0);
+	int32		n = PG_GETARG_INT32(1); /* zero-based byte index */
+	int			len;
+	int			byte;
+
+	len = VARSIZE_ANY_EXHDR(v);
+
+	if (n < 0 || n >= len)
+		ereport(ERROR,
+				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+				 errmsg("index %d out of valid range, 0..%d",
+						n, len - 1)));
+
+	byte = ((unsigned char *) VARDATA_ANY(v))[n];	/* read as unsigned */
+
+	PG_RETURN_INT32(byte);
+}
+
+/*-------------------------------------------------------------
+ * byteaGetBit
+ *
+ * This routine treats a "bytea" type like an array of bits.
+ * It returns the value of the Nth bit (0 or 1).
+ * Bit N lives in byte N / 8 at bit position N % 8, counted from the low end.
+ *-------------------------------------------------------------
+ */
+Datum
+byteaGetBit(PG_FUNCTION_ARGS)
+{
+	bytea	   *v = PG_GETARG_BYTEA_PP(0);
+	int64		n = PG_GETARG_INT64(1); /* zero-based bit index */
+	int			byteNo,
+				bitNo;
+	int			len;
+	int			byte;
+
+	len = VARSIZE_ANY_EXHDR(v);
+
+	if (n < 0 || n >= (int64) len * 8)
+		ereport(ERROR,
+				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+				 errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
+						n, (int64) len * 8 - 1)));
+
+	/* n/8 is now known < len, so safe to cast to int */
+	byteNo = (int) (n / 8);
+	bitNo = (int) (n % 8);
+
+	byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
+
+	if (byte & (1 << bitNo))
+		PG_RETURN_INT32(1);
+	else
+		PG_RETURN_INT32(0);
+}
+
+/*-------------------------------------------------------------
+ * byteaSetByte
+ *
+ * Given an instance of type 'bytea' creates a new one with
+ * the Nth byte set to the given value.
+ *
+ *-------------------------------------------------------------
+ */
+Datum
+byteaSetByte(PG_FUNCTION_ARGS)
+{
+	bytea	   *res = PG_GETARG_BYTEA_P_COPY(0);	/* modified and returned */
+	int32		n = PG_GETARG_INT32(1); /* zero-based byte index */
+	int32		newByte = PG_GETARG_INT32(2);
+	int			len;
+
+	len = VARSIZE(res) - VARHDRSZ;
+
+	if (n < 0 || n >= len)
+		ereport(ERROR,
+				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+				 errmsg("index %d out of valid range, 0..%d",
+						n, len - 1)));
+
+	/*
+	 * Now set the byte.  newByte is not range-checked before this store.
+	 */
+	((unsigned char *) VARDATA(res))[n] = newByte;
+
+	PG_RETURN_BYTEA_P(res);
+}
+
+/*-------------------------------------------------------------
+ * byteaSetBit
+ *
+ * Given an instance of type 'bytea' creates a new one with
+ * the Nth bit set to the given value.
+ *
+ *-------------------------------------------------------------
+ */
+Datum
+byteaSetBit(PG_FUNCTION_ARGS)
+{
+	bytea	   *res = PG_GETARG_BYTEA_P_COPY(0);	/* modified and returned */
+	int64		n = PG_GETARG_INT64(1); /* zero-based bit index */
+	int32		newBit = PG_GETARG_INT32(2);
+	int			len;
+	int			oldByte,
+				newByte;
+	int			byteNo,
+				bitNo;
+
+	len = VARSIZE(res) - VARHDRSZ;
+
+	if (n < 0 || n >= (int64) len * 8)
+		ereport(ERROR,
+				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+				 errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
+						n, (int64) len * 8 - 1)));
+
+	/* n/8 is now known < len, so safe to cast to int */
+	byteNo = (int) (n / 8);
+	bitNo = (int) (n % 8);
+
+	/*
+	 * sanity check!  (performed after the index range check above)
+	 */
+	if (newBit != 0 && newBit != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("new bit must be 0 or 1")));
+
+	/*
+	 * Update the byte: clear or set only the selected bit.
+	 */
+	oldByte = ((unsigned char *) VARDATA(res))[byteNo];
+
+	if (newBit == 0)
+		newByte = oldByte & (~(1 << bitNo));
+	else
+		newByte = oldByte | (1 << bitNo);
+
+	((unsigned char *) VARDATA(res))[byteNo] = newByte;
+
+	PG_RETURN_BYTEA_P(res);
+}
+
+/*
+ * Return reversed bytea (a newly allocated value; the input is only read)
+ */
+Datum
+bytea_reverse(PG_FUNCTION_ARGS)
+{
+	bytea	   *v = PG_GETARG_BYTEA_PP(0);
+	const char *p = VARDATA_ANY(v);
+	int			len = VARSIZE_ANY_EXHDR(v);
+	const char *endp = p + len;
+	bytea	   *result = palloc(len + VARHDRSZ);
+	char	   *dst = (char *) VARDATA(result) + len;
+
+	SET_VARSIZE(result, len + VARHDRSZ);
+
+	while (p < endp)			/* read input forward, fill result backward */
+		*(--dst) = *p++;
+
+	PG_RETURN_BYTEA_P(result);
+}
+
+
+/*****************************************************************************
+ *	Comparison Functions used for bytea
+ *
+ * Note: btree indexes need these routines not to leak memory; therefore,
+ * be careful to free working copies of toasted datums.  Most places don't
+ * need to be so careful.
+ *****************************************************************************/
+
+Datum
+byteaeq(PG_FUNCTION_ARGS)
+{
+	Datum		arg1 = PG_GETARG_DATUM(0);
+	Datum		arg2 = PG_GETARG_DATUM(1);
+	bool		result;
+	Size		len1,
+				len2;
+
+	/*
+	 * We can use a fast path for unequal lengths, which might save us from
+	 * having to detoast one or both values.
+	 */
+	len1 = toast_raw_datum_size(arg1);	/* VARHDRSZ is subtracted below */
+	len2 = toast_raw_datum_size(arg2);
+	if (len1 != len2)
+		result = false;
+	else
+	{
+		bytea	   *barg1 = DatumGetByteaPP(arg1);
+		bytea	   *barg2 = DatumGetByteaPP(arg2);
+
+		result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
+						 len1 - VARHDRSZ) == 0);
+
+		PG_FREE_IF_COPY(barg1, 0);
+		PG_FREE_IF_COPY(barg2, 1);
+	}
+
+	PG_RETURN_BOOL(result);
+}
+
+Datum
+byteane(PG_FUNCTION_ARGS)
+{
+	Datum		arg1 = PG_GETARG_DATUM(0);
+	Datum		arg2 = PG_GETARG_DATUM(1);
+	bool		result;
+	Size		len1,
+				len2;
+
+	/*
+	 * We can use a fast path for unequal lengths, which might save us from
+	 * having to detoast one or both values.
+	 */
+	len1 = toast_raw_datum_size(arg1);	/* VARHDRSZ is subtracted below */
+	len2 = toast_raw_datum_size(arg2);
+	if (len1 != len2)
+		result = true;
+	else
+	{
+		bytea	   *barg1 = DatumGetByteaPP(arg1);
+		bytea	   *barg2 = DatumGetByteaPP(arg2);
+
+		result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
+						 len1 - VARHDRSZ) != 0);
+
+		PG_FREE_IF_COPY(barg1, 0);
+		PG_FREE_IF_COPY(barg2, 1);
+	}
+
+	PG_RETURN_BOOL(result);
+}
+
+Datum
+bytealt(PG_FUNCTION_ARGS)
+{
+	bytea	   *arg1 = PG_GETARG_BYTEA_PP(0);
+	bytea	   *arg2 = PG_GETARG_BYTEA_PP(1);
+	int			len1,
+				len2;
+	int			cmp;			/* memcmp() result over the common prefix */
+
+	len1 = VARSIZE_ANY_EXHDR(arg1);
+	len2 = VARSIZE_ANY_EXHDR(arg2);
+
+	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+	PG_FREE_IF_COPY(arg1, 0);	/* lengths and cmp are already saved */
+	PG_FREE_IF_COPY(arg2, 1);
+
+	PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
+}
+
+Datum
+byteale(PG_FUNCTION_ARGS)
+{
+	bytea	   *arg1 = PG_GETARG_BYTEA_PP(0);
+	bytea	   *arg2 = PG_GETARG_BYTEA_PP(1);
+	int			len1,
+				len2;
+	int			cmp;			/* memcmp() result over the common prefix */
+
+	len1 = VARSIZE_ANY_EXHDR(arg1);
+	len2 = VARSIZE_ANY_EXHDR(arg2);
+
+	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+	PG_FREE_IF_COPY(arg1, 0);	/* lengths and cmp are already saved */
+	PG_FREE_IF_COPY(arg2, 1);
+
+	PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
+}
+
+Datum
+byteagt(PG_FUNCTION_ARGS)
+{
+	bytea	   *arg1 = PG_GETARG_BYTEA_PP(0);
+	bytea	   *arg2 = PG_GETARG_BYTEA_PP(1);
+	int			len1,
+				len2;
+	int			cmp;			/* memcmp() result over the common prefix */
+
+	len1 = VARSIZE_ANY_EXHDR(arg1);
+	len2 = VARSIZE_ANY_EXHDR(arg2);
+
+	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+	PG_FREE_IF_COPY(arg1, 0);	/* lengths and cmp are already saved */
+	PG_FREE_IF_COPY(arg2, 1);
+
+	PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
+}
+
+/* bytea ">=": true when arg1 sorts after arg2 or the two are equal. */
+Datum
+byteage(PG_FUNCTION_ARGS)
+{
+	bytea	   *lhs = PG_GETARG_BYTEA_PP(0);
+	bytea	   *rhs = PG_GETARG_BYTEA_PP(1);
+	int			lhs_len = VARSIZE_ANY_EXHDR(lhs);
+	int			rhs_len = VARSIZE_ANY_EXHDR(rhs);
+	int			prefix_cmp;
+
+	/* Compare only the common prefix; a tie there is settled by length. */
+	prefix_cmp = memcmp(VARDATA_ANY(lhs), VARDATA_ANY(rhs),
+						Min(lhs_len, rhs_len));
+
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_BOOL((prefix_cmp != 0) ? (prefix_cmp > 0) : (lhs_len >= rhs_len));
+}
+
+/* Three-way comparison: <0, 0 or >0 as arg1 sorts before, with, or after arg2 */
+Datum
+byteacmp(PG_FUNCTION_ARGS)
+{
+	bytea	   *lhs = PG_GETARG_BYTEA_PP(0);
+	bytea	   *rhs = PG_GETARG_BYTEA_PP(1);
+	int			lhs_len = VARSIZE_ANY_EXHDR(lhs);
+	int			rhs_len = VARSIZE_ANY_EXHDR(rhs);
+	int			order;
+
+	order = memcmp(VARDATA_ANY(lhs), VARDATA_ANY(rhs), Min(lhs_len, rhs_len));
+
+	/* Equal common prefix: shorter sorts first (-1), longer last (+1) */
+	if (order == 0)
+		order = (lhs_len > rhs_len) - (lhs_len < rhs_len);
+
+	PG_FREE_IF_COPY(lhs, 0);
+	PG_FREE_IF_COPY(rhs, 1);
+
+	PG_RETURN_INT32(order);
+}
+
+/* Return whichever argument sorts later in bytewise order; arg2 wins ties. */
+Datum
+bytea_larger(PG_FUNCTION_ARGS)
+{
+	bytea	   *lhs = PG_GETARG_BYTEA_PP(0);
+	bytea	   *rhs = PG_GETARG_BYTEA_PP(1);
+	int			lhs_len = VARSIZE_ANY_EXHDR(lhs);
+	int			rhs_len = VARSIZE_ANY_EXHDR(rhs);
+	int			order;
+
+	order = memcmp(VARDATA_ANY(lhs), VARDATA_ANY(rhs), Min(lhs_len, rhs_len));
+
+	/* lhs is chosen only when strictly greater; otherwise rhs */
+	if (order > 0 || (order == 0 && lhs_len > rhs_len))
+		PG_RETURN_BYTEA_P(lhs);
+
+	PG_RETURN_BYTEA_P(rhs);
+}
+
+/* Return whichever argument sorts earlier in bytewise order; arg2 wins ties. */
+Datum
+bytea_smaller(PG_FUNCTION_ARGS)
+{
+	bytea	   *lhs = PG_GETARG_BYTEA_PP(0);
+	bytea	   *rhs = PG_GETARG_BYTEA_PP(1);
+	int			lhs_len = VARSIZE_ANY_EXHDR(lhs);
+	int			rhs_len = VARSIZE_ANY_EXHDR(rhs);
+	int			order;
+
+	order = memcmp(VARDATA_ANY(lhs), VARDATA_ANY(rhs), Min(lhs_len, rhs_len));
+
+	/* lhs is chosen only when strictly smaller; otherwise rhs */
+	if (order < 0 || (order == 0 && lhs_len < rhs_len))
+		PG_RETURN_BYTEA_P(lhs);
+
+	PG_RETURN_BYTEA_P(rhs);
+}
+
+/* Set up sort support for bytea by handing off to varstr_sortsupport(). */
+Datum
+bytea_sortsupport(PG_FUNCTION_ARGS)
+{
+	SortSupport sortstate = (SortSupport) PG_GETARG_POINTER(0);
+	MemoryContext caller_cxt = MemoryContextSwitchTo(sortstate->ssup_cxt);
+
+	/*
+	 * Use generic string SortSupport with ssup_cxt current, forcing "C" collation
+	 */
+	varstr_sortsupport(sortstate, BYTEAOID, C_COLLATION_OID);
+
+	MemoryContextSwitchTo(caller_cxt);
+	PG_RETURN_VOID();
+}
+
+/*
+ * Cast bytea -> int2: the bytes are read most-significant first.
+ */
+Datum
+bytea_int2(PG_FUNCTION_ARGS)
+{
+	bytea	   *input = PG_GETARG_BYTEA_PP(0);
+	int			nbytes = VARSIZE_ANY_EXHDR(input);
+	const unsigned char *bytes = (const unsigned char *) VARDATA_ANY(input);
+	uint16		accum = 0;
+
+	/* More bytes than the target type holds cannot be represented */
+	if (nbytes > sizeof(accum))
+		ereport(ERROR,
+				errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+				errmsg("smallint out of range"));
+
+	/* Fold the bytes in, most significant first */
+	for (int pos = 0; pos < nbytes; pos++)
+		accum = (uint16) ((accum << BITS_PER_BYTE) | bytes[pos]);
+
+	/* An empty input leaves the accumulator at zero */
+	PG_RETURN_INT16(accum);
+}
+
+/*
+ * Cast bytea -> int4: the bytes are read most-significant first.
+ */
+Datum
+bytea_int4(PG_FUNCTION_ARGS)
+{
+	bytea	   *input = PG_GETARG_BYTEA_PP(0);
+	int			nbytes = VARSIZE_ANY_EXHDR(input);
+	const unsigned char *bytes = (const unsigned char *) VARDATA_ANY(input);
+	uint32		accum = 0;
+
+	/* More bytes than the target type holds cannot be represented */
+	if (nbytes > sizeof(accum))
+		ereport(ERROR,
+				errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+				errmsg("integer out of range"));
+
+	/* Fold the bytes in, most significant first */
+	for (int pos = 0; pos < nbytes; pos++)
+		accum = (accum << BITS_PER_BYTE) | bytes[pos];
+
+	/* An empty input leaves the accumulator at zero */
+	PG_RETURN_INT32(accum);
+}
+
+/*
+ * Cast bytea -> int8: the bytes are read most-significant first.
+ */
+Datum
+bytea_int8(PG_FUNCTION_ARGS)
+{
+	bytea	   *input = PG_GETARG_BYTEA_PP(0);
+	int			nbytes = VARSIZE_ANY_EXHDR(input);
+	const unsigned char *bytes = (const unsigned char *) VARDATA_ANY(input);
+	uint64		accum = 0;
+
+	/* More bytes than the target type holds cannot be represented */
+	if (nbytes > sizeof(accum))
+		ereport(ERROR,
+				errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+				errmsg("bigint out of range"));
+
+	/* Fold the bytes in, most significant first */
+	for (int pos = 0; pos < nbytes; pos++)
+		accum = (accum << BITS_PER_BYTE) | bytes[pos];
+
+	/* An empty input leaves the accumulator at zero */
+	PG_RETURN_INT64(accum);
+}
+
+/* Cast int2 -> bytea; hands fcinfo unchanged to int2send() and returns its result */
+Datum
+int2_bytea(PG_FUNCTION_ARGS)
+{
+	return int2send(fcinfo);
+}
+
+/* Cast int4 -> bytea; hands fcinfo unchanged to int4send() and returns its result */
+Datum
+int4_bytea(PG_FUNCTION_ARGS)
+{
+	return int4send(fcinfo);
+}
+
+/* Cast int8 -> bytea; hands fcinfo unchanged to int8send() and returns its result */
+Datum
+int8_bytea(PG_FUNCTION_ARGS)
+{
+	return int8send(fcinfo);
+}
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 4227ab1a72b..344f58b92f7 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -1363,10 +1363,35 @@ timestamp_date(PG_FUNCTION_ARGS)
 {
 	Timestamp	timestamp = PG_GETARG_TIMESTAMP(0);
 	DateADT		result;
+
+	result = timestamp2date_opt_overflow(timestamp, NULL);
+	PG_RETURN_DATEADT(result);
+}
+
+/*
+ * Convert timestamp to date.
+ *
+ * On successful conversion, *overflow is set to zero if it's not NULL.
+ *
+ * If the timestamp is finite but out of the valid range for date, then:
+ * if overflow is NULL, we throw an out-of-range error.
+ * if overflow is not NULL, we store +1 or -1 there to indicate the sign
+ * of the overflow, and return the appropriate date infinity.
+ *
+ * Note: given the ranges of the types, overflow is only possible at
+ * the minimum end of the range, but we don't assume that in this code.
+ */
+DateADT
+timestamp2date_opt_overflow(Timestamp timestamp, int *overflow)
+{
+	DateADT		result;
 	struct pg_tm tt,
 			   *tm = &tt;
 	fsec_t		fsec;
 
+	if (overflow)
+		*overflow = 0;
+
 	if (TIMESTAMP_IS_NOBEGIN(timestamp))
 		DATE_NOBEGIN(result);
 	else if (TIMESTAMP_IS_NOEND(timestamp))
@@ -1374,14 +1399,30 @@ timestamp_date(PG_FUNCTION_ARGS)
 	else
 	{
 		if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0)
+		{
+			if (overflow)
+			{
+				if (timestamp < 0)
+				{
+					*overflow = -1;
+					DATE_NOBEGIN(result);
+				}
+				else
+				{
+					*overflow = 1;	/* not actually reachable */
+					DATE_NOEND(result);
+				}
+				return result;
+			}
 			ereport(ERROR,
 					(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
 					 errmsg("timestamp out of range")));
+		}
 
 		result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE;
 	}
 
-	PG_RETURN_DATEADT(result);
+	return result;
 }
 
 
@@ -1408,11 +1449,36 @@ timestamptz_date(PG_FUNCTION_ARGS)
 {
 	TimestampTz timestamp = PG_GETARG_TIMESTAMP(0);
 	DateADT		result;
+
+	result = timestamptz2date_opt_overflow(timestamp, NULL);
+	PG_RETURN_DATEADT(result);
+}
+
+/*
+ * Convert timestamptz to date.
+ *
+ * On successful conversion, *overflow is set to zero if it's not NULL.
+ *
+ * If the timestamptz is finite but out of the valid range for date, then:
+ * if overflow is NULL, we throw an out-of-range error.
+ * if overflow is not NULL, we store +1 or -1 there to indicate the sign
+ * of the overflow, and return the appropriate date infinity.
+ *
+ * Note: given the ranges of the types, overflow is only possible at
+ * the minimum end of the range, but we don't assume that in this code.
+ */
+DateADT
+timestamptz2date_opt_overflow(TimestampTz timestamp, int *overflow)
+{
+	DateADT		result;
 	struct pg_tm tt,
 			   *tm = &tt;
 	fsec_t		fsec;
 	int			tz;
 
+	if (overflow)
+		*overflow = 0;
+
 	if (TIMESTAMP_IS_NOBEGIN(timestamp))
 		DATE_NOBEGIN(result);
 	else if (TIMESTAMP_IS_NOEND(timestamp))
@@ -1420,14 +1486,30 @@ timestamptz_date(PG_FUNCTION_ARGS)
 	else
 	{
 		if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0)
+		{
+			if (overflow)
+			{
+				if (timestamp < 0)
+				{
+					*overflow = -1;
+					DATE_NOBEGIN(result);
+				}
+				else
+				{
+					*overflow = 1;	/* not actually reachable */
+					DATE_NOEND(result);
+				}
+				return result;
+			}
 			ereport(ERROR,
 					(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
 					 errmsg("timestamp out of range")));
+		}
 
 		result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE;
 	}
 
-	PG_RETURN_DATEADT(result);
+	return result;
 }
 
 
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 793d8a9adcc..680fee2a844 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -702,9 +702,18 @@ ParseFraction(char *cp, double *frac)
 	}
 	else
 	{
+		/*
+		 * On the other hand, let's reject anything that's not digits after
+		 * the ".".  strtod is happy with input like ".123e9", but that'd
+		 * break callers' expectation that the result is in 0..1.  (It's quite
+		 * difficult to get here with such input, but not impossible.)
+		 */
+		if (strspn(cp + 1, "0123456789") != strlen(cp + 1))
+			return DTERR_BAD_FORMAT;
+
 		errno = 0;
 		*frac = strtod(cp, &cp);
-		/* check for parse failure */
+		/* check for parse failure (probably redundant given prior check) */
 		if (*cp != '\0' || errno != 0)
 			return DTERR_BAD_FORMAT;
 	}
@@ -2959,30 +2968,27 @@ DecodeNumberField(int len, char *str, int fmask,
 	char	   *cp;
 
 	/*
+	 * This function was originally meant to cope only with DTK_NUMBER fields,
+	 * but we now sometimes abuse it to parse (parts of) DTK_DATE fields,
+	 * which can contain letters and other punctuation.  Reject if it's not a
+	 * valid DTK_NUMBER, that is digits and decimal point(s).  (ParseFraction
+	 * will reject if there's more than one decimal point.)
+	 */
+	if (strspn(str, "0123456789.") != len)
+		return DTERR_BAD_FORMAT;
+
+	/*
 	 * Have a decimal point? Then this is a date or something with a seconds
 	 * field...
 	 */
 	if ((cp = strchr(str, '.')) != NULL)
 	{
-		/*
-		 * Can we use ParseFractionalSecond here?  Not clear whether trailing
-		 * junk should be rejected ...
-		 */
-		if (cp[1] == '\0')
-		{
-			/* avoid assuming that strtod will accept "." */
-			*fsec = 0;
-		}
-		else
-		{
-			double		frac;
+		int			dterr;
 
-			errno = 0;
-			frac = strtod(cp, NULL);
-			if (errno != 0)
-				return DTERR_BAD_FORMAT;
-			*fsec = rint(frac * 1000000);
-		}
+		/* Convert the fraction and store at *fsec */
+		dterr = ParseFractionalSecond(cp, fsec);
+		if (dterr)
+			return dterr;
 		/* Now truncate off the fraction for further processing */
 		*cp = '\0';
 		len = strlen(str);
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index 6d20ae07ae7..7b97d2be6ca 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -4065,10 +4065,11 @@ float84ge(PG_FUNCTION_ARGS)
  * in the histogram. width_bucket() returns an integer indicating the
  * bucket number that 'operand' belongs to in an equiwidth histogram
  * with the specified characteristics. An operand smaller than the
- * lower bound is assigned to bucket 0. An operand greater than the
- * upper bound is assigned to an additional bucket (with number
- * count+1). We don't allow "NaN" for any of the float8 inputs, and we
- * don't allow either of the histogram bounds to be +/- infinity.
+ * lower bound is assigned to bucket 0. An operand greater than or equal
+ * to the upper bound is assigned to an additional bucket (with number
+ * count+1). We don't allow the histogram bounds to be NaN or +/- infinity,
+ * but we do allow those values for the operand (taking NaN to be larger
+ * than any other value, as we do in comparisons).
  */
 Datum
 width_bucket_float8(PG_FUNCTION_ARGS)
@@ -4084,12 +4085,11 @@ width_bucket_float8(PG_FUNCTION_ARGS)
 				(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
 				 errmsg("count must be greater than zero")));
 
-	if (isnan(operand) || isnan(bound1) || isnan(bound2))
+	if (isnan(bound1) || isnan(bound2))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
-				 errmsg("operand, lower bound, and upper bound cannot be NaN")));
+				 errmsg("lower and upper bounds cannot be NaN")));
 
-	/* Note that we allow "operand" to be infinite */
 	if (isinf(bound1) || isinf(bound2))
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
@@ -4097,15 +4097,15 @@ width_bucket_float8(PG_FUNCTION_ARGS)
 
 	if (bound1 < bound2)
 	{
-		if (operand < bound1)
-			result = 0;
-		else if (operand >= bound2)
+		if (isnan(operand) || operand >= bound2)
 		{
 			if (pg_add_s32_overflow(count, 1, &result))
 				ereport(ERROR,
 						(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
 						 errmsg("integer out of range")));
 		}
+		else if (operand < bound1)
+			result = 0;
 		else
 		{
 			if (!isinf(bound2 - bound1))
@@ -4135,7 +4135,7 @@ width_bucket_float8(PG_FUNCTION_ARGS)
 	}
 	else if (bound1 > bound2)
 	{
-		if (operand > bound1)
+		if (isnan(operand) || operand > bound1)
 			result = 0;
 		else if (operand <= bound2)
 		{
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 5bd1e01f7e4..1d05481181d 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -3590,14 +3590,15 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
 					if (matched < 2)
 						ereturn(escontext,,
 								(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
-								 errmsg("invalid input string for \"Y,YYY\"")));
+								 errmsg("invalid value \"%s\" for \"%s\"",
+										s, "Y,YYY")));
 
 					/* years += (millennia * 1000); */
 					if (pg_mul_s32_overflow(millennia, 1000, &millennia) ||
 						pg_add_s32_overflow(years, millennia, &years))
 						ereturn(escontext,,
 								(errcode(ERRCODE_DATETIME_FIELD_OVERFLOW),
-								 errmsg("value for \"Y,YYY\" in source string is out of range")));
+								 errmsg("value for \"%s\" in source string is out of range", "Y,YYY")));
 
 					if (!from_char_set_int(&out->year, years, n, escontext))
 						return;
diff --git a/src/backend/utils/adt/inet_net_pton.c b/src/backend/utils/adt/inet_net_pton.c
index ef2236d9f04..3b0db2a3799 100644
--- a/src/backend/utils/adt/inet_net_pton.c
+++ b/src/backend/utils/adt/inet_net_pton.c
@@ -115,8 +115,7 @@ inet_cidr_pton_ipv4(const char *src, u_char *dst, size_t size)
 		src++;					/* skip x or X. */
 		while ((ch = *src++) != '\0' && isxdigit((unsigned char) ch))
 		{
-			if (isupper((unsigned char) ch))
-				ch = tolower((unsigned char) ch);
+			ch = pg_ascii_tolower((unsigned char) ch);
 			n = strchr(xdigits, ch) - xdigits;
 			assert(n >= 0 && n <= 15);
 			if (dirty == 0)
diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c
index c8b6c15e059..82b807d067a 100644
--- a/src/backend/utils/adt/jsonb_util.c
+++ b/src/backend/utils/adt/jsonb_util.c
@@ -277,22 +277,16 @@ compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
 		else
 		{
 			/*
-			 * It's safe to assume that the types differed, and that the va
-			 * and vb values passed were set.
-			 *
-			 * If the two values were of the same container type, then there'd
-			 * have been a chance to observe the variation in the number of
-			 * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're
-			 * either two heterogeneously-typed containers, or a container and
-			 * some scalar type.
-			 *
-			 * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT
-			 * cases here, because we would have seen the corresponding
-			 * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and
-			 * concluded that they don't match.
+			 * It's not possible for one iterator to report end of array or
+			 * object while the other one reports something else, because we
+			 * would have detected a length mismatch when we processed the
+			 * container-start tokens above.  Likewise we can't see WJB_DONE
+			 * from one but not the other.  So we have two different-type
+			 * containers, or a container and some scalar type, or two
+			 * different scalar types.  Sort on the basis of the type code.
 			 */
-			Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT);
-			Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT);
+			Assert(ra != WJB_DONE && ra != WJB_END_ARRAY && ra != WJB_END_OBJECT);
+			Assert(rb != WJB_DONE && rb != WJB_END_ARRAY && rb != WJB_END_OBJECT);
 
 			Assert(va.type != vb.type);
 			Assert(va.type != jbvBinary);
@@ -852,15 +846,20 @@ JsonbIteratorInit(JsonbContainer *container)
  * It is our job to expand the jbvBinary representation without bothering them
  * with it.  However, clients should not take it upon themselves to touch array
  * or Object element/pair buffers, since their element/pair pointers are
- * garbage.  Also, *val will not be set when returning WJB_END_ARRAY or
- * WJB_END_OBJECT, on the assumption that it's only useful to access values
- * when recursing in.
+ * garbage.
+ *
+ * *val is not meaningful when the result is WJB_DONE, WJB_END_ARRAY or
+ * WJB_END_OBJECT.  However, we set val->type = jbvNull in those cases,
+ * so that callers may assume that val->type is always well-defined.
  */
 JsonbIteratorToken
 JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
 {
 	if (*it == NULL)
+	{
+		val->type = jbvNull;
 		return WJB_DONE;
+	}
 
 	/*
 	 * When stepping into a nested container, we jump back here to start
@@ -898,6 +897,7 @@ recurse:
 				 * nesting).
 				 */
 				*it = freeAndGetParent(*it);
+				val->type = jbvNull;
 				return WJB_END_ARRAY;
 			}
 
@@ -951,6 +951,7 @@ recurse:
 				 * of nesting).
 				 */
 				*it = freeAndGetParent(*it);
+				val->type = jbvNull;
 				return WJB_END_OBJECT;
 			}
 			else
@@ -995,8 +996,10 @@ recurse:
 				return WJB_VALUE;
 	}
 
-	elog(ERROR, "invalid iterator state");
-	return -1;
+	elog(ERROR, "invalid jsonb iterator state");
+	/* satisfy compilers that don't know that elog(ERROR) doesn't return */
+	val->type = jbvNull;
+	return WJB_DONE;
 }
 
 /*
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 7f4cf614585..4216ac17f43 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -98,7 +98,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale)
 	else if (locale->is_default)
 		return pg_tolower(c);
 	else
-		return tolower_l(c, locale->info.lt);
+		return char_tolower(c, locale);
 }
 
 
@@ -209,7 +209,17 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 	 * way.
 	 */
 
-	if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU))
+	if (locale->ctype_is_c ||
+		(char_tolower_enabled(locale) &&
+		 pg_database_encoding_max_length() == 1))
+	{
+		p = VARDATA_ANY(pat);
+		plen = VARSIZE_ANY_EXHDR(pat);
+		s = VARDATA_ANY(str);
+		slen = VARSIZE_ANY_EXHDR(str);
+		return SB_IMatchText(s, slen, p, plen, locale);
+	}
+	else
 	{
 		pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
 													 PointerGetDatum(pat)));
@@ -224,14 +234,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
 		else
 			return MB_MatchText(s, slen, p, plen, 0);
 	}
-	else
-	{
-		p = VARDATA_ANY(pat);
-		plen = VARSIZE_ANY_EXHDR(pat);
-		s = VARDATA_ANY(str);
-		slen = VARSIZE_ANY_EXHDR(str);
-		return SB_IMatchText(s, slen, p, plen, locale);
-	}
 }
 
 /*
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 8fdc677371f..999f23f86d5 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -1495,13 +1495,8 @@ pattern_char_isalpha(char c, bool is_multibyte,
 {
 	if (locale->ctype_is_c)
 		return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
-	else if (is_multibyte && IS_HIGHBIT_SET(c))
-		return true;
-	else if (locale->provider != COLLPROVIDER_LIBC)
-		return IS_HIGHBIT_SET(c) ||
-			(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
 	else
-		return isalpha_l((unsigned char) c, locale->info.lt);
+		return char_is_cased(c, locale);
 }
 
 
diff --git a/src/backend/utils/adt/mcxtfuncs.c b/src/backend/utils/adt/mcxtfuncs.c
index 7ec2c225016..fe6dce9cba3 100644
--- a/src/backend/utils/adt/mcxtfuncs.c
+++ b/src/backend/utils/adt/mcxtfuncs.c
@@ -15,27 +15,30 @@
 
 #include "postgres.h"
 
-#include "access/twophase.h"
-#include "catalog/pg_authid_d.h"
 #include "funcapi.h"
 #include "mb/pg_wchar.h"
-#include "miscadmin.h"
 #include "storage/proc.h"
 #include "storage/procarray.h"
-#include "utils/acl.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/hsearch.h"
-#include "utils/memutils.h"
-#include "utils/wait_event_types.h"
 
 /* ----------
  * The max bytes for showing identifiers of MemoryContext.
  * ----------
  */
 #define MEMORY_CONTEXT_IDENT_DISPLAY_SIZE	1024
-struct MemoryStatsBackendState *memCxtState = NULL;
-struct MemoryStatsCtl *memCxtArea = NULL;
+
+/*
+ * MemoryContextId
+ *		Entry type of the context_id_lookup hash table built by
+ *		pg_get_backend_memory_contexts (transient, per-call identifiers).
+ */
+typedef struct MemoryContextId
+{
+	MemoryContext context;		/* lookup key: the memory context itself */
+	int			context_id;		/* id assigned when the entry is entered */
+} MemoryContextId;
 
 /*
  * int_list_to_array
@@ -86,7 +89,7 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore,
 	 */
 	for (MemoryContext cur = context; cur != NULL; cur = cur->parent)
 	{
-		MemoryStatsContextId *entry;
+		MemoryContextId *entry;
 		bool		found;
 
 		entry = hash_search(context_id_lookup, &cur, HASH_FIND, &found);
@@ -140,51 +143,36 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore,
 	else
 		nulls[1] = true;
 
-	type = ContextTypeToString(context->type);
-
-	values[2] = CStringGetTextDatum(type);
-	values[3] = Int32GetDatum(list_length(path));	/* level */
-	values[4] = int_list_to_array(path);
-	values[5] = Int64GetDatum(stat.totalspace);
-	values[6] = Int64GetDatum(stat.nblocks);
-	values[7] = Int64GetDatum(stat.freespace);
-	values[8] = Int64GetDatum(stat.freechunks);
-	values[9] = Int64GetDatum(stat.totalspace - stat.freespace);
-
-	tuplestore_putvalues(tupstore, tupdesc, values, nulls);
-	list_free(path);
-}
-
-/*
- * ContextTypeToString
- *		Returns a textual representation of a context type
- *
- * This should cover the same types as MemoryContextIsValid.
- */
-const char *
-ContextTypeToString(NodeTag type)
-{
-	const char *context_type;
-
-	switch (type)
+	switch (context->type)
 	{
 		case T_AllocSetContext:
-			context_type = "AllocSet";
+			type = "AllocSet";
 			break;
 		case T_GenerationContext:
-			context_type = "Generation";
+			type = "Generation";
 			break;
 		case T_SlabContext:
-			context_type = "Slab";
+			type = "Slab";
 			break;
 		case T_BumpContext:
-			context_type = "Bump";
+			type = "Bump";
 			break;
 		default:
-			context_type = "???";
+			type = "???";
 			break;
 	}
-	return context_type;
+
+	values[2] = CStringGetTextDatum(type);
+	values[3] = Int32GetDatum(list_length(path));	/* level */
+	values[4] = int_list_to_array(path);
+	values[5] = Int64GetDatum(stat.totalspace);
+	values[6] = Int64GetDatum(stat.nblocks);
+	values[7] = Int64GetDatum(stat.freespace);
+	values[8] = Int64GetDatum(stat.freechunks);
+	values[9] = Int64GetDatum(stat.totalspace - stat.freespace);
+
+	tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+	list_free(path);
 }
 
 /*
@@ -201,7 +189,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS)
 	HTAB	   *context_id_lookup;
 
 	ctl.keysize = sizeof(MemoryContext);
-	ctl.entrysize = sizeof(MemoryStatsContextId);
+	ctl.entrysize = sizeof(MemoryContextId);
 	ctl.hcxt = CurrentMemoryContext;
 
 	context_id_lookup = hash_create("pg_get_backend_memory_contexts",
@@ -228,7 +216,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS)
 
 	foreach_ptr(MemoryContextData, cur, contexts)
 	{
-		MemoryStatsContextId *entry;
+		MemoryContextId *entry;
 		bool		found;
 
 		/*
@@ -236,8 +224,8 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS)
 		 * PutMemoryContextsStatsTupleStore needs this to populate the "path"
 		 * column with the parent context_ids.
 		 */
-		entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur,
-													 HASH_ENTER, &found);
+		entry = (MemoryContextId *) hash_search(context_id_lookup, &cur,
+												HASH_ENTER, &found);
 		entry->context_id = context_id++;
 		Assert(!found);
 
@@ -317,349 +305,3 @@ pg_log_backend_memory_contexts(PG_FUNCTION_ARGS)
 
 	PG_RETURN_BOOL(true);
 }
-
-/*
- * pg_get_process_memory_contexts
- *		Signal a backend or an auxiliary process to send its memory contexts,
- *		wait for the results and display them.
- *
- * By default, only superusers or users with ROLE_PG_READ_ALL_STATS are allowed
- * to signal a process to return the memory contexts. This is because allowing
- * any users to issue this request at an unbounded rate would cause lots of
- * requests to be sent, which can lead to denial of service. Additional roles
- * can be permitted with GRANT.
- *
- * On receipt of this signal, a backend or an auxiliary process sets the flag
- * in the signal handler, which causes the next CHECK_FOR_INTERRUPTS()
- * or process-specific interrupt handler to copy the memory context details
- * to a dynamic shared memory space.
- *
- * We have defined a limit on DSA memory that could be allocated per process -
- * if the process has more memory contexts than what can fit in the allocated
- * size, the excess contexts are summarized and represented as cumulative total
- * at the end of the buffer.
- *
- * After sending the signal, wait on a condition variable. The publishing
- * backend, after copying the data to shared memory, sends signal on that
- * condition variable. There is one condition variable per publishing backend.
- * Once the condition variable is signalled, check if the latest memory context
- * information is available and display.
- *
- * If the publishing backend does not respond before the condition variable
- * times out, which is set to MEMSTATS_WAIT_TIMEOUT, retry given that there is
- * time left within the timeout specified by the user, before giving up and
- * returning previously published statistics, if any. If no previous statistics
- * exist, return NULL.
- */
-#define MEMSTATS_WAIT_TIMEOUT 100
-Datum
-pg_get_process_memory_contexts(PG_FUNCTION_ARGS)
-{
-	int			pid = PG_GETARG_INT32(0);
-	bool		summary = PG_GETARG_BOOL(1);
-	double		timeout = PG_GETARG_FLOAT8(2);
-	PGPROC	   *proc;
-	ProcNumber	procNumber = INVALID_PROC_NUMBER;
-	bool		proc_is_aux = false;
-	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
-	MemoryStatsEntry *memcxt_info;
-	TimestampTz start_timestamp;
-
-	/*
-	 * See if the process with given pid is a backend or an auxiliary process
-	 * and remember the type for when we requery the process later.
-	 */
-	proc = BackendPidGetProc(pid);
-	if (proc == NULL)
-	{
-		proc = AuxiliaryPidGetProc(pid);
-		proc_is_aux = true;
-	}
-
-	/*
-	 * BackendPidGetProc() and AuxiliaryPidGetProc() return NULL if the pid
-	 * isn't valid; this is however not a problem and leave with a WARNING.
-	 * See comment in pg_log_backend_memory_contexts for a discussion on this.
-	 */
-	if (proc == NULL)
-	{
-		/*
-		 * This is just a warning so a loop-through-resultset will not abort
-		 * if one backend terminated on its own during the run.
-		 */
-		ereport(WARNING,
-				errmsg("PID %d is not a PostgreSQL server process", pid));
-		PG_RETURN_NULL();
-	}
-
-	InitMaterializedSRF(fcinfo, 0);
-
-	procNumber = GetNumberFromPGProc(proc);
-
-	LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
-	memCxtState[procNumber].summary = summary;
-	LWLockRelease(&memCxtState[procNumber].lw_lock);
-
-	start_timestamp = GetCurrentTimestamp();
-
-	/*
-	 * Send a signal to a PostgreSQL process, informing it we want it to
-	 * produce information about its memory contexts.
-	 */
-	if (SendProcSignal(pid, PROCSIG_GET_MEMORY_CONTEXT, procNumber) < 0)
-	{
-		ereport(WARNING,
-				errmsg("could not send signal to process %d: %m", pid));
-		PG_RETURN_NULL();
-	}
-
-	/*
-	 * Even if the proc has published statistics, the may not be due to the
-	 * current request, but previously published stats.  Check if the stats
-	 * are updated by comparing the timestamp, if the stats are newer than our
-	 * previously recorded timestamp from before sending the procsignal, they
-	 * must by definition be updated. Wait for the timeout specified by the
-	 * user, following which display old statistics if available or return
-	 * NULL.
-	 */
-	while (1)
-	{
-		long		msecs;
-
-		/*
-		 * We expect to come out of sleep when the requested process has
-		 * finished publishing the statistics, verified using the valid DSA
-		 * pointer.
-		 *
-		 * Make sure that the information belongs to pid we requested
-		 * information for, Otherwise loop back and wait for the server
-		 * process to finish publishing statistics.
-		 */
-		LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
-
-		/*
-		 * Note in procnumber.h file says that a procNumber can be re-used for
-		 * a different backend immediately after a backend exits. In case an
-		 * old process' data was there and not updated by the current process
-		 * in the slot identified by the procNumber, the pid of the requested
-		 * process and the proc_id might not match.
-		 */
-		if (memCxtState[procNumber].proc_id == pid)
-		{
-			/*
-			 * Break if the latest stats have been read, indicated by
-			 * statistics timestamp being newer than the current request
-			 * timestamp.
-			 */
-			msecs = TimestampDifferenceMilliseconds(start_timestamp,
-													memCxtState[procNumber].stats_timestamp);
-
-			if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer)
-				&& msecs > 0)
-				break;
-		}
-		LWLockRelease(&memCxtState[procNumber].lw_lock);
-
-		/*
-		 * Recheck the state of the backend before sleeping on the condition
-		 * variable to ensure the process is still alive.  Only check the
-		 * relevant process type based on the earlier PID check.
-		 */
-		if (proc_is_aux)
-			proc = AuxiliaryPidGetProc(pid);
-		else
-			proc = BackendPidGetProc(pid);
-
-		/*
-		 * The process ending during memory context processing is not an
-		 * error.
-		 */
-		if (proc == NULL)
-		{
-			ereport(WARNING,
-					errmsg("PID %d is no longer a PostgreSQL server process",
-						   pid));
-			PG_RETURN_NULL();
-		}
-
-		msecs = TimestampDifferenceMilliseconds(start_timestamp, GetCurrentTimestamp());
-
-		/*
-		 * If we haven't already exceeded the timeout value, sleep for the
-		 * remainder of the timeout on the condition variable.
-		 */
-		if (msecs > 0 && msecs < (timeout * 1000))
-		{
-			/*
-			 * Wait for the timeout as defined by the user. If no updated
-			 * statistics are available within the allowed time then display
-			 * previously published statistics if there are any. If no
-			 * previous statistics are available then return NULL.  The timer
-			 * is defined in milliseconds since that's what the condition
-			 * variable sleep uses.
-			 */
-			if (ConditionVariableTimedSleep(&memCxtState[procNumber].memcxt_cv,
-											((timeout * 1000) - msecs), WAIT_EVENT_MEM_CXT_PUBLISH))
-			{
-				LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
-				/* Displaying previously published statistics if available */
-				if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer))
-					break;
-				else
-				{
-					LWLockRelease(&memCxtState[procNumber].lw_lock);
-					PG_RETURN_NULL();
-				}
-			}
-		}
-		else
-		{
-			LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
-			/* Displaying previously published statistics if available */
-			if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer))
-				break;
-			else
-			{
-				LWLockRelease(&memCxtState[procNumber].lw_lock);
-				PG_RETURN_NULL();
-			}
-		}
-	}
-
-	/*
-	 * We should only reach here with a valid DSA handle, either containing
-	 * updated statistics or previously published statistics (identified by
-	 * the timestamp.
-	 */
-	Assert(memCxtArea->memstats_dsa_handle != DSA_HANDLE_INVALID);
-	/* Attach to the dsa area if we have not already done so */
-	if (MemoryStatsDsaArea == NULL)
-	{
-		MemoryContext oldcontext = CurrentMemoryContext;
-
-		MemoryContextSwitchTo(TopMemoryContext);
-		MemoryStatsDsaArea = dsa_attach(memCxtArea->memstats_dsa_handle);
-		MemoryContextSwitchTo(oldcontext);
-		dsa_pin_mapping(MemoryStatsDsaArea);
-	}
-
-	/*
-	 * Backend has finished publishing the stats, project them.
-	 */
-	memcxt_info = (MemoryStatsEntry *)
-		dsa_get_address(MemoryStatsDsaArea, memCxtState[procNumber].memstats_dsa_pointer);
-
-#define PG_GET_PROCESS_MEMORY_CONTEXTS_COLS	12
-	for (int i = 0; i < memCxtState[procNumber].total_stats; i++)
-	{
-		ArrayType  *path_array;
-		int			path_length;
-		Datum		values[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS];
-		bool		nulls[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS];
-		char	   *name;
-		char	   *ident;
-		Datum	   *path_datum = NULL;
-		int		   *path_int = NULL;
-
-		memset(values, 0, sizeof(values));
-		memset(nulls, 0, sizeof(nulls));
-
-		if (DsaPointerIsValid(memcxt_info[i].name))
-		{
-			name = (char *) dsa_get_address(MemoryStatsDsaArea, memcxt_info[i].name);
-			values[0] = CStringGetTextDatum(name);
-		}
-		else
-			nulls[0] = true;
-
-		if (DsaPointerIsValid(memcxt_info[i].ident))
-		{
-			ident = (char *) dsa_get_address(MemoryStatsDsaArea, memcxt_info[i].ident);
-			values[1] = CStringGetTextDatum(ident);
-		}
-		else
-			nulls[1] = true;
-
-		values[2] = CStringGetTextDatum(ContextTypeToString(memcxt_info[i].type));
-
-		path_length = memcxt_info[i].path_length;
-		path_datum = (Datum *) palloc(path_length * sizeof(Datum));
-		if (DsaPointerIsValid(memcxt_info[i].path))
-		{
-			path_int = (int *) dsa_get_address(MemoryStatsDsaArea, memcxt_info[i].path);
-			for (int j = 0; j < path_length; j++)
-				path_datum[j] = Int32GetDatum(path_int[j]);
-			path_array = construct_array_builtin(path_datum, path_length, INT4OID);
-			values[3] = PointerGetDatum(path_array);
-		}
-		else
-			nulls[3] = true;
-
-		values[4] = Int32GetDatum(memcxt_info[i].levels);
-		values[5] = Int64GetDatum(memcxt_info[i].totalspace);
-		values[6] = Int64GetDatum(memcxt_info[i].nblocks);
-		values[7] = Int64GetDatum(memcxt_info[i].freespace);
-		values[8] = Int64GetDatum(memcxt_info[i].freechunks);
-		values[9] = Int64GetDatum(memcxt_info[i].totalspace -
-								  memcxt_info[i].freespace);
-		values[10] = Int32GetDatum(memcxt_info[i].num_agg_stats);
-		values[11] = TimestampTzGetDatum(memCxtState[procNumber].stats_timestamp);
-
-		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
-							 values, nulls);
-	}
-	LWLockRelease(&memCxtState[procNumber].lw_lock);
-
-	ConditionVariableCancelSleep();
-
-	PG_RETURN_NULL();
-}
-
-Size
-MemoryContextReportingShmemSize(void)
-{
-	Size		sz = 0;
-	Size		TotalProcs = 0;
-
-	TotalProcs = add_size(TotalProcs, NUM_AUXILIARY_PROCS);
-	TotalProcs = add_size(TotalProcs, MaxBackends);
-	sz = add_size(sz, mul_size(TotalProcs, sizeof(MemoryStatsBackendState)));
-
-	sz = add_size(sz, sizeof(MemoryStatsCtl));
-
-	return sz;
-}
-
-/*
- * Initialize shared memory for displaying memory context statistics
- */
-void
-MemoryContextReportingShmemInit(void)
-{
-	bool		found;
-
-	memCxtArea = (MemoryStatsCtl *)
-		ShmemInitStruct("MemoryStatsCtl",
-						sizeof(MemoryStatsCtl), &found);
-
-	if (!found)
-	{
-		LWLockInitialize(&memCxtArea->lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE);
-		memCxtArea->memstats_dsa_handle = DSA_HANDLE_INVALID;
-	}
-
-	memCxtState = (MemoryStatsBackendState *)
-		ShmemInitStruct("MemoryStatsBackendState",
-						((MaxBackends + NUM_AUXILIARY_PROCS) * sizeof(MemoryStatsBackendState)),
-						&found);
-
-	if (found)
-		return;
-
-	for (int i = 0; i < (MaxBackends + NUM_AUXILIARY_PROCS); i++)
-	{
-		ConditionVariableInit(&memCxtState[i].memcxt_cv);
-		LWLockInitialize(&memCxtState[i].lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC);
-		memCxtState[i].memstats_dsa_pointer = InvalidDsaPointer;
-	}
-}
diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build
index 244f48f4fd7..ed9bbd7b926 100644
--- a/src/backend/utils/adt/meson.build
+++ b/src/backend/utils/adt/meson.build
@@ -12,6 +12,7 @@ backend_sources += files(
   'arrayutils.c',
   'ascii.c',
   'bool.c',
+  'bytea.c',
   'cash.c',
   'char.c',
   'cryptohashfuncs.c',
diff --git a/src/backend/utils/adt/network.c b/src/backend/utils/adt/network.c
index f03fcc1147b..9fd211b2d45 100644
--- a/src/backend/utils/adt/network.c
+++ b/src/backend/utils/adt/network.c
@@ -12,8 +12,6 @@
 #include <netinet/in.h>
 #include <arpa/inet.h>
 
-#include "access/stratnum.h"
-#include "catalog/pg_opfamily.h"
 #include "catalog/pg_type.h"
 #include "common/hashfn.h"
 #include "common/ip.h"
diff --git a/src/backend/utils/adt/network_spgist.c b/src/backend/utils/adt/network_spgist.c
index a84747d9275..602276a35c3 100644
--- a/src/backend/utils/adt/network_spgist.c
+++ b/src/backend/utils/adt/network_spgist.c
@@ -37,7 +37,6 @@
 #include "catalog/pg_type.h"
 #include "utils/fmgrprotos.h"
 #include "utils/inet.h"
-#include "varatt.h"
 
 
 static int	inet_spg_node_number(const inet *val, int commonbits);
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 40dcbc7b671..c9233565d57 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -1958,9 +1958,11 @@ generate_series_numeric_support(PG_FUNCTION_ARGS)
  * in the histogram. width_bucket() returns an integer indicating the
  * bucket number that 'operand' belongs to in an equiwidth histogram
  * with the specified characteristics. An operand smaller than the
- * lower bound is assigned to bucket 0. An operand greater than the
- * upper bound is assigned to an additional bucket (with number
- * count+1). We don't allow "NaN" for any of the numeric arguments.
+ * lower bound is assigned to bucket 0. An operand greater than or equal
+ * to the upper bound is assigned to an additional bucket (with number
+ * count+1). We don't allow the histogram bounds to be NaN or +/- infinity,
+ * but we do allow those values for the operand (taking NaN to be larger
+ * than any other value, as we do in comparisons).
  */
 Datum
 width_bucket_numeric(PG_FUNCTION_ARGS)
@@ -1978,17 +1980,13 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
 				(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
 				 errmsg("count must be greater than zero")));
 
-	if (NUMERIC_IS_SPECIAL(operand) ||
-		NUMERIC_IS_SPECIAL(bound1) ||
-		NUMERIC_IS_SPECIAL(bound2))
+	if (NUMERIC_IS_SPECIAL(bound1) || NUMERIC_IS_SPECIAL(bound2))
 	{
-		if (NUMERIC_IS_NAN(operand) ||
-			NUMERIC_IS_NAN(bound1) ||
-			NUMERIC_IS_NAN(bound2))
+		if (NUMERIC_IS_NAN(bound1) || NUMERIC_IS_NAN(bound2))
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
-					 errmsg("operand, lower bound, and upper bound cannot be NaN")));
-		/* We allow "operand" to be infinite; cmp_numerics will cope */
+					 errmsg("lower and upper bounds cannot be NaN")));
+
 		if (NUMERIC_IS_INF(bound1) || NUMERIC_IS_INF(bound2))
 			ereport(ERROR,
 					(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index a858f27cadc..97c2ac1faf9 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -41,11 +41,11 @@
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "utils/builtins.h"
-#include "utils/formatting.h"
 #include "utils/guc_hooks.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
 #include "utils/pg_locale.h"
+#include "utils/relcache.h"
 #include "utils/syscache.h"
 
 #ifdef WIN32
@@ -79,31 +79,6 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
 extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
 extern char *get_collation_actual_version_libc(const char *collcollate);
 
-extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
-							   ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
-							   ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
-							   ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src,
-							  ssize_t srclen, pg_locale_t locale);
-
-extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
-						   ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
-						   ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
-						   ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_icu(char *dst, size_t dstsize, const char *src,
-						  ssize_t srclen, pg_locale_t locale);
-
-extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
-							ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
-							ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
-							ssize_t srclen, pg_locale_t locale);
-
 /* GUC settings */
 char	   *locale_messages;
 char	   *locale_monetary;
@@ -1092,6 +1067,9 @@ create_pg_locale(Oid collid, MemoryContext context)
 	Assert((result->collate_is_c && result->collate == NULL) ||
 		   (!result->collate_is_c && result->collate != NULL));
 
+	Assert((result->ctype_is_c && result->ctype == NULL) ||
+		   (!result->ctype_is_c && result->ctype != NULL));
+
 	datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
 							&isnull);
 	if (!isnull)
@@ -1256,77 +1234,31 @@ size_t
 pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
-	if (locale->provider == COLLPROVIDER_BUILTIN)
-		return strlower_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		return strlower_icu(dst, dstsize, src, srclen, locale);
-#endif
-	else if (locale->provider == COLLPROVIDER_LIBC)
-		return strlower_libc(dst, dstsize, src, srclen, locale);
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return 0;					/* keep compiler quiet */
+	return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
 }
 
 size_t
 pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
-	if (locale->provider == COLLPROVIDER_BUILTIN)
-		return strtitle_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		return strtitle_icu(dst, dstsize, src, srclen, locale);
-#endif
-	else if (locale->provider == COLLPROVIDER_LIBC)
-		return strtitle_libc(dst, dstsize, src, srclen, locale);
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return 0;					/* keep compiler quiet */
+	return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
 }
 
 size_t
 pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
-	if (locale->provider == COLLPROVIDER_BUILTIN)
-		return strupper_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		return strupper_icu(dst, dstsize, src, srclen, locale);
-#endif
-	else if (locale->provider == COLLPROVIDER_LIBC)
-		return strupper_libc(dst, dstsize, src, srclen, locale);
-	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return 0;					/* keep compiler quiet */
+	return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
 }
 
 size_t
 pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 		   pg_locale_t locale)
 {
-	if (locale->provider == COLLPROVIDER_BUILTIN)
-		return strfold_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
-	else if (locale->provider == COLLPROVIDER_ICU)
-		return strfold_icu(dst, dstsize, src, srclen, locale);
-#endif
-	/* for libc, just use strlower */
-	else if (locale->provider == COLLPROVIDER_LIBC)
-		return strlower_libc(dst, dstsize, src, srclen, locale);
+	if (locale->ctype->strfold)
+		return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
 	else
-		/* shouldn't happen */
-		PGLOCALE_SUPPORT_ERROR(locale->provider);
-
-	return 0;					/* keep compiler quiet */
+		return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
 }
 
 /*
@@ -1464,6 +1396,41 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
 }
 
 /*
+ * char_is_cased()
+ *
+ * Fuzzy test of whether the given char is case-varying or not. The argument
+ * is a single byte, so in a multibyte encoding, just assume any non-ASCII
+ * char is case-varying.
+ */
+bool
+char_is_cased(char ch, pg_locale_t locale)
+{
+	return locale->ctype->char_is_cased(ch, locale);
+}
+
+/*
+ * char_tolower_enabled()
+ *
+ * Does the provider support char_tolower()?
+ */
+bool
+char_tolower_enabled(pg_locale_t locale)
+{
+	return (locale->ctype->char_tolower != NULL);
+}
+
+/*
+ * char_tolower()
+ *
+ * Lowercase a char (single-byte encoding); requires char_tolower_enabled().
+ */
+char
+char_tolower(unsigned char ch, pg_locale_t locale)
+{
+	return locale->ctype->char_tolower(ch, locale);
+}
+
+/*
  * Return required encoding ID for the given locale, or -1 if any encoding is
  * valid for the locale.
  */
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index f51768830cd..0c9fbdb40f2 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -18,22 +18,12 @@
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "utils/builtins.h"
-#include "utils/memutils.h"
 #include "utils/pg_locale.h"
 #include "utils/syscache.h"
 
 extern pg_locale_t create_pg_locale_builtin(Oid collid,
 											MemoryContext context);
 extern char *get_collation_actual_version_builtin(const char *collcollate);
-extern size_t strlower_builtin(char *dest, size_t destsize, const char *src,
-							   ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src,
-							   ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_builtin(char *dest, size_t destsize, const char *src,
-							   ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_builtin(char *dest, size_t destsize, const char *src,
-							  ssize_t srclen, pg_locale_t locale);
-
 
 struct WordBoundaryState
 {
@@ -77,7 +67,7 @@ initcap_wbnext(void *state)
 	return wbstate->len;
 }
 
-size_t
+static size_t
 strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
@@ -85,7 +75,7 @@ strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 							locale->info.builtin.casemap_full);
 }
 
-size_t
+static size_t
 strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
@@ -103,7 +93,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 							initcap_wbnext, &wbstate);
 }
 
-size_t
+static size_t
 strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
 {
@@ -111,7 +101,7 @@ strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 							locale->info.builtin.casemap_full);
 }
 
-size_t
+static size_t
 strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				pg_locale_t locale)
 {
@@ -119,6 +109,98 @@ strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 						   locale->info.builtin.casemap_full);
 }
 
+static bool
+wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_isdigit(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_isalpha(wc);
+}
+
+static bool
+wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_isalnum(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_isupper(wc);
+}
+
+static bool
+wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_islower(wc);
+}
+
+static bool
+wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_isgraph(wc);
+}
+
+static bool
+wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_isprint(wc);
+}
+
+static bool
+wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_ispunct(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return pg_u_isspace(wc);
+}
+
+static bool
+char_is_cased_builtin(char ch, pg_locale_t locale)
+{
+	return IS_HIGHBIT_SET(ch) ||
+		(ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+}
+
+static pg_wchar
+wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return unicode_uppercase_simple(wc);
+}
+
+static pg_wchar
+wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
+{
+	return unicode_lowercase_simple(wc);
+}
+
+static const struct ctype_methods ctype_methods_builtin = {
+	.strlower = strlower_builtin,
+	.strtitle = strtitle_builtin,
+	.strupper = strupper_builtin,
+	.strfold = strfold_builtin,
+	.wc_isdigit = wc_isdigit_builtin,
+	.wc_isalpha = wc_isalpha_builtin,
+	.wc_isalnum = wc_isalnum_builtin,
+	.wc_isupper = wc_isupper_builtin,
+	.wc_islower = wc_islower_builtin,
+	.wc_isgraph = wc_isgraph_builtin,
+	.wc_isprint = wc_isprint_builtin,
+	.wc_ispunct = wc_ispunct_builtin,
+	.wc_isspace = wc_isspace_builtin,
+	.char_is_cased = char_is_cased_builtin,
+	.wc_tolower = wc_tolower_builtin,
+	.wc_toupper = wc_toupper_builtin,
+};
+
 pg_locale_t
 create_pg_locale_builtin(Oid collid, MemoryContext context)
 {
@@ -158,10 +240,11 @@ create_pg_locale_builtin(Oid collid, MemoryContext context)
 
 	result->info.builtin.locale = MemoryContextStrdup(context, locstr);
 	result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
-	result->provider = COLLPROVIDER_BUILTIN;
 	result->deterministic = true;
 	result->collate_is_c = true;
 	result->ctype_is_c = (strcmp(locstr, "C") == 0);
+	if (!result->ctype_is_c)
+		result->ctype = &ctype_methods_builtin;
 
 	return result;
 }
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index a32c32a0744..96741e08269 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -48,19 +48,22 @@
 #define		TEXTBUFLEN			1024
 
 extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
-extern size_t strlower_icu(char *dest, size_t destsize, const char *src,
-						   ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_icu(char *dest, size_t destsize, const char *src,
-						   ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_icu(char *dest, size_t destsize, const char *src,
-						   ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_icu(char *dest, size_t destsize, const char *src,
-						  ssize_t srclen, pg_locale_t locale);
 
 #ifdef USE_ICU
 
 extern UCollator *pg_ucol_open(const char *loc_str);
 
+static size_t strlower_icu(char *dest, size_t destsize, const char *src,
+						   ssize_t srclen, pg_locale_t locale);
+static size_t strtitle_icu(char *dest, size_t destsize, const char *src,
+						   ssize_t srclen, pg_locale_t locale);
+static size_t strupper_icu(char *dest, size_t destsize, const char *src,
+						   ssize_t srclen, pg_locale_t locale);
+static size_t strfold_icu(char *dest, size_t destsize, const char *src,
+						  ssize_t srclen, pg_locale_t locale);
+static int	strncoll_icu(const char *arg1, ssize_t len1,
+						 const char *arg2, ssize_t len2,
+						 pg_locale_t locale);
 static size_t strnxfrm_icu(char *dest, size_t destsize,
 						   const char *src, ssize_t srclen,
 						   pg_locale_t locale);
@@ -118,6 +121,25 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
 									 const char *locale,
 									 UErrorCode *pErrorCode);
 
+static bool
+char_is_cased_icu(char ch, pg_locale_t locale)
+{
+	return IS_HIGHBIT_SET(ch) ||
+		(ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+}
+
+static pg_wchar
+toupper_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_toupper(wc);
+}
+
+static pg_wchar
+tolower_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_tolower(wc);
+}
+
 static const struct collate_methods collate_methods_icu = {
 	.strncoll = strncoll_icu,
 	.strnxfrm = strnxfrm_icu,
@@ -136,6 +158,78 @@ static const struct collate_methods collate_methods_icu_utf8 = {
 	.strxfrm_is_safe = true,
 };
 
+static bool
+wc_isdigit_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_isdigit(wc);
+}
+
+static bool
+wc_isalpha_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_isalpha(wc);
+}
+
+static bool
+wc_isalnum_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_isalnum(wc);
+}
+
+static bool
+wc_isupper_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_isupper(wc);
+}
+
+static bool
+wc_islower_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_islower(wc);
+}
+
+static bool
+wc_isgraph_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_isgraph(wc);
+}
+
+static bool
+wc_isprint_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_isprint(wc);
+}
+
+static bool
+wc_ispunct_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_ispunct(wc);
+}
+
+static bool
+wc_isspace_icu(pg_wchar wc, pg_locale_t locale)
+{
+	return u_isspace(wc);
+}
+
+static const struct ctype_methods ctype_methods_icu = {
+	.strlower = strlower_icu,
+	.strtitle = strtitle_icu,
+	.strupper = strupper_icu,
+	.strfold = strfold_icu,
+	.wc_isdigit = wc_isdigit_icu,
+	.wc_isalpha = wc_isalpha_icu,
+	.wc_isalnum = wc_isalnum_icu,
+	.wc_isupper = wc_isupper_icu,
+	.wc_islower = wc_islower_icu,
+	.wc_isgraph = wc_isgraph_icu,
+	.wc_isprint = wc_isprint_icu,
+	.wc_ispunct = wc_ispunct_icu,
+	.wc_isspace = wc_isspace_icu,
+	.char_is_cased = char_is_cased_icu,
+	.wc_toupper = toupper_icu,
+	.wc_tolower = tolower_icu,
+};
 #endif
 
 pg_locale_t
@@ -198,7 +292,6 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 	result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
 	result->info.icu.locale = MemoryContextStrdup(context, iculocstr);
 	result->info.icu.ucol = collator;
-	result->provider = COLLPROVIDER_ICU;
 	result->deterministic = deterministic;
 	result->collate_is_c = false;
 	result->ctype_is_c = false;
@@ -206,6 +299,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
 		result->collate = &collate_methods_icu_utf8;
 	else
 		result->collate = &collate_methods_icu;
+	result->ctype = &ctype_methods_icu;
 
 	return result;
 #else
@@ -379,7 +473,7 @@ make_icu_collator(const char *iculocstr, const char *icurules)
 	}
 }
 
-size_t
+static size_t
 strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 			 pg_locale_t locale)
 {
@@ -399,7 +493,7 @@ strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	return result_len;
 }
 
-size_t
+static size_t
 strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 			 pg_locale_t locale)
 {
@@ -419,7 +513,7 @@ strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	return result_len;
 }
 
-size_t
+static size_t
 strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 			 pg_locale_t locale)
 {
@@ -439,7 +533,7 @@ strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	return result_len;
 }
 
-size_t
+static size_t
 strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 			pg_locale_t locale)
 {
@@ -474,8 +568,6 @@ strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2
 	int			result;
 	UErrorCode	status;
 
-	Assert(locale->provider == COLLPROVIDER_ICU);
-
 	Assert(GetDatabaseEncoding() == PG_UTF8);
 
 	status = U_ZERO_ERROR;
@@ -503,8 +595,6 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	size_t		uchar_bsize;
 	Size		result_bsize;
 
-	Assert(locale->provider == COLLPROVIDER_ICU);
-
 	init_icu_converter();
 
 	ulen = uchar_length(icu_converter, src, srclen);
@@ -549,8 +639,6 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
 	uint32_t	state[2];
 	UErrorCode	status;
 
-	Assert(locale->provider == COLLPROVIDER_ICU);
-
 	Assert(GetDatabaseEncoding() == PG_UTF8);
 
 	uiter_setUTF8(&iter, src, srclen);
@@ -749,8 +837,6 @@ strncoll_icu(const char *arg1, ssize_t len1,
 			   *uchar2;
 	int			result;
 
-	Assert(locale->provider == COLLPROVIDER_ICU);
-
 	/* if encoding is UTF8, use more efficient strncoll_icu_utf8 */
 #ifdef HAVE_UCOL_STRCOLLUTF8
 	Assert(GetDatabaseEncoding() != PG_UTF8);
@@ -799,8 +885,6 @@ strnxfrm_prefix_icu(char *dest, size_t destsize,
 	size_t		uchar_bsize;
 	Size		result_bsize;
 
-	Assert(locale->provider == COLLPROVIDER_ICU);
-
 	/* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */
 	Assert(GetDatabaseEncoding() != PG_UTF8);
 
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 199857e22db..8d88b53c375 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -34,6 +34,46 @@
 #endif
 
 /*
+ * For the libc provider, to provide as much functionality as possible on a
+ * variety of platforms without going so far as to implement everything from
+ * scratch, we use several implementation strategies depending on the
+ * situation:
+ *
+ * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
+ * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
+ * collations don't give a fig about multibyte characters.
+ *
+ * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
+ * This assumes that every platform uses Unicode codepoints directly
+ * as the wchar_t representation of Unicode.  (XXX: ICU makes this assumption
+ * even for non-UTF8 encodings, which may be a problem.)  On some platforms
+ * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
+ *
+ * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
+ * values up to 255, and punt for values above that.  This is 100% correct
+ * only in single-byte encodings such as LATINn.  However, non-Unicode
+ * multibyte encodings are mostly Far Eastern character sets for which the
+ * properties being tested here aren't very relevant for higher code values
+ * anyway.  The difficulty with using the <wctype.h> functions with
+ * non-Unicode multibyte encodings is that we can have no certainty that
+ * the platform's wchar_t representation matches what we do in pg_wchar
+ * conversions.
+ *
+ * As a special case, in the "default" collation, (2) and (3) force ASCII
+ * letters to follow ASCII upcase/downcase rules, while in a non-default
+ * collation we just let the library functions do what they will.  The case
+ * where this matters is treatment of I/i in Turkish, and the behavior is
+ * meant to match the upper()/lower() SQL functions.
+ *
+ * Each function below is handed the collation explicitly as its pg_locale_t
+ * argument.  The regex library's "struct vars" data structure does not carry
+ * the active collation setting, so callers there keep it in static variables;
+ * changing that would require somewhat invasive changes in the regex library.
+ *
+ * NB: the coding here assumes pg_wchar is an unsigned type.
+ */
+
+/*
  * Size of stack buffer to use for string transformations, used to avoid heap
  * allocations in typical cases. This should be large enough that most strings
  * will fit, but small enough that we feel comfortable putting it on the
@@ -43,13 +83,6 @@
 
 extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
 
-extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
-							ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
-							ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
-							ssize_t srclen, pg_locale_t locale);
-
 static int	strncoll_libc(const char *arg1, ssize_t len1,
 						  const char *arg2, ssize_t len2,
 						  pg_locale_t locale);
@@ -85,6 +118,251 @@ static size_t strupper_libc_mb(char *dest, size_t destsize,
 							   const char *src, ssize_t srclen,
 							   pg_locale_t locale);
 
+static bool
+wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return isdigit_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return isalpha_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return isalnum_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return isupper_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return islower_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return isgraph_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return isprint_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return ispunct_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	return isspace_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswdigit_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswalpha_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswalnum_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswupper_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswlower_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswgraph_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswprint_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswpunct_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	return iswspace_l((wint_t) wc, locale->info.lt);
+}
+
+static char
+char_tolower_libc(unsigned char ch, pg_locale_t locale)
+{
+	Assert(pg_database_encoding_max_length() == 1);
+	return tolower_l(ch, locale->info.lt);
+}
+
+static bool
+char_is_cased_libc(char ch, pg_locale_t locale)
+{
+	bool		is_multibyte = pg_database_encoding_max_length() > 1;
+
+	if (is_multibyte && IS_HIGHBIT_SET(ch))
+		return true;
+	else
+		return isalpha_l((unsigned char) ch, locale->info.lt);
+}
+
+static pg_wchar
+toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	Assert(GetDatabaseEncoding() != PG_UTF8);
+
+	/* force C behavior for ASCII characters, per comments above */
+	if (locale->is_default && wc <= (pg_wchar) 127)
+		return pg_ascii_toupper((unsigned char) wc);
+	if (wc <= (pg_wchar) UCHAR_MAX)
+		return toupper_l((unsigned char) wc, locale->info.lt);
+	else
+		return wc;
+}
+
+static pg_wchar
+toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	Assert(GetDatabaseEncoding() == PG_UTF8);
+
+	/* force C behavior for ASCII characters, per comments above */
+	if (locale->is_default && wc <= (pg_wchar) 127)
+		return pg_ascii_toupper((unsigned char) wc);
+	if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
+		return towupper_l((wint_t) wc, locale->info.lt);
+	else
+		return wc;
+}
+
+static pg_wchar
+tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+	Assert(GetDatabaseEncoding() != PG_UTF8);
+
+	/* force C behavior for ASCII characters, per comments above */
+	if (locale->is_default && wc <= (pg_wchar) 127)
+		return pg_ascii_tolower((unsigned char) wc);
+	if (wc <= (pg_wchar) UCHAR_MAX)
+		return tolower_l((unsigned char) wc, locale->info.lt);
+	else
+		return wc;
+}
+
+static pg_wchar
+tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+	Assert(GetDatabaseEncoding() == PG_UTF8);
+
+	/* force C behavior for ASCII characters, per comments above */
+	if (locale->is_default && wc <= (pg_wchar) 127)
+		return pg_ascii_tolower((unsigned char) wc);
+	if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
+		return towlower_l((wint_t) wc, locale->info.lt);
+	else
+		return wc;
+}
+
+static const struct ctype_methods ctype_methods_libc_sb = {
+	.strlower = strlower_libc_sb,
+	.strtitle = strtitle_libc_sb,
+	.strupper = strupper_libc_sb,
+	.wc_isdigit = wc_isdigit_libc_sb,
+	.wc_isalpha = wc_isalpha_libc_sb,
+	.wc_isalnum = wc_isalnum_libc_sb,
+	.wc_isupper = wc_isupper_libc_sb,
+	.wc_islower = wc_islower_libc_sb,
+	.wc_isgraph = wc_isgraph_libc_sb,
+	.wc_isprint = wc_isprint_libc_sb,
+	.wc_ispunct = wc_ispunct_libc_sb,
+	.wc_isspace = wc_isspace_libc_sb,
+	.char_is_cased = char_is_cased_libc,
+	.char_tolower = char_tolower_libc,
+	.wc_toupper = toupper_libc_sb,
+	.wc_tolower = tolower_libc_sb,
+	.max_chr = UCHAR_MAX,
+};
+
+/*
+ * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
+ * single-byte semantics for pattern matching.
+ */
+static const struct ctype_methods ctype_methods_libc_other_mb = {
+	.strlower = strlower_libc_mb,
+	.strtitle = strtitle_libc_mb,
+	.strupper = strupper_libc_mb,
+	.wc_isdigit = wc_isdigit_libc_sb,
+	.wc_isalpha = wc_isalpha_libc_sb,
+	.wc_isalnum = wc_isalnum_libc_sb,
+	.wc_isupper = wc_isupper_libc_sb,
+	.wc_islower = wc_islower_libc_sb,
+	.wc_isgraph = wc_isgraph_libc_sb,
+	.wc_isprint = wc_isprint_libc_sb,
+	.wc_ispunct = wc_ispunct_libc_sb,
+	.wc_isspace = wc_isspace_libc_sb,
+	.char_is_cased = char_is_cased_libc,
+	.char_tolower = char_tolower_libc,
+	.wc_toupper = toupper_libc_sb,
+	.wc_tolower = tolower_libc_sb,
+	.max_chr = UCHAR_MAX,
+};
+
+static const struct ctype_methods ctype_methods_libc_utf8 = {
+	.strlower = strlower_libc_mb,
+	.strtitle = strtitle_libc_mb,
+	.strupper = strupper_libc_mb,
+	.wc_isdigit = wc_isdigit_libc_mb,
+	.wc_isalpha = wc_isalpha_libc_mb,
+	.wc_isalnum = wc_isalnum_libc_mb,
+	.wc_isupper = wc_isupper_libc_mb,
+	.wc_islower = wc_islower_libc_mb,
+	.wc_isgraph = wc_isgraph_libc_mb,
+	.wc_isprint = wc_isprint_libc_mb,
+	.wc_ispunct = wc_ispunct_libc_mb,
+	.wc_isspace = wc_isspace_libc_mb,
+	.char_is_cased = char_is_cased_libc,
+	.char_tolower = char_tolower_libc,
+	.wc_toupper = toupper_libc_mb,
+	.wc_tolower = tolower_libc_mb,
+};
+
 static const struct collate_methods collate_methods_libc = {
 	.strncoll = strncoll_libc,
 	.strnxfrm = strnxfrm_libc,
@@ -119,36 +397,6 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = {
 };
 #endif
 
-size_t
-strlower_libc(char *dst, size_t dstsize, const char *src,
-			  ssize_t srclen, pg_locale_t locale)
-{
-	if (pg_database_encoding_max_length() > 1)
-		return strlower_libc_mb(dst, dstsize, src, srclen, locale);
-	else
-		return strlower_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
-size_t
-strtitle_libc(char *dst, size_t dstsize, const char *src,
-			  ssize_t srclen, pg_locale_t locale)
-{
-	if (pg_database_encoding_max_length() > 1)
-		return strtitle_libc_mb(dst, dstsize, src, srclen, locale);
-	else
-		return strtitle_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
-size_t
-strupper_libc(char *dst, size_t dstsize, const char *src,
-			  ssize_t srclen, pg_locale_t locale)
-{
-	if (pg_database_encoding_max_length() > 1)
-		return strupper_libc_mb(dst, dstsize, src, srclen, locale);
-	else
-		return strupper_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
 static size_t
 strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 				 pg_locale_t locale)
@@ -209,7 +457,7 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	/* Output workspace cannot have more codes than input bytes */
 	workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
 
-	char2wchar(workspace, srclen + 1, src, srclen, locale);
+	char2wchar(workspace, srclen + 1, src, srclen, loc);
 
 	for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
 		workspace[curr_char] = towlower_l(workspace[curr_char], loc);
@@ -220,7 +468,7 @@ strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	max_size = curr_char * pg_database_encoding_max_length();
 	result = palloc(max_size + 1);
 
-	result_size = wchar2char(result, workspace, max_size + 1, locale);
+	result_size = wchar2char(result, workspace, max_size + 1, loc);
 
 	if (result_size + 1 > destsize)
 		return result_size;
@@ -304,7 +552,7 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	/* Output workspace cannot have more codes than input bytes */
 	workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
 
-	char2wchar(workspace, srclen + 1, src, srclen, locale);
+	char2wchar(workspace, srclen + 1, src, srclen, loc);
 
 	for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
 	{
@@ -321,7 +569,7 @@ strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	max_size = curr_char * pg_database_encoding_max_length();
 	result = palloc(max_size + 1);
 
-	result_size = wchar2char(result, workspace, max_size + 1, locale);
+	result_size = wchar2char(result, workspace, max_size + 1, loc);
 
 	if (result_size + 1 > destsize)
 		return result_size;
@@ -392,7 +640,7 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	/* Output workspace cannot have more codes than input bytes */
 	workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
 
-	char2wchar(workspace, srclen + 1, src, srclen, locale);
+	char2wchar(workspace, srclen + 1, src, srclen, loc);
 
 	for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
 		workspace[curr_char] = towupper_l(workspace[curr_char], loc);
@@ -403,7 +651,7 @@ strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	max_size = curr_char * pg_database_encoding_max_length();
 	result = palloc(max_size + 1);
 
-	result_size = wchar2char(result, workspace, max_size + 1, locale);
+	result_size = wchar2char(result, workspace, max_size + 1, loc);
 
 	if (result_size + 1 > destsize)
 		return result_size;
@@ -465,7 +713,6 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
 	loc = make_libc_collator(collate, ctype);
 
 	result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
-	result->provider = COLLPROVIDER_LIBC;
 	result->deterministic = true;
 	result->collate_is_c = (strcmp(collate, "C") == 0) ||
 		(strcmp(collate, "POSIX") == 0);
@@ -481,6 +728,15 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
 #endif
 			result->collate = &collate_methods_libc;
 	}
+	if (!result->ctype_is_c)
+	{
+		if (GetDatabaseEncoding() == PG_UTF8)
+			result->ctype = &ctype_methods_libc_utf8;
+		else if (pg_database_encoding_max_length() > 1)
+			result->ctype = &ctype_methods_libc_other_mb;
+		else
+			result->ctype = &ctype_methods_libc_sb;
+	}
 
 	return result;
 }
@@ -576,8 +832,6 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
 	const char *arg2n;
 	int			result;
 
-	Assert(locale->provider == COLLPROVIDER_LIBC);
-
 	if (bufsize1 + bufsize2 > TEXTBUFLEN)
 		buf = palloc(bufsize1 + bufsize2);
 
@@ -632,8 +886,6 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
 	size_t		bufsize = srclen + 1;
 	size_t		result;
 
-	Assert(locale->provider == COLLPROVIDER_LIBC);
-
 	if (srclen == -1)
 		return strxfrm_l(dest, src, destsize, locale->info.lt);
 
@@ -742,7 +994,6 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
 	int			r;
 	int			result;
 
-	Assert(locale->provider == COLLPROVIDER_LIBC);
 	Assert(GetDatabaseEncoding() == PG_UTF8);
 
 	if (len1 == -1)
@@ -879,7 +1130,7 @@ wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
  * zero-terminated.  The output will be zero-terminated iff there is room.
  */
 size_t
-wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
+wchar2char(char *to, const wchar_t *from, size_t tolen, locale_t loc)
 {
 	size_t		result;
 
@@ -909,7 +1160,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
 	}
 	else
 #endif							/* WIN32 */
-	if (locale == (pg_locale_t) 0)
+	if (loc == (locale_t) 0)
 	{
 		/* Use wcstombs directly for the default locale */
 		result = wcstombs(to, from, tolen);
@@ -917,7 +1168,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
 	else
 	{
 		/* Use wcstombs_l for nondefault locales */
-		result = wcstombs_l(to, from, tolen, locale->info.lt);
+		result = wcstombs_l(to, from, tolen, loc);
 	}
 
 	return result;
@@ -934,7 +1185,7 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
  */
 size_t
 char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
-		   pg_locale_t locale)
+		   locale_t loc)
 {
 	size_t		result;
 
@@ -969,7 +1220,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
 		/* mbstowcs requires ending '\0' */
 		char	   *str = pnstrdup(from, fromlen);
 
-		if (locale == (pg_locale_t) 0)
+		if (loc == (locale_t) 0)
 		{
 			/* Use mbstowcs directly for the default locale */
 			result = mbstowcs(to, str, tolen);
@@ -977,7 +1228,7 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
 		else
 		{
 			/* Use mbstowcs_l for nondefault locales */
-			result = mbstowcs_l(to, str, tolen, locale->info.lt);
+			result = mbstowcs_l(to, str, tolen, loc);
 		}
 
 		pfree(str);
diff --git a/src/backend/utils/adt/pg_lsn.c b/src/backend/utils/adt/pg_lsn.c
index 16311590a14..12de2446f5b 100644
--- a/src/backend/utils/adt/pg_lsn.c
+++ b/src/backend/utils/adt/pg_lsn.c
@@ -83,7 +83,7 @@ pg_lsn_out(PG_FUNCTION_ARGS)
 	char		buf[MAXPG_LSNLEN + 1];
 	char	   *result;
 
-	snprintf(buf, sizeof buf, "%X/%X", LSN_FORMAT_ARGS(lsn));
+	snprintf(buf, sizeof buf, "%X/%08X", LSN_FORMAT_ARGS(lsn));
 	result = pstrdup(buf);
 	PG_RETURN_CSTRING(result);
 }
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 97af7c6554f..1c12ddbae49 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -640,10 +640,10 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 				values[28] = BoolGetDatum(false);	/* GSS credentials not
 													 * delegated */
 			}
-			if (beentry->st_query_id == 0)
+			if (beentry->st_query_id == INT64CONST(0))
 				nulls[30] = true;
 			else
-				values[30] = UInt64GetDatum(beentry->st_query_id);
+				values[30] = Int64GetDatum(beentry->st_query_id);
 		}
 		else
 		{
@@ -1510,7 +1510,7 @@ pg_stat_io_build_tuples(ReturnSetInfo *rsinfo,
 							bktype_stats->bytes[io_obj][io_context][io_op];
 
 						/* Convert to numeric */
-						snprintf(buf, sizeof buf, UINT64_FORMAT, byte);
+						snprintf(buf, sizeof buf, INT64_FORMAT, byte);
 						values[byte_idx] = DirectFunctionCall3(numeric_in,
 															   CStringGetDatum(buf),
 															   ObjectIdGetDatum(0),
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index edee1f7880b..6e2864cbbda 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -773,8 +773,11 @@ similar_escape_internal(text *pat_text, text *esc_text)
 	int			plen,
 				elen;
 	bool		afterescape = false;
-	bool		incharclass = false;
 	int			nquotes = 0;
+	int			charclass_depth = 0;	/* Nesting level of character classes,
+										 * encompassed by square brackets */
+	int			charclass_start = 0;	/* 1 right after '[', 2 after a
+										 * leading '^', 3+ once past that */
 
 	p = VARDATA_ANY(pat_text);
 	plen = VARSIZE_ANY_EXHDR(pat_text);
@@ -904,7 +907,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
 		/* fast path */
 		if (afterescape)
 		{
-			if (pchar == '"' && !incharclass)	/* escape-double-quote? */
+			if (pchar == '"' && charclass_depth < 1)	/* escape-double-quote? */
 			{
 				/* emit appropriate part separator, per notes above */
 				if (nquotes == 0)
@@ -953,18 +956,41 @@ similar_escape_internal(text *pat_text, text *esc_text)
 			/* SQL escape character; do not send to output */
 			afterescape = true;
 		}
-		else if (incharclass)
+		else if (charclass_depth > 0)
 		{
 			if (pchar == '\\')
 				*r++ = '\\';
 			*r++ = pchar;
-			if (pchar == ']')
-				incharclass = false;
+
+			/*
+			 * Ignore a closing bracket at the start of a character class.
+			 * Such a bracket is taken literally rather than closing the
+			 * class.  "charclass_start" is 1 right at the beginning of a
+			 * class and 2 after an initial caret.
+			 */
+			if (pchar == ']' && charclass_start > 2)
+				charclass_depth--;
+			else if (pchar == '[')
+				charclass_depth++;
+
+			/*
+			 * If there is a caret right after the opening bracket, it negates
+			 * the character class, but a following closing bracket should
+			 * still be treated as a normal character.  That holds only for
+			 * the first caret, so only the values 1 and 2 mean that closing
+			 * brackets should be taken literally.
+			 */
+			if (pchar == '^')
+				charclass_start++;
+			else
+				charclass_start = 3;	/* definitely past the start */
 		}
 		else if (pchar == '[')
 		{
+			/* start of a character class */
 			*r++ = pchar;
-			incharclass = true;
+			charclass_depth++;
+			charclass_start = 1;
 		}
 		else if (pchar == '%')
 		{
diff --git a/src/backend/utils/adt/regproc.c b/src/backend/utils/adt/regproc.c
index 5ee608a2b39..b8bbe95e82e 100644
--- a/src/backend/utils/adt/regproc.c
+++ b/src/backend/utils/adt/regproc.c
@@ -30,6 +30,7 @@
 #include "catalog/pg_ts_config.h"
 #include "catalog/pg_ts_dict.h"
 #include "catalog/pg_type.h"
+#include "commands/dbcommands.h"
 #include "lib/stringinfo.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
@@ -1764,6 +1765,123 @@ regnamespacesend(PG_FUNCTION_ARGS)
 }
 
 /*
+ * regdatabasein - converts database name to database OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '-' signifies unknown (OID 0).  In all other cases, the input must
+ * match an existing pg_database entry.
+ */
+Datum
+regdatabasein(PG_FUNCTION_ARGS)
+{
+	char	   *db_name_or_oid = PG_GETARG_CSTRING(0);
+	Node	   *escontext = fcinfo->context;
+	Oid			result;
+	List	   *names;
+
+	/* Handle "-" or numeric OID */
+	if (parseDashOrOid(db_name_or_oid, &result, escontext))
+		PG_RETURN_OID(result);
+
+	/* The rest of this wouldn't work in bootstrap mode */
+	if (IsBootstrapProcessingMode())
+		elog(ERROR, "regdatabase values must be OIDs in bootstrap mode");
+
+	/* Normal case: see if the name matches any pg_database entry. */
+	names = stringToQualifiedNameList(db_name_or_oid, escontext);
+	if (names == NIL)
+		PG_RETURN_NULL();
+
+	if (list_length(names) != 1)
+		ereturn(escontext, (Datum) 0,
+				(errcode(ERRCODE_INVALID_NAME),
+				 errmsg("invalid name syntax")));
+
+	result = get_database_oid(strVal(linitial(names)), true);
+
+	if (!OidIsValid(result))
+		ereturn(escontext, (Datum) 0,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("database \"%s\" does not exist",
+						strVal(linitial(names)))));
+
+	PG_RETURN_OID(result);
+}
+
+/*
+ * to_regdatabase - converts database name to database OID
+ *
+ * If the name is not found, we return NULL.
+ */
+Datum
+to_regdatabase(PG_FUNCTION_ARGS)
+{
+	char	   *db_name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+	Datum		result;
+	ErrorSaveContext escontext = {T_ErrorSaveContext};
+
+	if (!DirectInputFunctionCallSafe(regdatabasein, db_name,
+									 InvalidOid, -1,
+									 (Node *) &escontext,
+									 &result))
+		PG_RETURN_NULL();
+	PG_RETURN_DATUM(result);
+}
+
+/*
+ * regdatabaseout - converts database OID to database name
+ */
+Datum
+regdatabaseout(PG_FUNCTION_ARGS)
+{
+	Oid			dboid = PG_GETARG_OID(0);
+	char	   *result;
+
+	if (dboid == InvalidOid)
+	{
+		result = pstrdup("-");
+		PG_RETURN_CSTRING(result);
+	}
+
+	result = get_database_name(dboid);
+
+	if (result)
+	{
+		/* pstrdup is not really necessary, but it avoids a compiler warning */
+		result = pstrdup(quote_identifier(result));
+	}
+	else
+	{
+		/* If OID doesn't match any database, return it numerically */
+		result = (char *) palloc(NAMEDATALEN);
+		snprintf(result, NAMEDATALEN, "%u", dboid);
+	}
+
+	PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regdatabaserecv - converts external binary format to regdatabase
+ */
+Datum
+regdatabaserecv(PG_FUNCTION_ARGS)
+{
+	/* Exactly the same as oidrecv, so share code */
+	return oidrecv(fcinfo);
+}
+
+/*
+ * regdatabasesend - converts regdatabase to binary format
+ */
+Datum
+regdatabasesend(PG_FUNCTION_ARGS)
+{
+	/* Exactly the same as oidsend, so share code */
+	return oidsend(fcinfo);
+}
+
+/*
  * text_regclass: convert text to regclass
  *
  * This could be replaced by CoerceViaIO, except that we need to treat
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index 6239900fa28..059fc5ebf60 100644
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -30,7 +30,6 @@
 #include "access/xact.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_constraint.h"
-#include "catalog/pg_proc.h"
 #include "commands/trigger.h"
 #include "executor/executor.h"
 #include "executor/spi.h"
@@ -46,7 +45,6 @@
 #include "utils/inval.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
-#include "utils/rangetypes.h"
 #include "utils/rel.h"
 #include "utils/rls.h"
 #include "utils/ruleutils.h"
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index a96b1b9c0bc..ce6a626eba2 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -103,7 +103,6 @@
 #include "access/table.h"
 #include "access/tableam.h"
 #include "access/visibilitymap.h"
-#include "catalog/pg_am.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_statistic.h"
@@ -4620,6 +4619,7 @@ convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue,
 		case REGDICTIONARYOID:
 		case REGROLEOID:
 		case REGNAMESPACEOID:
+		case REGDATABASEOID:
 			*scaledvalue = convert_numeric_to_scalar(value, valuetypid,
 													 &failure);
 			*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
@@ -4752,6 +4752,7 @@ convert_numeric_to_scalar(Datum value, Oid typid, bool *failure)
 		case REGDICTIONARYOID:
 		case REGROLEOID:
 		case REGNAMESPACEOID:
+		case REGDATABASEOID:
 			/* we can treat OIDs as integers... */
 			return (double) DatumGetObjectId(value);
 	}
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 347089b7626..25cff56c3d0 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -5312,10 +5312,10 @@ isoweekdate2date(int isoweek, int wday, int *year, int *mon, int *mday)
 int
 date2isoweek(int year, int mon, int mday)
 {
-	float8		result;
 	int			day0,
 				day4,
-				dayn;
+				dayn,
+				week;
 
 	/* current day */
 	dayn = date2j(year, mon, mday);
@@ -5338,13 +5338,13 @@ date2isoweek(int year, int mon, int mday)
 		day0 = j2day(day4 - 1);
 	}
 
-	result = (dayn - (day4 - day0)) / 7 + 1;
+	week = (dayn - (day4 - day0)) / 7 + 1;
 
 	/*
 	 * Sometimes the last few days in a year will fall into the first week of
 	 * the next year, so check for this.
 	 */
-	if (result >= 52)
+	if (week >= 52)
 	{
 		day4 = date2j(year + 1, 1, 4);
 
@@ -5352,10 +5352,10 @@ date2isoweek(int year, int mon, int mday)
 		day0 = j2day(day4 - 1);
 
 		if (dayn >= day4 - day0)
-			result = (dayn - (day4 - day0)) / 7 + 1;
+			week = (dayn - (day4 - day0)) / 7 + 1;
 	}
 
-	return (int) result;
+	return week;
 }
 
 
@@ -5367,10 +5367,10 @@ date2isoweek(int year, int mon, int mday)
 int
 date2isoyear(int year, int mon, int mday)
 {
-	float8		result;
 	int			day0,
 				day4,
-				dayn;
+				dayn,
+				week;
 
 	/* current day */
 	dayn = date2j(year, mon, mday);
@@ -5395,13 +5395,13 @@ date2isoyear(int year, int mon, int mday)
 		year--;
 	}
 
-	result = (dayn - (day4 - day0)) / 7 + 1;
+	week = (dayn - (day4 - day0)) / 7 + 1;
 
 	/*
 	 * Sometimes the last few days in a year will fall into the first week of
 	 * the next year, so check for this.
 	 */
-	if (result >= 52)
+	if (week >= 52)
 	{
 		day4 = date2j(year + 1, 1, 4);
 
@@ -6477,7 +6477,7 @@ timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow)
 	if (TIMESTAMP_NOT_FINITE(timestamp))
 		return timestamp;
 
-	/* We don't expect this to fail, but check it pro forma */
+	/* timestamp2tm shouldn't fail here; if it does, treat it as overflow */
 	if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) == 0)
 	{
 		tz = DetermineTimeZoneOffset(tm, session_timezone);
@@ -6485,23 +6485,22 @@ timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow)
 		result = dt2local(timestamp, -tz);
 
 		if (IS_VALID_TIMESTAMP(result))
-		{
 			return result;
+	}
+
+	if (overflow)
+	{
+		if (timestamp < 0)
+		{
+			*overflow = -1;
+			TIMESTAMP_NOBEGIN(result);
 		}
-		else if (overflow)
+		else
 		{
-			if (result < MIN_TIMESTAMP)
-			{
-				*overflow = -1;
-				TIMESTAMP_NOBEGIN(result);
-			}
-			else
-			{
-				*overflow = 1;
-				TIMESTAMP_NOEND(result);
-			}
-			return result;
+			*overflow = 1;
+			TIMESTAMP_NOEND(result);
 		}
+		return result;
 	}
 
 	ereport(ERROR,
@@ -6531,27 +6530,81 @@ timestamptz_timestamp(PG_FUNCTION_ARGS)
 	PG_RETURN_TIMESTAMP(timestamptz2timestamp(timestamp));
 }
 
+/*
+ * Convert timestamptz to timestamp, throwing error for overflow.
+ */
 static Timestamp
 timestamptz2timestamp(TimestampTz timestamp)
 {
+	return timestamptz2timestamp_opt_overflow(timestamp, NULL);
+}
+
+/*
+ * Convert timestamp with time zone to timestamp.
+ *
+ * On successful conversion, *overflow is set to zero if it's not NULL.
+ *
+ * If the timestamptz is finite but out of the valid range for timestamp, then:
+ * if overflow is NULL, we throw an out-of-range error.
+ * if overflow is not NULL, we store +1 or -1 there to indicate the sign
+ * of the overflow, and return the appropriate timestamp infinity.
+ */
+Timestamp
+timestamptz2timestamp_opt_overflow(TimestampTz timestamp, int *overflow)
+{
 	Timestamp	result;
 	struct pg_tm tt,
 			   *tm = &tt;
 	fsec_t		fsec;
 	int			tz;
 
+	if (overflow)
+		*overflow = 0;
+
 	if (TIMESTAMP_NOT_FINITE(timestamp))
 		result = timestamp;
 	else
 	{
 		if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0)
+		{
+			if (overflow)
+			{
+				if (timestamp < 0)
+				{
+					*overflow = -1;
+					TIMESTAMP_NOBEGIN(result);
+				}
+				else
+				{
+					*overflow = 1;
+					TIMESTAMP_NOEND(result);
+				}
+				return result;
+			}
 			ereport(ERROR,
 					(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
 					 errmsg("timestamp out of range")));
+		}
 		if (tm2timestamp(tm, fsec, NULL, &result) != 0)
+		{
+			if (overflow)
+			{
+				if (timestamp < 0)
+				{
+					*overflow = -1;
+					TIMESTAMP_NOBEGIN(result);
+				}
+				else
+				{
+					*overflow = 1;
+					TIMESTAMP_NOEND(result);
+				}
+				return result;
+			}
 			ereport(ERROR,
 					(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
 					 errmsg("timestamp out of range")));
+		}
 	}
 	return result;
 }
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 3e4d5568bde..ffae8c23abf 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -35,7 +35,6 @@
 #include "port/pg_bswap.h"
 #include "regex/regex.h"
 #include "utils/builtins.h"
-#include "utils/bytea.h"
 #include "utils/guc.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
@@ -43,10 +42,6 @@
 #include "utils/sortsupport.h"
 #include "utils/varlena.h"
 
-
-/* GUC variable */
-int			bytea_output = BYTEA_OUTPUT_HEX;
-
 typedef struct varlena VarString;
 
 /*
@@ -148,12 +143,6 @@ static int	text_position_get_match_pos(TextPositionState *state);
 static void text_position_cleanup(TextPositionState *state);
 static void check_collation_set(Oid collid);
 static int	text_cmp(text *arg1, text *arg2, Oid collid);
-static bytea *bytea_catenate(bytea *t1, bytea *t2);
-static bytea *bytea_substring(Datum str,
-							  int S,
-							  int L,
-							  bool length_not_specified);
-static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
 static void appendStringInfoText(StringInfo str, const text *t);
 static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate);
 static void split_text_accum_result(SplitTextOutputData *tstate,
@@ -279,307 +268,6 @@ text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
  *	 USER I/O ROUTINES														 *
  *****************************************************************************/
 
-
-#define VAL(CH)			((CH) - '0')
-#define DIG(VAL)		((VAL) + '0')
-
-/*
- *		byteain			- converts from printable representation of byte array
- *
- *		Non-printable characters must be passed as '\nnn' (octal) and are
- *		converted to internal form.  '\' must be passed as '\\'.
- *		ereport(ERROR, ...) if bad form.
- *
- *		BUGS:
- *				The input is scanned twice.
- *				The error checking of input is minimal.
- */
-Datum
-byteain(PG_FUNCTION_ARGS)
-{
-	char	   *inputText = PG_GETARG_CSTRING(0);
-	Node	   *escontext = fcinfo->context;
-	char	   *tp;
-	char	   *rp;
-	int			bc;
-	bytea	   *result;
-
-	/* Recognize hex input */
-	if (inputText[0] == '\\' && inputText[1] == 'x')
-	{
-		size_t		len = strlen(inputText);
-
-		bc = (len - 2) / 2 + VARHDRSZ;	/* maximum possible length */
-		result = palloc(bc);
-		bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
-							 escontext);
-		SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
-
-		PG_RETURN_BYTEA_P(result);
-	}
-
-	/* Else, it's the traditional escaped style */
-	for (bc = 0, tp = inputText; *tp != '\0'; bc++)
-	{
-		if (tp[0] != '\\')
-			tp++;
-		else if ((tp[0] == '\\') &&
-				 (tp[1] >= '0' && tp[1] <= '3') &&
-				 (tp[2] >= '0' && tp[2] <= '7') &&
-				 (tp[3] >= '0' && tp[3] <= '7'))
-			tp += 4;
-		else if ((tp[0] == '\\') &&
-				 (tp[1] == '\\'))
-			tp += 2;
-		else
-		{
-			/*
-			 * one backslash, not followed by another or ### valid octal
-			 */
-			ereturn(escontext, (Datum) 0,
-					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-					 errmsg("invalid input syntax for type %s", "bytea")));
-		}
-	}
-
-	bc += VARHDRSZ;
-
-	result = (bytea *) palloc(bc);
-	SET_VARSIZE(result, bc);
-
-	tp = inputText;
-	rp = VARDATA(result);
-	while (*tp != '\0')
-	{
-		if (tp[0] != '\\')
-			*rp++ = *tp++;
-		else if ((tp[0] == '\\') &&
-				 (tp[1] >= '0' && tp[1] <= '3') &&
-				 (tp[2] >= '0' && tp[2] <= '7') &&
-				 (tp[3] >= '0' && tp[3] <= '7'))
-		{
-			bc = VAL(tp[1]);
-			bc <<= 3;
-			bc += VAL(tp[2]);
-			bc <<= 3;
-			*rp++ = bc + VAL(tp[3]);
-
-			tp += 4;
-		}
-		else if ((tp[0] == '\\') &&
-				 (tp[1] == '\\'))
-		{
-			*rp++ = '\\';
-			tp += 2;
-		}
-		else
-		{
-			/*
-			 * We should never get here. The first pass should not allow it.
-			 */
-			ereturn(escontext, (Datum) 0,
-					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-					 errmsg("invalid input syntax for type %s", "bytea")));
-		}
-	}
-
-	PG_RETURN_BYTEA_P(result);
-}
-
-/*
- *		byteaout		- converts to printable representation of byte array
- *
- *		In the traditional escaped format, non-printable characters are
- *		printed as '\nnn' (octal) and '\' as '\\'.
- */
-Datum
-byteaout(PG_FUNCTION_ARGS)
-{
-	bytea	   *vlena = PG_GETARG_BYTEA_PP(0);
-	char	   *result;
-	char	   *rp;
-
-	if (bytea_output == BYTEA_OUTPUT_HEX)
-	{
-		/* Print hex format */
-		rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
-		*rp++ = '\\';
-		*rp++ = 'x';
-		rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
-	}
-	else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
-	{
-		/* Print traditional escaped format */
-		char	   *vp;
-		uint64		len;
-		int			i;
-
-		len = 1;				/* empty string has 1 char */
-		vp = VARDATA_ANY(vlena);
-		for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
-		{
-			if (*vp == '\\')
-				len += 2;
-			else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
-				len += 4;
-			else
-				len++;
-		}
-
-		/*
-		 * In principle len can't overflow uint32 if the input fit in 1GB, but
-		 * for safety let's check rather than relying on palloc's internal
-		 * check.
-		 */
-		if (len > MaxAllocSize)
-			ereport(ERROR,
-					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
-					 errmsg_internal("result of bytea output conversion is too large")));
-		rp = result = (char *) palloc(len);
-
-		vp = VARDATA_ANY(vlena);
-		for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
-		{
-			if (*vp == '\\')
-			{
-				*rp++ = '\\';
-				*rp++ = '\\';
-			}
-			else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
-			{
-				int			val;	/* holds unprintable chars */
-
-				val = *vp;
-				rp[0] = '\\';
-				rp[3] = DIG(val & 07);
-				val >>= 3;
-				rp[2] = DIG(val & 07);
-				val >>= 3;
-				rp[1] = DIG(val & 03);
-				rp += 4;
-			}
-			else
-				*rp++ = *vp;
-		}
-	}
-	else
-	{
-		elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
-			 bytea_output);
-		rp = result = NULL;		/* keep compiler quiet */
-	}
-	*rp = '\0';
-	PG_RETURN_CSTRING(result);
-}
-
-/*
- *		bytearecv			- converts external binary format to bytea
- */
-Datum
-bytearecv(PG_FUNCTION_ARGS)
-{
-	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
-	bytea	   *result;
-	int			nbytes;
-
-	nbytes = buf->len - buf->cursor;
-	result = (bytea *) palloc(nbytes + VARHDRSZ);
-	SET_VARSIZE(result, nbytes + VARHDRSZ);
-	pq_copymsgbytes(buf, VARDATA(result), nbytes);
-	PG_RETURN_BYTEA_P(result);
-}
-
-/*
- *		byteasend			- converts bytea to binary format
- *
- * This is a special case: just copy the input...
- */
-Datum
-byteasend(PG_FUNCTION_ARGS)
-{
-	bytea	   *vlena = PG_GETARG_BYTEA_P_COPY(0);
-
-	PG_RETURN_BYTEA_P(vlena);
-}
-
-Datum
-bytea_string_agg_transfn(PG_FUNCTION_ARGS)
-{
-	StringInfo	state;
-
-	state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
-
-	/* Append the value unless null, preceding it with the delimiter. */
-	if (!PG_ARGISNULL(1))
-	{
-		bytea	   *value = PG_GETARG_BYTEA_PP(1);
-		bool		isfirst = false;
-
-		/*
-		 * You might think we can just throw away the first delimiter, however
-		 * we must keep it as we may be a parallel worker doing partial
-		 * aggregation building a state to send to the main process.  We need
-		 * to keep the delimiter of every aggregation so that the combine
-		 * function can properly join up the strings of two separately
-		 * partially aggregated results.  The first delimiter is only stripped
-		 * off in the final function.  To know how much to strip off the front
-		 * of the string, we store the length of the first delimiter in the
-		 * StringInfo's cursor field, which we don't otherwise need here.
-		 */
-		if (state == NULL)
-		{
-			state = makeStringAggState(fcinfo);
-			isfirst = true;
-		}
-
-		if (!PG_ARGISNULL(2))
-		{
-			bytea	   *delim = PG_GETARG_BYTEA_PP(2);
-
-			appendBinaryStringInfo(state, VARDATA_ANY(delim),
-								   VARSIZE_ANY_EXHDR(delim));
-			if (isfirst)
-				state->cursor = VARSIZE_ANY_EXHDR(delim);
-		}
-
-		appendBinaryStringInfo(state, VARDATA_ANY(value),
-							   VARSIZE_ANY_EXHDR(value));
-	}
-
-	/*
-	 * The transition type for string_agg() is declared to be "internal",
-	 * which is a pass-by-value type the same size as a pointer.
-	 */
-	if (state)
-		PG_RETURN_POINTER(state);
-	PG_RETURN_NULL();
-}
-
-Datum
-bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
-{
-	StringInfo	state;
-
-	/* cannot be called directly because of internal-type argument */
-	Assert(AggCheckCallContext(fcinfo, NULL));
-
-	state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
-
-	if (state != NULL)
-	{
-		/* As per comment in transfn, strip data before the cursor position */
-		bytea	   *result;
-		int			strippedlen = state->len - state->cursor;
-
-		result = (bytea *) palloc(strippedlen + VARHDRSZ);
-		SET_VARSIZE(result, strippedlen + VARHDRSZ);
-		memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
-		PG_RETURN_BYTEA_P(result);
-	}
-	else
-		PG_RETURN_NULL();
-}
-
 /*
  *		textin			- converts cstring to internal representation
  */
@@ -2959,467 +2647,6 @@ bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
 }
 
 
-/*-------------------------------------------------------------
- * byteaoctetlen
- *
- * get the number of bytes contained in an instance of type 'bytea'
- *-------------------------------------------------------------
- */
-Datum
-byteaoctetlen(PG_FUNCTION_ARGS)
-{
-	Datum		str = PG_GETARG_DATUM(0);
-
-	/* We need not detoast the input at all */
-	PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
-}
-
-/*
- * byteacat -
- *	  takes two bytea* and returns a bytea* that is the concatenation of
- *	  the two.
- *
- * Cloned from textcat and modified as required.
- */
-Datum
-byteacat(PG_FUNCTION_ARGS)
-{
-	bytea	   *t1 = PG_GETARG_BYTEA_PP(0);
-	bytea	   *t2 = PG_GETARG_BYTEA_PP(1);
-
-	PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
-}
-
-/*
- * bytea_catenate
- *	Guts of byteacat(), broken out so it can be used by other functions
- *
- * Arguments can be in short-header form, but not compressed or out-of-line
- */
-static bytea *
-bytea_catenate(bytea *t1, bytea *t2)
-{
-	bytea	   *result;
-	int			len1,
-				len2,
-				len;
-	char	   *ptr;
-
-	len1 = VARSIZE_ANY_EXHDR(t1);
-	len2 = VARSIZE_ANY_EXHDR(t2);
-
-	/* paranoia ... probably should throw error instead? */
-	if (len1 < 0)
-		len1 = 0;
-	if (len2 < 0)
-		len2 = 0;
-
-	len = len1 + len2 + VARHDRSZ;
-	result = (bytea *) palloc(len);
-
-	/* Set size of result string... */
-	SET_VARSIZE(result, len);
-
-	/* Fill data field of result string... */
-	ptr = VARDATA(result);
-	if (len1 > 0)
-		memcpy(ptr, VARDATA_ANY(t1), len1);
-	if (len2 > 0)
-		memcpy(ptr + len1, VARDATA_ANY(t2), len2);
-
-	return result;
-}
-
-#define PG_STR_GET_BYTEA(str_) \
-	DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
-
-/*
- * bytea_substr()
- * Return a substring starting at the specified position.
- * Cloned from text_substr and modified as required.
- *
- * Input:
- *	- string
- *	- starting position (is one-based)
- *	- string length (optional)
- *
- * If the starting position is zero or less, then return from the start of the string
- * adjusting the length to be consistent with the "negative start" per SQL.
- * If the length is less than zero, an ERROR is thrown. If no third argument
- * (length) is provided, the length to the end of the string is assumed.
- */
-Datum
-bytea_substr(PG_FUNCTION_ARGS)
-{
-	PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
-									  PG_GETARG_INT32(1),
-									  PG_GETARG_INT32(2),
-									  false));
-}
-
-/*
- * bytea_substr_no_len -
- *	  Wrapper to avoid opr_sanity failure due to
- *	  one function accepting a different number of args.
- */
-Datum
-bytea_substr_no_len(PG_FUNCTION_ARGS)
-{
-	PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
-									  PG_GETARG_INT32(1),
-									  -1,
-									  true));
-}
-
-static bytea *
-bytea_substring(Datum str,
-				int S,
-				int L,
-				bool length_not_specified)
-{
-	int32		S1;				/* adjusted start position */
-	int32		L1;				/* adjusted substring length */
-	int32		E;				/* end position */
-
-	/*
-	 * The logic here should generally match text_substring().
-	 */
-	S1 = Max(S, 1);
-
-	if (length_not_specified)
-	{
-		/*
-		 * Not passed a length - DatumGetByteaPSlice() grabs everything to the
-		 * end of the string if we pass it a negative value for length.
-		 */
-		L1 = -1;
-	}
-	else if (L < 0)
-	{
-		/* SQL99 says to throw an error for E < S, i.e., negative length */
-		ereport(ERROR,
-				(errcode(ERRCODE_SUBSTRING_ERROR),
-				 errmsg("negative substring length not allowed")));
-		L1 = -1;				/* silence stupider compilers */
-	}
-	else if (pg_add_s32_overflow(S, L, &E))
-	{
-		/*
-		 * L could be large enough for S + L to overflow, in which case the
-		 * substring must run to end of string.
-		 */
-		L1 = -1;
-	}
-	else
-	{
-		/*
-		 * A zero or negative value for the end position can happen if the
-		 * start was negative or one. SQL99 says to return a zero-length
-		 * string.
-		 */
-		if (E < 1)
-			return PG_STR_GET_BYTEA("");
-
-		L1 = E - S1;
-	}
-
-	/*
-	 * If the start position is past the end of the string, SQL99 says to
-	 * return a zero-length string -- DatumGetByteaPSlice() will do that for
-	 * us.  We need only convert S1 to zero-based starting position.
-	 */
-	return DatumGetByteaPSlice(str, S1 - 1, L1);
-}
-
-/*
- * byteaoverlay
- *	Replace specified substring of first string with second
- *
- * The SQL standard defines OVERLAY() in terms of substring and concatenation.
- * This code is a direct implementation of what the standard says.
- */
-Datum
-byteaoverlay(PG_FUNCTION_ARGS)
-{
-	bytea	   *t1 = PG_GETARG_BYTEA_PP(0);
-	bytea	   *t2 = PG_GETARG_BYTEA_PP(1);
-	int			sp = PG_GETARG_INT32(2);	/* substring start position */
-	int			sl = PG_GETARG_INT32(3);	/* substring length */
-
-	PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
-}
-
-Datum
-byteaoverlay_no_len(PG_FUNCTION_ARGS)
-{
-	bytea	   *t1 = PG_GETARG_BYTEA_PP(0);
-	bytea	   *t2 = PG_GETARG_BYTEA_PP(1);
-	int			sp = PG_GETARG_INT32(2);	/* substring start position */
-	int			sl;
-
-	sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
-	PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
-}
-
-static bytea *
-bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
-{
-	bytea	   *result;
-	bytea	   *s1;
-	bytea	   *s2;
-	int			sp_pl_sl;
-
-	/*
-	 * Check for possible integer-overflow cases.  For negative sp, throw a
-	 * "substring length" error because that's what should be expected
-	 * according to the spec's definition of OVERLAY().
-	 */
-	if (sp <= 0)
-		ereport(ERROR,
-				(errcode(ERRCODE_SUBSTRING_ERROR),
-				 errmsg("negative substring length not allowed")));
-	if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
-		ereport(ERROR,
-				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-				 errmsg("integer out of range")));
-
-	s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
-	s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
-	result = bytea_catenate(s1, t2);
-	result = bytea_catenate(result, s2);
-
-	return result;
-}
-
-/*
- * bit_count
- */
-Datum
-bytea_bit_count(PG_FUNCTION_ARGS)
-{
-	bytea	   *t1 = PG_GETARG_BYTEA_PP(0);
-
-	PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
-}
-
-/*
- * byteapos -
- *	  Return the position of the specified substring.
- *	  Implements the SQL POSITION() function.
- * Cloned from textpos and modified as required.
- */
-Datum
-byteapos(PG_FUNCTION_ARGS)
-{
-	bytea	   *t1 = PG_GETARG_BYTEA_PP(0);
-	bytea	   *t2 = PG_GETARG_BYTEA_PP(1);
-	int			pos;
-	int			px,
-				p;
-	int			len1,
-				len2;
-	char	   *p1,
-			   *p2;
-
-	len1 = VARSIZE_ANY_EXHDR(t1);
-	len2 = VARSIZE_ANY_EXHDR(t2);
-
-	if (len2 <= 0)
-		PG_RETURN_INT32(1);		/* result for empty pattern */
-
-	p1 = VARDATA_ANY(t1);
-	p2 = VARDATA_ANY(t2);
-
-	pos = 0;
-	px = (len1 - len2);
-	for (p = 0; p <= px; p++)
-	{
-		if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
-		{
-			pos = p + 1;
-			break;
-		};
-		p1++;
-	};
-
-	PG_RETURN_INT32(pos);
-}
-
-/*-------------------------------------------------------------
- * byteaGetByte
- *
- * this routine treats "bytea" as an array of bytes.
- * It returns the Nth byte (a number between 0 and 255).
- *-------------------------------------------------------------
- */
-Datum
-byteaGetByte(PG_FUNCTION_ARGS)
-{
-	bytea	   *v = PG_GETARG_BYTEA_PP(0);
-	int32		n = PG_GETARG_INT32(1);
-	int			len;
-	int			byte;
-
-	len = VARSIZE_ANY_EXHDR(v);
-
-	if (n < 0 || n >= len)
-		ereport(ERROR,
-				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
-				 errmsg("index %d out of valid range, 0..%d",
-						n, len - 1)));
-
-	byte = ((unsigned char *) VARDATA_ANY(v))[n];
-
-	PG_RETURN_INT32(byte);
-}
-
-/*-------------------------------------------------------------
- * byteaGetBit
- *
- * This routine treats a "bytea" type like an array of bits.
- * It returns the value of the Nth bit (0 or 1).
- *
- *-------------------------------------------------------------
- */
-Datum
-byteaGetBit(PG_FUNCTION_ARGS)
-{
-	bytea	   *v = PG_GETARG_BYTEA_PP(0);
-	int64		n = PG_GETARG_INT64(1);
-	int			byteNo,
-				bitNo;
-	int			len;
-	int			byte;
-
-	len = VARSIZE_ANY_EXHDR(v);
-
-	if (n < 0 || n >= (int64) len * 8)
-		ereport(ERROR,
-				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
-				 errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
-						n, (int64) len * 8 - 1)));
-
-	/* n/8 is now known < len, so safe to cast to int */
-	byteNo = (int) (n / 8);
-	bitNo = (int) (n % 8);
-
-	byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
-
-	if (byte & (1 << bitNo))
-		PG_RETURN_INT32(1);
-	else
-		PG_RETURN_INT32(0);
-}
-
-/*-------------------------------------------------------------
- * byteaSetByte
- *
- * Given an instance of type 'bytea' creates a new one with
- * the Nth byte set to the given value.
- *
- *-------------------------------------------------------------
- */
-Datum
-byteaSetByte(PG_FUNCTION_ARGS)
-{
-	bytea	   *res = PG_GETARG_BYTEA_P_COPY(0);
-	int32		n = PG_GETARG_INT32(1);
-	int32		newByte = PG_GETARG_INT32(2);
-	int			len;
-
-	len = VARSIZE(res) - VARHDRSZ;
-
-	if (n < 0 || n >= len)
-		ereport(ERROR,
-				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
-				 errmsg("index %d out of valid range, 0..%d",
-						n, len - 1)));
-
-	/*
-	 * Now set the byte.
-	 */
-	((unsigned char *) VARDATA(res))[n] = newByte;
-
-	PG_RETURN_BYTEA_P(res);
-}
-
-/*-------------------------------------------------------------
- * byteaSetBit
- *
- * Given an instance of type 'bytea' creates a new one with
- * the Nth bit set to the given value.
- *
- *-------------------------------------------------------------
- */
-Datum
-byteaSetBit(PG_FUNCTION_ARGS)
-{
-	bytea	   *res = PG_GETARG_BYTEA_P_COPY(0);
-	int64		n = PG_GETARG_INT64(1);
-	int32		newBit = PG_GETARG_INT32(2);
-	int			len;
-	int			oldByte,
-				newByte;
-	int			byteNo,
-				bitNo;
-
-	len = VARSIZE(res) - VARHDRSZ;
-
-	if (n < 0 || n >= (int64) len * 8)
-		ereport(ERROR,
-				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
-				 errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
-						n, (int64) len * 8 - 1)));
-
-	/* n/8 is now known < len, so safe to cast to int */
-	byteNo = (int) (n / 8);
-	bitNo = (int) (n % 8);
-
-	/*
-	 * sanity check!
-	 */
-	if (newBit != 0 && newBit != 1)
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-				 errmsg("new bit must be 0 or 1")));
-
-	/*
-	 * Update the byte.
-	 */
-	oldByte = ((unsigned char *) VARDATA(res))[byteNo];
-
-	if (newBit == 0)
-		newByte = oldByte & (~(1 << bitNo));
-	else
-		newByte = oldByte | (1 << bitNo);
-
-	((unsigned char *) VARDATA(res))[byteNo] = newByte;
-
-	PG_RETURN_BYTEA_P(res);
-}
-
-/*
- * Return reversed bytea
- */
-Datum
-bytea_reverse(PG_FUNCTION_ARGS)
-{
-	bytea	   *v = PG_GETARG_BYTEA_PP(0);
-	const char *p = VARDATA_ANY(v);
-	int			len = VARSIZE_ANY_EXHDR(v);
-	const char *endp = p + len;
-	bytea	   *result = palloc(len + VARHDRSZ);
-	char	   *dst = (char *) VARDATA(result) + len;
-
-	SET_VARSIZE(result, len + VARHDRSZ);
-
-	while (p < endp)
-		*(--dst) = *p++;
-
-	PG_RETURN_BYTEA_P(result);
-}
-
-
 /* text_name()
  * Converts a text type to a Name type.
  */
@@ -3849,331 +3076,6 @@ SplitGUCList(char *rawstring, char separator,
 	return true;
 }
 
-
-/*****************************************************************************
- *	Comparison Functions used for bytea
- *
- * Note: btree indexes need these routines not to leak memory; therefore,
- * be careful to free working copies of toasted datums.  Most places don't
- * need to be so careful.
- *****************************************************************************/
-
-Datum
-byteaeq(PG_FUNCTION_ARGS)
-{
-	Datum		arg1 = PG_GETARG_DATUM(0);
-	Datum		arg2 = PG_GETARG_DATUM(1);
-	bool		result;
-	Size		len1,
-				len2;
-
-	/*
-	 * We can use a fast path for unequal lengths, which might save us from
-	 * having to detoast one or both values.
-	 */
-	len1 = toast_raw_datum_size(arg1);
-	len2 = toast_raw_datum_size(arg2);
-	if (len1 != len2)
-		result = false;
-	else
-	{
-		bytea	   *barg1 = DatumGetByteaPP(arg1);
-		bytea	   *barg2 = DatumGetByteaPP(arg2);
-
-		result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
-						 len1 - VARHDRSZ) == 0);
-
-		PG_FREE_IF_COPY(barg1, 0);
-		PG_FREE_IF_COPY(barg2, 1);
-	}
-
-	PG_RETURN_BOOL(result);
-}
-
-Datum
-byteane(PG_FUNCTION_ARGS)
-{
-	Datum		arg1 = PG_GETARG_DATUM(0);
-	Datum		arg2 = PG_GETARG_DATUM(1);
-	bool		result;
-	Size		len1,
-				len2;
-
-	/*
-	 * We can use a fast path for unequal lengths, which might save us from
-	 * having to detoast one or both values.
-	 */
-	len1 = toast_raw_datum_size(arg1);
-	len2 = toast_raw_datum_size(arg2);
-	if (len1 != len2)
-		result = true;
-	else
-	{
-		bytea	   *barg1 = DatumGetByteaPP(arg1);
-		bytea	   *barg2 = DatumGetByteaPP(arg2);
-
-		result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
-						 len1 - VARHDRSZ) != 0);
-
-		PG_FREE_IF_COPY(barg1, 0);
-		PG_FREE_IF_COPY(barg2, 1);
-	}
-
-	PG_RETURN_BOOL(result);
-}
-
-Datum
-bytealt(PG_FUNCTION_ARGS)
-{
-	bytea	   *arg1 = PG_GETARG_BYTEA_PP(0);
-	bytea	   *arg2 = PG_GETARG_BYTEA_PP(1);
-	int			len1,
-				len2;
-	int			cmp;
-
-	len1 = VARSIZE_ANY_EXHDR(arg1);
-	len2 = VARSIZE_ANY_EXHDR(arg2);
-
-	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-
-	PG_FREE_IF_COPY(arg1, 0);
-	PG_FREE_IF_COPY(arg2, 1);
-
-	PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
-}
-
-Datum
-byteale(PG_FUNCTION_ARGS)
-{
-	bytea	   *arg1 = PG_GETARG_BYTEA_PP(0);
-	bytea	   *arg2 = PG_GETARG_BYTEA_PP(1);
-	int			len1,
-				len2;
-	int			cmp;
-
-	len1 = VARSIZE_ANY_EXHDR(arg1);
-	len2 = VARSIZE_ANY_EXHDR(arg2);
-
-	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-
-	PG_FREE_IF_COPY(arg1, 0);
-	PG_FREE_IF_COPY(arg2, 1);
-
-	PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
-}
-
-Datum
-byteagt(PG_FUNCTION_ARGS)
-{
-	bytea	   *arg1 = PG_GETARG_BYTEA_PP(0);
-	bytea	   *arg2 = PG_GETARG_BYTEA_PP(1);
-	int			len1,
-				len2;
-	int			cmp;
-
-	len1 = VARSIZE_ANY_EXHDR(arg1);
-	len2 = VARSIZE_ANY_EXHDR(arg2);
-
-	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-
-	PG_FREE_IF_COPY(arg1, 0);
-	PG_FREE_IF_COPY(arg2, 1);
-
-	PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
-}
-
-Datum
-byteage(PG_FUNCTION_ARGS)
-{
-	bytea	   *arg1 = PG_GETARG_BYTEA_PP(0);
-	bytea	   *arg2 = PG_GETARG_BYTEA_PP(1);
-	int			len1,
-				len2;
-	int			cmp;
-
-	len1 = VARSIZE_ANY_EXHDR(arg1);
-	len2 = VARSIZE_ANY_EXHDR(arg2);
-
-	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-
-	PG_FREE_IF_COPY(arg1, 0);
-	PG_FREE_IF_COPY(arg2, 1);
-
-	PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
-}
-
-Datum
-byteacmp(PG_FUNCTION_ARGS)
-{
-	bytea	   *arg1 = PG_GETARG_BYTEA_PP(0);
-	bytea	   *arg2 = PG_GETARG_BYTEA_PP(1);
-	int			len1,
-				len2;
-	int			cmp;
-
-	len1 = VARSIZE_ANY_EXHDR(arg1);
-	len2 = VARSIZE_ANY_EXHDR(arg2);
-
-	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-	if ((cmp == 0) && (len1 != len2))
-		cmp = (len1 < len2) ? -1 : 1;
-
-	PG_FREE_IF_COPY(arg1, 0);
-	PG_FREE_IF_COPY(arg2, 1);
-
-	PG_RETURN_INT32(cmp);
-}
-
-Datum
-bytea_larger(PG_FUNCTION_ARGS)
-{
-	bytea	   *arg1 = PG_GETARG_BYTEA_PP(0);
-	bytea	   *arg2 = PG_GETARG_BYTEA_PP(1);
-	bytea	   *result;
-	int			len1,
-				len2;
-	int			cmp;
-
-	len1 = VARSIZE_ANY_EXHDR(arg1);
-	len2 = VARSIZE_ANY_EXHDR(arg2);
-
-	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-	result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
-
-	PG_RETURN_BYTEA_P(result);
-}
-
-Datum
-bytea_smaller(PG_FUNCTION_ARGS)
-{
-	bytea	   *arg1 = PG_GETARG_BYTEA_PP(0);
-	bytea	   *arg2 = PG_GETARG_BYTEA_PP(1);
-	bytea	   *result;
-	int			len1,
-				len2;
-	int			cmp;
-
-	len1 = VARSIZE_ANY_EXHDR(arg1);
-	len2 = VARSIZE_ANY_EXHDR(arg2);
-
-	cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-	result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
-
-	PG_RETURN_BYTEA_P(result);
-}
-
-Datum
-bytea_sortsupport(PG_FUNCTION_ARGS)
-{
-	SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
-	MemoryContext oldcontext;
-
-	oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
-
-	/* Use generic string SortSupport, forcing "C" collation */
-	varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
-
-	MemoryContextSwitchTo(oldcontext);
-
-	PG_RETURN_VOID();
-}
-
-/* Cast bytea -> int2 */
-Datum
-bytea_int2(PG_FUNCTION_ARGS)
-{
-	bytea	   *v = PG_GETARG_BYTEA_PP(0);
-	int			len = VARSIZE_ANY_EXHDR(v);
-	uint16		result;
-
-	/* Check that the byte array is not too long */
-	if (len > sizeof(result))
-		ereport(ERROR,
-				errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-				errmsg("smallint out of range"));
-
-	/* Convert it to an integer; most significant bytes come first */
-	result = 0;
-	for (int i = 0; i < len; i++)
-	{
-		result <<= BITS_PER_BYTE;
-		result |= ((unsigned char *) VARDATA_ANY(v))[i];
-	}
-
-	PG_RETURN_INT16(result);
-}
-
-/* Cast bytea -> int4 */
-Datum
-bytea_int4(PG_FUNCTION_ARGS)
-{
-	bytea	   *v = PG_GETARG_BYTEA_PP(0);
-	int			len = VARSIZE_ANY_EXHDR(v);
-	uint32		result;
-
-	/* Check that the byte array is not too long */
-	if (len > sizeof(result))
-		ereport(ERROR,
-				errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-				errmsg("integer out of range"));
-
-	/* Convert it to an integer; most significant bytes come first */
-	result = 0;
-	for (int i = 0; i < len; i++)
-	{
-		result <<= BITS_PER_BYTE;
-		result |= ((unsigned char *) VARDATA_ANY(v))[i];
-	}
-
-	PG_RETURN_INT32(result);
-}
-
-/* Cast bytea -> int8 */
-Datum
-bytea_int8(PG_FUNCTION_ARGS)
-{
-	bytea	   *v = PG_GETARG_BYTEA_PP(0);
-	int			len = VARSIZE_ANY_EXHDR(v);
-	uint64		result;
-
-	/* Check that the byte array is not too long */
-	if (len > sizeof(result))
-		ereport(ERROR,
-				errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
-				errmsg("bigint out of range"));
-
-	/* Convert it to an integer; most significant bytes come first */
-	result = 0;
-	for (int i = 0; i < len; i++)
-	{
-		result <<= BITS_PER_BYTE;
-		result |= ((unsigned char *) VARDATA_ANY(v))[i];
-	}
-
-	PG_RETURN_INT64(result);
-}
-
-/* Cast int2 -> bytea; can just use int2send() */
-Datum
-int2_bytea(PG_FUNCTION_ARGS)
-{
-	return int2send(fcinfo);
-}
-
-/* Cast int4 -> bytea; can just use int4send() */
-Datum
-int4_bytea(PG_FUNCTION_ARGS)
-{
-	return int4send(fcinfo);
-}
-
-/* Cast int8 -> bytea; can just use int8send() */
-Datum
-int8_bytea(PG_FUNCTION_ARGS)
-{
-	return int8send(fcinfo);
-}
-
 /*
  * appendStringInfoText
  *
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index a4150bff2ea..f7b731825fc 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -529,14 +529,36 @@ xmltext(PG_FUNCTION_ARGS)
 #ifdef USE_LIBXML
 	text	   *arg = PG_GETARG_TEXT_PP(0);
 	text	   *result;
-	xmlChar    *xmlbuf = NULL;
+	volatile xmlChar *xmlbuf = NULL;
+	PgXmlErrorContext *xmlerrcxt;
+
+	/* First we gotta spin up some error handling. */
+	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
+	PG_TRY();
+	{
+		xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
 
-	xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
+		if (xmlbuf == NULL || xmlerrcxt->err_occurred)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+						"could not allocate xmlChar");
 
-	Assert(xmlbuf);
+		result = cstring_to_text_with_len((const char *) xmlbuf,
+										  xmlStrlen((const xmlChar *) xmlbuf));
+	}
+	PG_CATCH();
+	{
+		if (xmlbuf)
+			xmlFree((xmlChar *) xmlbuf);
+
+		pg_xml_done(xmlerrcxt, true);
+		PG_RE_THROW();
+	}
+	PG_END_TRY();
+
+	xmlFree((xmlChar *) xmlbuf);
+	pg_xml_done(xmlerrcxt, false);
 
-	result = cstring_to_text_with_len((const char *) xmlbuf, xmlStrlen(xmlbuf));
-	xmlFree(xmlbuf);
 	PG_RETURN_XML_P(result);
 #else
 	NO_XML_SUPPORT();
@@ -663,7 +685,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 	volatile xmlBufferPtr buf = NULL;
 	volatile xmlSaveCtxtPtr ctxt = NULL;
 	ErrorSaveContext escontext = {T_ErrorSaveContext};
-	PgXmlErrorContext *xmlerrcxt;
+	PgXmlErrorContext *volatile xmlerrcxt = NULL;
 #endif
 
 	if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
@@ -704,13 +726,18 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 		return (text *) data;
 	}
 
-	/* Otherwise, we gotta spin up some error handling. */
-	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
-
+	/*
+	 * Otherwise, we gotta spin up some error handling.  Unlike most other
+	 * routines in this module, we already have a libxml "doc" structure to
+	 * free, so we need to call pg_xml_init() inside the PG_TRY and be
+	 * prepared for it to fail (typically due to palloc OOM).
+	 */
 	PG_TRY();
 	{
 		size_t		decl_len = 0;
 
+		xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
 		/* The serialized data will go into this buffer. */
 		buf = xmlBufferCreate();
 
@@ -770,7 +797,10 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 			if (oldroot != NULL)
 				xmlFreeNode(oldroot);
 
-			xmlAddChildList(root, content_nodes);
+			if (xmlAddChildList(root, content_nodes) == NULL ||
+				xmlerrcxt->err_occurred)
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+							"could not append xml node list");
 
 			/*
 			 * We use this node to insert newlines in the dump.  Note: in at
@@ -838,10 +868,10 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
 			xmlSaveClose(ctxt);
 		if (buf)
 			xmlBufferFree(buf);
-		if (doc)
-			xmlFreeDoc(doc);
+		xmlFreeDoc(doc);
 
-		pg_xml_done(xmlerrcxt, true);
+		if (xmlerrcxt)
+			pg_xml_done(xmlerrcxt, true);
 
 		PG_RE_THROW();
 	}
@@ -931,7 +961,10 @@ xmlelement(XmlExpr *xexpr,
 			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
 						"could not allocate xmlTextWriter");
 
-		xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
+		if (xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name) < 0 ||
+			xmlerrcxt->err_occurred)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not start xml element");
 
 		forboth(arg, named_arg_strings, narg, xexpr->arg_names)
 		{
@@ -939,19 +972,30 @@ xmlelement(XmlExpr *xexpr,
 			char	   *argname = strVal(lfirst(narg));
 
 			if (str)
-				xmlTextWriterWriteAttribute(writer,
-											(xmlChar *) argname,
-											(xmlChar *) str);
+			{
+				if (xmlTextWriterWriteAttribute(writer,
+												(xmlChar *) argname,
+												(xmlChar *) str) < 0 ||
+					xmlerrcxt->err_occurred)
+					xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+								"could not write xml attribute");
+			}
 		}
 
 		foreach(arg, arg_strings)
 		{
 			char	   *str = (char *) lfirst(arg);
 
-			xmlTextWriterWriteRaw(writer, (xmlChar *) str);
+			if (xmlTextWriterWriteRaw(writer, (xmlChar *) str) < 0 ||
+				xmlerrcxt->err_occurred)
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+							"could not write raw xml text");
 		}
 
-		xmlTextWriterEndElement(writer);
+		if (xmlTextWriterEndElement(writer) < 0 ||
+			xmlerrcxt->err_occurred)
+			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+						"could not end xml element");
 
 		/* we MUST do this now to flush data out to the buffer ... */
 		xmlFreeTextWriter(writer);
@@ -4220,20 +4264,27 @@ xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
 	}
 	else
 	{
-		xmlChar    *str;
+		volatile xmlChar *str = NULL;
 
-		str = xmlXPathCastNodeToString(cur);
 		PG_TRY();
 		{
+			char	   *escaped;
+
+			str = xmlXPathCastNodeToString(cur);
+			if (str == NULL || xmlerrcxt->err_occurred)
+				xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+							"could not allocate xmlChar");
+
 			/* Here we rely on XML having the same representation as TEXT */
-			char	   *escaped = escape_xml((char *) str);
+			escaped = escape_xml((char *) str);
 
 			result = (xmltype *) cstring_to_text(escaped);
 			pfree(escaped);
 		}
 		PG_FINALLY();
 		{
-			xmlFree(str);
+			if (str)
+				xmlFree((xmlChar *) str);
 		}
 		PG_END_TRY();
 	}
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 657648996c2..d1b25214376 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -317,6 +317,7 @@ GetCCHashEqFuncs(Oid keytype, CCHashFN *hashfunc, RegProcedure *eqfunc, CCFastEq
 		case REGDICTIONARYOID:
 		case REGROLEOID:
 		case REGNAMESPACEOID:
+		case REGDATABASEOID:
 			*hashfunc = int4hashfast;
 			*fasteqfunc = int4eqfast;
 			*eqfunc = F_OIDEQ;
diff --git a/src/backend/utils/cache/funccache.c b/src/backend/utils/cache/funccache.c
index 150c502a612..afc048a051e 100644
--- a/src/backend/utils/cache/funccache.c
+++ b/src/backend/utils/cache/funccache.c
@@ -491,6 +491,7 @@ cached_function_compile(FunctionCallInfo fcinfo,
 	CachedFunctionHashKey hashkey;
 	bool		function_valid = false;
 	bool		hashkey_valid = false;
+	bool		new_function = false;
 
 	/*
 	 * Lookup the pg_proc tuple by Oid; we'll need it in any case
@@ -570,13 +571,15 @@ recheck:
 
 		/*
 		 * Create the new function struct, if not done already.  The function
-		 * structs are never thrown away, so keep them in TopMemoryContext.
+		 * cache entry will be kept for the life of the backend, so put it in
+		 * TopMemoryContext.
 		 */
 		Assert(cacheEntrySize >= sizeof(CachedFunction));
 		if (function == NULL)
 		{
 			function = (CachedFunction *)
 				MemoryContextAllocZero(TopMemoryContext, cacheEntrySize);
+			new_function = true;
 		}
 		else
 		{
@@ -585,17 +588,36 @@ recheck:
 		}
 
 		/*
-		 * Fill in the CachedFunction part.  fn_hashkey and use_count remain
-		 * zeroes for now.
+		 * However, if function compilation fails, we'd like not to leak the
+		 * function struct, so use a PG_TRY block to prevent that.  (It's up
+		 * to the compile callback function to avoid its own internal leakage
+		 * in such cases.)  Unfortunately, freeing the struct is only safe if
+		 * we just allocated it: otherwise there are probably fn_extra
+		 * pointers to it.
 		 */
-		function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
-		function->fn_tid = procTup->t_self;
-		function->dcallback = dcallback;
+		PG_TRY();
+		{
+			/*
+			 * Do the hard, language-specific part.
+			 */
+			ccallback(fcinfo, procTup, &hashkey, function, forValidator);
+		}
+		PG_CATCH();
+		{
+			if (new_function)
+				pfree(function);
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
 
 		/*
-		 * Do the hard, language-specific part.
+		 * Fill in the CachedFunction part.  (We do this last to prevent the
+		 * function from looking valid before it's fully built.)  fn_hashkey
+		 * will be set by cfunc_hashtable_insert; use_count remains zero.
 		 */
-		ccallback(fcinfo, procTup, &hashkey, function, forValidator);
+		function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
+		function->fn_tid = procTup->t_self;
+		function->dcallback = dcallback;
 
 		/*
 		 * Add the completed struct to the hash table.
diff --git a/src/backend/utils/fmgr/dfmgr.c b/src/backend/utils/fmgr/dfmgr.c
index 603632581d0..4bb84ff7087 100644
--- a/src/backend/utils/fmgr/dfmgr.c
+++ b/src/backend/utils/fmgr/dfmgr.c
@@ -99,6 +99,14 @@ load_external_function(const char *filename, const char *funcname,
 	void	   *lib_handle;
 	void	   *retval;
 
+	/*
+	 * If the value starts with "$libdir/", strip that.  This is because many
+	 * extensions have hardcoded '$libdir/foo' as their library name, which
+	 * prevents using the path.
+	 */
+	if (strncmp(filename, "$libdir/", 8) == 0)
+		filename += 8;
+
 	/* Expand the possibly-abbreviated filename to an exact path name */
 	fullname = expand_dynamic_library_name(filename);
 
@@ -456,14 +464,6 @@ expand_dynamic_library_name(const char *name)
 
 	Assert(name);
 
-	/*
-	 * If the value starts with "$libdir/", strip that.  This is because many
-	 * extensions have hardcoded '$libdir/foo' as their library name, which
-	 * prevents using the path.
-	 */
-	if (strncmp(name, "$libdir/", 8) == 0)
-		name += 8;
-
 	have_slash = (first_dir_separator(name) != NULL);
 
 	if (!have_slash)
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 92b0446b80c..d31cb45a058 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -39,7 +39,6 @@ volatile sig_atomic_t TransactionTimeoutPending = false;
 volatile sig_atomic_t IdleSessionTimeoutPending = false;
 volatile sig_atomic_t ProcSignalBarrierPending = false;
 volatile sig_atomic_t LogMemoryContextPending = false;
-volatile sig_atomic_t PublishMemoryContextPending = false;
 volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false;
 volatile uint32 InterruptHoldoffCount = 0;
 volatile uint32 QueryCancelHoldoffCount = 0;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 89d72cdd5ff..641e535a73c 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -417,12 +417,11 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
 	datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype);
 	ctype = TextDatumGetCString(datum);
 
-	if (pg_perm_setlocale(LC_COLLATE, collate) == NULL)
-		ereport(FATAL,
-				(errmsg("database locale is incompatible with operating system"),
-				 errdetail("The database was initialized with LC_COLLATE \"%s\", "
-						   " which is not recognized by setlocale().", collate),
-				 errhint("Recreate the database with another locale or install the missing locale.")));
+	/*
+	 * Historically, we set LC_COLLATE from datcollate, as well. That's no
+	 * longer necessary because all collation behavior is handled through
+	 * pg_locale_t.
+	 */
 
 	if (pg_perm_setlocale(LC_CTYPE, ctype) == NULL)
 		ereport(FATAL,
@@ -663,13 +662,6 @@ BaseInit(void)
 	 * drop ephemeral slots, which in turn triggers stats reporting.
 	 */
 	ReplicationSlotInitialize();
-
-	/*
-	 * The before shmem exit callback frees the DSA memory occupied by the
-	 * latest memory context statistics that could be published by this proc
-	 * if requested.
-	 */
-	before_shmem_exit(AtProcExit_memstats_cleanup, 0);
 }
 
 
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 308016d7763..886ecbad871 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -39,6 +39,7 @@
 #include "mb/pg_wchar.h"
 #include "utils/fmgrprotos.h"
 #include "utils/memutils.h"
+#include "utils/relcache.h"
 #include "varatt.h"
 
 /*
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 2f8cbd86759..d14b1678e7f 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1028,7 +1028,7 @@ struct config_bool ConfigureNamesBool[] =
 	},
 	{
 		{"enable_distinct_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
-			gettext_noop("Enables reordering of DISTINCT pathkeys."),
+			gettext_noop("Enables reordering of DISTINCT keys."),
 			NULL,
 			GUC_EXPLAIN
 		},
@@ -1602,11 +1602,11 @@ struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 	{
-		{"log_lock_failure", PGC_SUSET, LOGGING_WHAT,
+		{"log_lock_failures", PGC_SUSET, LOGGING_WHAT,
 			gettext_noop("Logs lock failures."),
 			NULL
 		},
-		&log_lock_failure,
+		&log_lock_failures,
 		false,
 		NULL, NULL, NULL
 	},
@@ -3081,7 +3081,7 @@ struct config_int ConfigureNamesInt[] =
 		},
 		&max_slot_wal_keep_size_mb,
 		-1, -1, MAX_KILOBYTES,
-		check_max_slot_wal_keep_size, NULL, NULL
+		NULL, NULL, NULL
 	},
 
 	{
@@ -3100,11 +3100,11 @@ struct config_int ConfigureNamesInt[] =
 			gettext_noop("Sets the duration a replication slot can remain idle before "
 						 "it is invalidated."),
 			NULL,
-			GUC_UNIT_MIN
+			GUC_UNIT_S
 		},
-		&idle_replication_slot_timeout_mins,
-		0, 0, INT_MAX / SECS_PER_MINUTE,
-		check_idle_replication_slot_timeout, NULL, NULL
+		&idle_replication_slot_timeout_secs,
+		0, 0, INT_MAX,
+		NULL, NULL, NULL
 	},
 
 	{
@@ -4837,7 +4837,7 @@ struct config_string ConfigureNamesString[] =
 	{
 		{"ssl_groups", PGC_SIGHUP, CONN_AUTH_SSL,
 			gettext_noop("Sets the group(s) to use for Diffie-Hellman key exchange."),
-			gettext_noop("Multiple groups can be specified using colon-separated list."),
+			gettext_noop("Multiple groups can be specified using a colon-separated list."),
 			GUC_SUPERUSER_ONLY
 		},
 		&SSLECDHCurve,
diff --git a/src/backend/utils/misc/injection_point.c b/src/backend/utils/misc/injection_point.c
index f58ebc8ee52..83b887b6978 100644
--- a/src/backend/utils/misc/injection_point.c
+++ b/src/backend/utils/misc/injection_point.c
@@ -584,3 +584,49 @@ IsInjectionPointAttached(const char *name)
 	return false;				/* silence compiler */
 #endif
 }
+
+/*
+ * Return a List of InjectionPointData, one per attached injection point;
+ * raises an ERROR in builds compiled without USE_INJECTION_POINTS.
+ * The list, its entries and their strings are palloc'd in the current context.
+ */
+List *
+InjectionPointList(void)
+{
+#ifdef USE_INJECTION_POINTS
+	List	   *inj_points = NIL;
+	uint32		max_inuse;
+
+	LWLockAcquire(InjectionPointLock, LW_SHARED);
+
+	max_inuse = pg_atomic_read_u32(&ActiveInjectionPoints->max_inuse);
+
+	for (uint32 idx = 0; idx < max_inuse; idx++)
+	{
+		InjectionPointEntry *entry;
+		InjectionPointData *inj_point;
+		uint64		generation;
+
+		entry = &ActiveInjectionPoints->entries[idx];
+		generation = pg_atomic_read_u64(&entry->generation);
+
+		/* skip free slots (an even generation means the slot is unused) */
+		if (generation % 2 == 0)
+			continue;
+
+		inj_point = (InjectionPointData *) palloc0(sizeof(InjectionPointData));
+		inj_point->name = pstrdup(entry->name);
+		inj_point->library = pstrdup(entry->library);
+		inj_point->function = pstrdup(entry->function);
+		inj_points = lappend(inj_points, inj_point);
+	}
+
+	LWLockRelease(InjectionPointLock);
+
+	return inj_points;
+
+#else
+	elog(ERROR, "Injection points are not supported by this build");
+	return NIL;					/* keep compiler quiet */
+#endif
+}
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 34826d01380..a9d8293474a 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -97,6 +97,7 @@
 #password_encryption = scram-sha-256	# scram-sha-256 or md5
 #scram_iterations = 4096
 #md5_password_warnings = on
+#oauth_validator_libraries = ''	# comma-separated list of trusted validator modules
 
 # GSSAPI using Kerberos
 #krb_server_keyfile = 'FILE:${sysconfdir}/krb5.keytab'
@@ -121,9 +122,6 @@
 #ssl_passphrase_command = ''
 #ssl_passphrase_command_supports_reload = off
 
-# OAuth
-#oauth_validator_libraries = ''	# comma-separated list of trusted validator modules
-
 
 #------------------------------------------------------------------------------
 # RESOURCE USAGE (except WAL)
@@ -180,13 +178,11 @@
 #temp_file_limit = -1			# limits per-process temp file space
 					# in kilobytes, or -1 for no limit
 
+#file_copy_method = copy		# copy, clone (if supported by OS)
+
 #max_notify_queue_pages = 1048576	# limits the number of SLRU pages allocated
 					# for NOTIFY / LISTEN queue
 
-#file_copy_method = copy	# the default is the first option
-					# 	copy
-					# 	clone (if system support is available)
-
 # - Kernel Resources -
 
 #max_files_per_process = 1000		# min 64
@@ -346,7 +342,7 @@
 				# (change requires restart)
 #wal_keep_size = 0		# in megabytes; 0 disables
 #max_slot_wal_keep_size = -1	# in megabytes; -1 disables
-#idle_replication_slot_timeout = 0	# in minutes; 0 disables
+#idle_replication_slot_timeout = 0	# in seconds; 0 disables
 #wal_sender_timeout = 60s	# in milliseconds; 0 disables
 #track_commit_timestamp = off	# collect timestamp of transaction commit
 				# (change requires restart)
@@ -628,7 +624,7 @@
 					#   %% = '%'
 					# e.g. '<%u%%%d> '
 #log_lock_waits = off			# log lock waits >= deadlock_timeout
-#log_lock_failure = off		# log lock failures
+#log_lock_failures = off		# log lock failures
 #log_recovery_conflict_waits = off	# log standby recovery conflict waits
 					# >= deadlock_timeout
 #log_parameter_max_length = -1		# when logging statements, limit logged
diff --git a/src/backend/utils/mmgr/alignedalloc.c b/src/backend/utils/mmgr/alignedalloc.c
index 85aee389d6b..7eea695de62 100644
--- a/src/backend/utils/mmgr/alignedalloc.c
+++ b/src/backend/utils/mmgr/alignedalloc.c
@@ -45,6 +45,7 @@ AlignedAllocFree(void *pointer)
 			 GetMemoryChunkContext(unaligned)->name, chunk);
 #endif
 
+	/* Recursively pfree the unaligned chunk */
 	pfree(unaligned);
 }
 
@@ -96,18 +97,32 @@ AlignedAllocRealloc(void *pointer, Size size, int flags)
 	Assert(old_size >= redirchunk->requested_size);
 #endif
 
+	/*
+	 * To keep things simple, we always allocate a new aligned chunk and copy
+	 * data into it.  Because of the above inaccuracy, this may end in copying
+	 * more data than was in the original allocation request size, but that
+	 * should be OK.
+	 */
 	ctx = GetMemoryChunkContext(unaligned);
 	newptr = MemoryContextAllocAligned(ctx, size, alignto, flags);
 
-	/*
-	 * We may memcpy beyond the end of the original allocation request size,
-	 * so we must mark the entire allocation as defined.
-	 */
-	if (likely(newptr != NULL))
+	/* Cope cleanly with OOM */
+	if (unlikely(newptr == NULL))
 	{
-		VALGRIND_MAKE_MEM_DEFINED(pointer, old_size);
-		memcpy(newptr, pointer, Min(size, old_size));
+		VALGRIND_MAKE_MEM_NOACCESS(redirchunk, sizeof(MemoryChunk));
+		return MemoryContextAllocationFailure(ctx, size, flags);
 	}
+
+	/*
+	 * We may memcpy more than the original allocation request size, which
+	 * would result in trying to copy trailing bytes that the original
+	 * MemoryContextAllocAligned call marked NOACCESS.  So we must mark the
+	 * entire old_size as defined.  That's slightly annoying, but probably not
+	 * worth improving.
+	 */
+	VALGRIND_MAKE_MEM_DEFINED(pointer, old_size);
+	memcpy(newptr, pointer, Min(size, old_size));
+
 	pfree(unaligned);
 
 	return newptr;
diff --git a/src/backend/utils/mmgr/dsa.c b/src/backend/utils/mmgr/dsa.c
index 17d4f7a7a06..be43e9351c3 100644
--- a/src/backend/utils/mmgr/dsa.c
+++ b/src/backend/utils/mmgr/dsa.c
@@ -532,6 +532,21 @@ dsa_attach(dsa_handle handle)
 }
 
 /*
+ * Returns whether the area with the given handle was already attached by the
+ * current process.  The area must have been created with dsa_create (not
+ * dsa_create_in_place).
+ */
+bool
+dsa_is_attached(dsa_handle handle)
+{
+	/*
+	 * An area handle is really the DSM segment handle of its first segment,
+	 * so we are attached iff a mapping for that segment already exists.
+	 */
+	return dsm_find_mapping(handle) != NULL;
+}
+
+/*
  * Attach to an area that was created with dsa_create_in_place.  The caller
  * must somehow know the location in memory that was used when the area was
  * created, though it may be mapped at a different virtual address in this
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 7d28ca706eb..15fa4d0a55e 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -23,11 +23,6 @@
 
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
-#include "nodes/pg_list.h"
-#include "storage/lwlock.h"
-#include "storage/ipc.h"
-#include "utils/dsa.h"
-#include "utils/hsearch.h"
 #include "utils/memdebug.h"
 #include "utils/memutils.h"
 #include "utils/memutils_internal.h"
@@ -140,17 +135,6 @@ static const MemoryContextMethods mcxt_methods[] = {
 };
 
 #undef BOGUS_MCTX
-/*
- * This is passed to MemoryContextStatsInternal to determine whether
- * to print context statistics or not and where to print them logs or
- * stderr.
- */
-typedef enum PrintDestination
-{
-	PRINT_STATS_TO_STDERR = 0,
-	PRINT_STATS_TO_LOGS,
-	PRINT_STATS_NONE
-}			PrintDestination;
 
 /*
  * CurrentMemoryContext
@@ -172,31 +156,16 @@ MemoryContext CurTransactionContext = NULL;
 
 /* This is a transient link to the active portal's memory context: */
 MemoryContext PortalContext = NULL;
-dsa_area   *MemoryStatsDsaArea = NULL;
 
 static void MemoryContextDeleteOnly(MemoryContext context);
 static void MemoryContextCallResetCallbacks(MemoryContext context);
 static void MemoryContextStatsInternal(MemoryContext context, int level,
 									   int max_level, int max_children,
 									   MemoryContextCounters *totals,
-									   PrintDestination print_location,
-									   int *num_contexts);
+									   bool print_to_stderr);
 static void MemoryContextStatsPrint(MemoryContext context, void *passthru,
 									const char *stats_string,
 									bool print_to_stderr);
-static void PublishMemoryContext(MemoryStatsEntry *memcxt_info,
-								 int curr_id, MemoryContext context,
-								 List *path,
-								 MemoryContextCounters stat,
-								 int num_contexts, dsa_area *area,
-								 int max_levels);
-static void compute_contexts_count_and_ids(List *contexts, HTAB *context_id_lookup,
-										   int *stats_count,
-										   bool summary);
-static List *compute_context_path(MemoryContext c, HTAB *context_id_lookup);
-static void free_memorycontextstate_dsa(dsa_area *area, int total_stats,
-										dsa_pointer prev_dsa_pointer);
-static void end_memorycontext_reporting(void);
 
 /*
  * You should not do memory allocations within a critical section, because
@@ -862,19 +831,11 @@ MemoryContextStatsDetail(MemoryContext context,
 						 bool print_to_stderr)
 {
 	MemoryContextCounters grand_totals;
-	int			num_contexts;
-	PrintDestination print_location;
 
 	memset(&grand_totals, 0, sizeof(grand_totals));
 
-	if (print_to_stderr)
-		print_location = PRINT_STATS_TO_STDERR;
-	else
-		print_location = PRINT_STATS_TO_LOGS;
-
-	/* num_contexts report number of contexts aggregated in the output */
 	MemoryContextStatsInternal(context, 1, max_level, max_children,
-							   &grand_totals, print_location, &num_contexts);
+							   &grand_totals, print_to_stderr);
 
 	if (print_to_stderr)
 		fprintf(stderr,
@@ -909,14 +870,13 @@ MemoryContextStatsDetail(MemoryContext context,
  *		One recursion level for MemoryContextStats
  *
  * Print stats for this context if possible, but in any case accumulate counts
- * into *totals (if not NULL). The callers should make sure that print_location
- * is set to PRINT_STATS_TO_STDERR or PRINT_STATS_TO_LOGS or PRINT_STATS_NONE.
+ * into *totals (if not NULL).
  */
 static void
 MemoryContextStatsInternal(MemoryContext context, int level,
 						   int max_level, int max_children,
 						   MemoryContextCounters *totals,
-						   PrintDestination print_location, int *num_contexts)
+						   bool print_to_stderr)
 {
 	MemoryContext child;
 	int			ichild;
@@ -924,39 +884,10 @@ MemoryContextStatsInternal(MemoryContext context, int level,
 	Assert(MemoryContextIsValid(context));
 
 	/* Examine the context itself */
-	switch (print_location)
-	{
-		case PRINT_STATS_TO_STDERR:
-			context->methods->stats(context,
-									MemoryContextStatsPrint,
-									&level,
-									totals, true);
-			break;
-
-		case PRINT_STATS_TO_LOGS:
-			context->methods->stats(context,
-									MemoryContextStatsPrint,
-									&level,
-									totals, false);
-			break;
-
-		case PRINT_STATS_NONE:
-
-			/*
-			 * Do not print the statistics if print_location is
-			 * PRINT_STATS_NONE, only compute totals. This is used in
-			 * reporting of memory context statistics via a sql function. Last
-			 * parameter is not relevant.
-			 */
-			context->methods->stats(context,
-									NULL,
-									NULL,
-									totals, false);
-			break;
-	}
-
-	/* Increment the context count for each of the recursive call */
-	*num_contexts = *num_contexts + 1;
+	context->methods->stats(context,
+							MemoryContextStatsPrint,
+							&level,
+							totals, print_to_stderr);
 
 	/*
 	 * Examine children.
@@ -976,7 +907,7 @@ MemoryContextStatsInternal(MemoryContext context, int level,
 			MemoryContextStatsInternal(child, level + 1,
 									   max_level, max_children,
 									   totals,
-									   print_location, num_contexts);
+									   print_to_stderr);
 		}
 	}
 
@@ -995,13 +926,7 @@ MemoryContextStatsInternal(MemoryContext context, int level,
 			child = MemoryContextTraverseNext(child, context);
 		}
 
-		/*
-		 * Add the count of children contexts which are traversed in the
-		 * non-recursive manner.
-		 */
-		*num_contexts = *num_contexts + ichild;
-
-		if (print_location == PRINT_STATS_TO_STDERR)
+		if (print_to_stderr)
 		{
 			for (int i = 0; i < level; i++)
 				fprintf(stderr, "  ");
@@ -1014,7 +939,7 @@ MemoryContextStatsInternal(MemoryContext context, int level,
 					local_totals.freechunks,
 					local_totals.totalspace - local_totals.freespace);
 		}
-		else if (print_location == PRINT_STATS_TO_LOGS)
+		else
 			ereport(LOG_SERVER_ONLY,
 					(errhidestmt(true),
 					 errhidecontext(true),
@@ -1356,22 +1281,6 @@ HandleLogMemoryContextInterrupt(void)
 }
 
 /*
- * HandleGetMemoryContextInterrupt
- *		Handle receipt of an interrupt indicating a request to publish memory
- *		contexts statistics.
- *
- * All the actual work is deferred to ProcessGetMemoryContextInterrupt() as
- * this cannot be performed in a signal handler.
- */
-void
-HandleGetMemoryContextInterrupt(void)
-{
-	InterruptPending = true;
-	PublishMemoryContextPending = true;
-	/* latch will be set by procsignal_sigusr1_handler */
-}
-
-/*
  * ProcessLogMemoryContextInterrupt
  * 		Perform logging of memory contexts of this backend process.
  *
@@ -1408,539 +1317,6 @@ ProcessLogMemoryContextInterrupt(void)
 	MemoryContextStatsDetail(TopMemoryContext, 100, 100, false);
 }
 
-/*
- * ProcessGetMemoryContextInterrupt
- *		Generate information about memory contexts used by the process.
- *
- * Performs a breadth first search on the memory context tree, thus parents
- * statistics are reported before their children in the monitoring function
- * output.
- *
- * Statistics for all the processes are shared via the same dynamic shared
- * area.  Statistics written by each process are tracked independently in
- * per-process DSA pointers. These pointers are stored in static shared memory.
- *
- * We calculate maximum number of context's statistics that can be displayed
- * using a pre-determined limit for memory available per process for this
- * utility maximum size of statistics for each context.  The remaining context
- * statistics if any are captured as a cumulative total at the end of
- * individual context's statistics.
- *
- * If summary is true, we capture the level 1 and level 2 contexts
- * statistics.  For that we traverse the memory context tree recursively in
- * depth first search manner to cover all the children of a parent context, to
- * be able to display a cumulative total of memory consumption by a parent at
- * level 2 and all its children.
- */
-void
-ProcessGetMemoryContextInterrupt(void)
-{
-	List	   *contexts;
-	HASHCTL		ctl;
-	HTAB	   *context_id_lookup;
-	int			context_id = 0;
-	MemoryStatsEntry *meminfo;
-	bool		summary = false;
-	int			max_stats;
-	int			idx = MyProcNumber;
-	int			stats_count = 0;
-	int			stats_num = 0;
-	MemoryContextCounters stat;
-	int			num_individual_stats = 0;
-
-	PublishMemoryContextPending = false;
-
-	/*
-	 * The hash table is used for constructing "path" column of the view,
-	 * similar to its local backend counterpart.
-	 */
-	ctl.keysize = sizeof(MemoryContext);
-	ctl.entrysize = sizeof(MemoryStatsContextId);
-	ctl.hcxt = CurrentMemoryContext;
-
-	context_id_lookup = hash_create("pg_get_remote_backend_memory_contexts",
-									256,
-									&ctl,
-									HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
-
-	/* List of contexts to process in the next round - start at the top. */
-	contexts = list_make1(TopMemoryContext);
-
-	/* Compute the number of stats that can fit in the defined limit */
-	max_stats =
-		MEMORY_CONTEXT_REPORT_MAX_PER_BACKEND / MAX_MEMORY_CONTEXT_STATS_SIZE;
-	LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE);
-	summary = memCxtState[idx].summary;
-	LWLockRelease(&memCxtState[idx].lw_lock);
-
-	/*
-	 * Traverse the memory context tree to find total number of contexts. If
-	 * summary is requested report the total number of contexts at level 1 and
-	 * 2 from the top. Also, populate the hash table of context ids.
-	 */
-	compute_contexts_count_and_ids(contexts, context_id_lookup, &stats_count,
-								   summary);
-
-	/*
-	 * Allocate memory in this process's DSA for storing statistics of the
-	 * memory contexts upto max_stats, for contexts that don't fit within a
-	 * limit, a cumulative total is written as the last record in the DSA
-	 * segment.
-	 */
-	stats_num = Min(stats_count, max_stats);
-
-	LWLockAcquire(&memCxtArea->lw_lock, LW_EXCLUSIVE);
-
-	/*
-	 * Create a DSA and send handle to the client process after storing the
-	 * context statistics. If number of contexts exceed a predefined limit
-	 * (1MB), a cumulative total is stored for such contexts.
-	 */
-	if (memCxtArea->memstats_dsa_handle == DSA_HANDLE_INVALID)
-	{
-		MemoryContext oldcontext = CurrentMemoryContext;
-		dsa_handle	handle;
-
-		MemoryContextSwitchTo(TopMemoryContext);
-
-		MemoryStatsDsaArea = dsa_create(memCxtArea->lw_lock.tranche);
-
-		handle = dsa_get_handle(MemoryStatsDsaArea);
-		MemoryContextSwitchTo(oldcontext);
-
-		dsa_pin_mapping(MemoryStatsDsaArea);
-
-		/*
-		 * Pin the DSA area, this is to make sure the area remains attachable
-		 * even if the backend that created it exits. This is done so that the
-		 * statistics are published even if the process exits while a client
-		 * is waiting. Also, other processes that publish statistics will use
-		 * the same area.
-		 */
-		dsa_pin(MemoryStatsDsaArea);
-
-		/* Set the handle in shared memory */
-		memCxtArea->memstats_dsa_handle = handle;
-	}
-
-	/*
-	 * If DSA exists, created by another process publishing statistics, attach
-	 * to it.
-	 */
-	else if (MemoryStatsDsaArea == NULL)
-	{
-		MemoryContext oldcontext = CurrentMemoryContext;
-
-		MemoryContextSwitchTo(TopMemoryContext);
-		MemoryStatsDsaArea = dsa_attach(memCxtArea->memstats_dsa_handle);
-		MemoryContextSwitchTo(oldcontext);
-		dsa_pin_mapping(MemoryStatsDsaArea);
-	}
-	LWLockRelease(&memCxtArea->lw_lock);
-
-	/*
-	 * Hold the process lock to protect writes to process specific memory. Two
-	 * processes publishing statistics do not block each other.
-	 */
-	LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE);
-	memCxtState[idx].proc_id = MyProcPid;
-
-	if (DsaPointerIsValid(memCxtState[idx].memstats_dsa_pointer))
-	{
-		/*
-		 * Free any previous allocations, free the name, ident and path
-		 * pointers before freeing the pointer that contains them.
-		 */
-		free_memorycontextstate_dsa(MemoryStatsDsaArea, memCxtState[idx].total_stats,
-									memCxtState[idx].memstats_dsa_pointer);
-	}
-
-	/*
-	 * Assigning total stats before allocating memory so that memory cleanup
-	 * can run if any subsequent dsa_allocate call to allocate name/ident/path
-	 * fails.
-	 */
-	memCxtState[idx].total_stats = stats_num;
-	memCxtState[idx].memstats_dsa_pointer =
-		dsa_allocate0(MemoryStatsDsaArea, stats_num * sizeof(MemoryStatsEntry));
-
-	meminfo = (MemoryStatsEntry *)
-		dsa_get_address(MemoryStatsDsaArea, memCxtState[idx].memstats_dsa_pointer);
-
-	if (summary)
-	{
-		int			cxt_id = 0;
-		List	   *path = NIL;
-
-		/* Copy TopMemoryContext statistics to DSA */
-		memset(&stat, 0, sizeof(stat));
-		(*TopMemoryContext->methods->stats) (TopMemoryContext, NULL, NULL,
-											 &stat, true);
-		path = lcons_int(1, path);
-		PublishMemoryContext(meminfo, cxt_id, TopMemoryContext, path, stat,
-							 1, MemoryStatsDsaArea, 100);
-		cxt_id = cxt_id + 1;
-
-		/*
-		 * Copy statistics for each of TopMemoryContexts children.  This
-		 * includes statistics of at most 100 children per node, with each
-		 * child node limited to a depth of 100 in its subtree.
-		 */
-		for (MemoryContext c = TopMemoryContext->firstchild; c != NULL;
-			 c = c->nextchild)
-		{
-			MemoryContextCounters grand_totals;
-			int			num_contexts = 0;
-
-			path = NIL;
-			memset(&grand_totals, 0, sizeof(grand_totals));
-
-			MemoryContextStatsInternal(c, 1, 100, 100, &grand_totals,
-									   PRINT_STATS_NONE, &num_contexts);
-
-			path = compute_context_path(c, context_id_lookup);
-
-			/*
-			 * Register the stats entry first, that way the cleanup handler
-			 * can reach it in case of allocation failures of one or more
-			 * members.
-			 */
-			memCxtState[idx].total_stats = cxt_id++;
-			PublishMemoryContext(meminfo, cxt_id, c, path,
-								 grand_totals, num_contexts, MemoryStatsDsaArea, 100);
-		}
-		memCxtState[idx].total_stats = cxt_id;
-
-		/* Notify waiting backends and return */
-		end_memorycontext_reporting();
-
-		hash_destroy(context_id_lookup);
-
-		return;
-	}
-
-	foreach_ptr(MemoryContextData, cur, contexts)
-	{
-		List	   *path = NIL;
-
-		/*
-		 * Figure out the transient context_id of this context and each of its
-		 * ancestors, to compute a path for this context.
-		 */
-		path = compute_context_path(cur, context_id_lookup);
-
-		/* Examine the context stats */
-		memset(&stat, 0, sizeof(stat));
-		(*cur->methods->stats) (cur, NULL, NULL, &stat, true);
-
-		/* Account for saving one statistics slot for cumulative reporting */
-		if (context_id < (max_stats - 1) || stats_count <= max_stats)
-		{
-			/* Copy statistics to DSA memory */
-			PublishMemoryContext(meminfo, context_id, cur, path, stat, 1, MemoryStatsDsaArea, 100);
-		}
-		else
-		{
-			meminfo[max_stats - 1].totalspace += stat.totalspace;
-			meminfo[max_stats - 1].nblocks += stat.nblocks;
-			meminfo[max_stats - 1].freespace += stat.freespace;
-			meminfo[max_stats - 1].freechunks += stat.freechunks;
-		}
-
-		/*
-		 * DSA max limit per process is reached, write aggregate of the
-		 * remaining statistics.
-		 *
-		 * We can store contexts from 0 to max_stats - 1. When stats_count is
-		 * greater than max_stats, we stop reporting individual statistics
-		 * when context_id equals max_stats - 2. As we use max_stats - 1 array
-		 * slot for reporting cumulative statistics or "Remaining Totals".
-		 */
-		if (stats_count > max_stats && context_id == (max_stats - 2))
-		{
-			char	   *nameptr;
-			int			namelen = strlen("Remaining Totals");
-
-			num_individual_stats = context_id + 1;
-			meminfo[max_stats - 1].name = dsa_allocate(MemoryStatsDsaArea, namelen + 1);
-			nameptr = dsa_get_address(MemoryStatsDsaArea, meminfo[max_stats - 1].name);
-			strlcpy(nameptr, "Remaining Totals", namelen + 1);
-			meminfo[max_stats - 1].ident = InvalidDsaPointer;
-			meminfo[max_stats - 1].path = InvalidDsaPointer;
-			meminfo[max_stats - 1].type = 0;
-		}
-		context_id++;
-	}
-
-	/*
-	 * Statistics are not aggregated, i.e individual statistics reported when
-	 * stats_count <= max_stats.
-	 */
-	if (stats_count <= max_stats)
-	{
-		memCxtState[idx].total_stats = context_id;
-	}
-	/* Report number of aggregated memory contexts */
-	else
-	{
-		meminfo[max_stats - 1].num_agg_stats = context_id -
-			num_individual_stats;
-
-		/*
-		 * Total stats equals num_individual_stats + 1 record for cumulative
-		 * statistics.
-		 */
-		memCxtState[idx].total_stats = num_individual_stats + 1;
-	}
-
-	/* Notify waiting backends and return */
-	end_memorycontext_reporting();
-
-	hash_destroy(context_id_lookup);
-}
-
-/*
- * Update timestamp and signal all the waiting client backends after copying
- * all the statistics.
- */
-static void
-end_memorycontext_reporting(void)
-{
-	memCxtState[MyProcNumber].stats_timestamp = GetCurrentTimestamp();
-	LWLockRelease(&memCxtState[MyProcNumber].lw_lock);
-	ConditionVariableBroadcast(&memCxtState[MyProcNumber].memcxt_cv);
-}
-
-/*
- * compute_context_path
- *
- * Append the transient context_id of this context and each of its ancestors
- * to a list, in order to compute a path.
- */
-static List *
-compute_context_path(MemoryContext c, HTAB *context_id_lookup)
-{
-	bool		found;
-	List	   *path = NIL;
-	MemoryContext cur_context;
-
-	for (cur_context = c; cur_context != NULL; cur_context = cur_context->parent)
-	{
-		MemoryStatsContextId *cur_entry;
-
-		cur_entry = hash_search(context_id_lookup, &cur_context, HASH_FIND, &found);
-
-		if (!found)
-			elog(ERROR, "hash table corrupted, can't construct path value");
-
-		path = lcons_int(cur_entry->context_id, path);
-	}
-
-	return path;
-}
-
-/*
- * Return the number of contexts allocated currently by the backend
- * Assign context ids to each of the contexts.
- */
-static void
-compute_contexts_count_and_ids(List *contexts, HTAB *context_id_lookup,
-							   int *stats_count, bool summary)
-{
-	foreach_ptr(MemoryContextData, cur, contexts)
-	{
-		MemoryStatsContextId *entry;
-		bool		found;
-
-		entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur,
-													 HASH_ENTER, &found);
-		Assert(!found);
-
-		/*
-		 * context id starts with 1 so increment the stats_count before
-		 * assigning.
-		 */
-		entry->context_id = ++(*stats_count);
-
-		/* Append the children of the current context to the main list. */
-		for (MemoryContext c = cur->firstchild; c != NULL; c = c->nextchild)
-		{
-			if (summary)
-			{
-				entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &c,
-															 HASH_ENTER, &found);
-				Assert(!found);
-
-				entry->context_id = ++(*stats_count);
-			}
-
-			contexts = lappend(contexts, c);
-		}
-
-		/*
-		 * In summary mode only the first two level (from top) contexts are
-		 * displayed.
-		 */
-		if (summary)
-			break;
-	}
-}
-
-/*
- * PublishMemoryContext
- *
- * Copy the memory context statistics of a single context to a DSA memory
- */
-static void
-PublishMemoryContext(MemoryStatsEntry *memcxt_info, int curr_id,
-					 MemoryContext context, List *path,
-					 MemoryContextCounters stat, int num_contexts,
-					 dsa_area *area, int max_levels)
-{
-	const char *ident = context->ident;
-	const char *name = context->name;
-	int		   *path_list;
-
-	/*
-	 * To be consistent with logging output, we label dynahash contexts with
-	 * just the hash table name as with MemoryContextStatsPrint().
-	 */
-	if (context->ident && strncmp(context->name, "dynahash", 8) == 0)
-	{
-		name = context->ident;
-		ident = NULL;
-	}
-
-	if (name != NULL)
-	{
-		int			namelen = strlen(name);
-		char	   *nameptr;
-
-		if (strlen(name) >= MEMORY_CONTEXT_IDENT_SHMEM_SIZE)
-			namelen = pg_mbcliplen(name, namelen,
-								   MEMORY_CONTEXT_IDENT_SHMEM_SIZE - 1);
-
-		memcxt_info[curr_id].name = dsa_allocate(area, namelen + 1);
-		nameptr = (char *) dsa_get_address(area, memcxt_info[curr_id].name);
-		strlcpy(nameptr, name, namelen + 1);
-	}
-	else
-		memcxt_info[curr_id].name = InvalidDsaPointer;
-
-	/* Trim and copy the identifier if it is not set to NULL */
-	if (ident != NULL)
-	{
-		int			idlen = strlen(context->ident);
-		char	   *identptr;
-
-		/*
-		 * Some identifiers such as SQL query string can be very long,
-		 * truncate oversize identifiers.
-		 */
-		if (idlen >= MEMORY_CONTEXT_IDENT_SHMEM_SIZE)
-			idlen = pg_mbcliplen(ident, idlen,
-								 MEMORY_CONTEXT_IDENT_SHMEM_SIZE - 1);
-
-		memcxt_info[curr_id].ident = dsa_allocate(area, idlen + 1);
-		identptr = (char *) dsa_get_address(area, memcxt_info[curr_id].ident);
-		strlcpy(identptr, ident, idlen + 1);
-	}
-	else
-		memcxt_info[curr_id].ident = InvalidDsaPointer;
-
-	/* Allocate DSA memory for storing path information */
-	if (path == NIL)
-		memcxt_info[curr_id].path = InvalidDsaPointer;
-	else
-	{
-		int			levels = Min(list_length(path), max_levels);
-
-		memcxt_info[curr_id].path_length = levels;
-		memcxt_info[curr_id].path = dsa_allocate0(area, levels * sizeof(int));
-		memcxt_info[curr_id].levels = list_length(path);
-		path_list = (int *) dsa_get_address(area, memcxt_info[curr_id].path);
-
-		foreach_int(i, path)
-		{
-			path_list[foreach_current_index(i)] = i;
-			if (--levels == 0)
-				break;
-		}
-	}
-	memcxt_info[curr_id].type = context->type;
-	memcxt_info[curr_id].totalspace = stat.totalspace;
-	memcxt_info[curr_id].nblocks = stat.nblocks;
-	memcxt_info[curr_id].freespace = stat.freespace;
-	memcxt_info[curr_id].freechunks = stat.freechunks;
-	memcxt_info[curr_id].num_agg_stats = num_contexts;
-}
-
-/*
- * free_memorycontextstate_dsa
- *
- * Worker for freeing resources from a MemoryStatsEntry.  Callers are
- * responsible for ensuring that the DSA pointer is valid.
- */
-static void
-free_memorycontextstate_dsa(dsa_area *area, int total_stats,
-							dsa_pointer prev_dsa_pointer)
-{
-	MemoryStatsEntry *meminfo;
-
-	meminfo = (MemoryStatsEntry *) dsa_get_address(area, prev_dsa_pointer);
-	Assert(meminfo != NULL);
-	for (int i = 0; i < total_stats; i++)
-	{
-		if (DsaPointerIsValid(meminfo[i].name))
-			dsa_free(area, meminfo[i].name);
-
-		if (DsaPointerIsValid(meminfo[i].ident))
-			dsa_free(area, meminfo[i].ident);
-
-		if (DsaPointerIsValid(meminfo[i].path))
-			dsa_free(area, meminfo[i].path);
-	}
-
-	dsa_free(area, memCxtState[MyProcNumber].memstats_dsa_pointer);
-	memCxtState[MyProcNumber].memstats_dsa_pointer = InvalidDsaPointer;
-}
-
-/*
- * Free the memory context statistics stored by this process
- * in DSA area.
- */
-void
-AtProcExit_memstats_cleanup(int code, Datum arg)
-{
-	int			idx = MyProcNumber;
-
-	if (memCxtArea->memstats_dsa_handle == DSA_HANDLE_INVALID)
-		return;
-
-	LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE);
-
-	if (!DsaPointerIsValid(memCxtState[idx].memstats_dsa_pointer))
-	{
-		LWLockRelease(&memCxtState[idx].lw_lock);
-		return;
-	}
-
-	/* If the dsa mapping could not be found, attach to the area */
-	if (MemoryStatsDsaArea == NULL)
-		MemoryStatsDsaArea = dsa_attach(memCxtArea->memstats_dsa_handle);
-
-	/*
-	 * Free the memory context statistics, free the name, ident and path
-	 * pointers before freeing the pointer that contains these pointers and
-	 * integer statistics.
-	 */
-	free_memorycontextstate_dsa(MemoryStatsDsaArea, memCxtState[idx].total_stats,
-								memCxtState[idx].memstats_dsa_pointer);
-
-	dsa_detach(MemoryStatsDsaArea);
-	LWLockRelease(&memCxtState[idx].lw_lock);
-}
-
 void *
 palloc(Size size)
 {