45 files changed, 640 insertions, 241 deletions
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 01e1db7f856..4204088fa0d 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -68,7 +68,7 @@ typedef struct BrinShared
 	int			scantuplesortstates;
 
 	/* Query ID, for report in worker processes */
-	uint64		queryid;
+	int64		queryid;
 
 	/*
 	 * workersdonecv is used to monitor the progress of workers.  All parallel
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 46c1dce222d..50747c16396 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -1243,8 +1243,9 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
 		}
 		else
 		{
-			text	   *t;
+			const char *name;
 			const char *value;
+			text	   *t;
 			Size		len;
 
 			/*
@@ -1291,11 +1292,19 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
 			 * have just "name", assume "name=true" is meant.  Note: the
 			 * namespace is not output.
 			 */
+			name = def->defname;
 			if (def->arg != NULL)
 				value = defGetString(def);
 			else
 				value = "true";
 
+			/* Insist that name not contain "=", else "a=b=c" is ambiguous */
+			if (strchr(name, '=') != NULL)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("invalid option name \"%s\": must not contain \"=\"",
+								name)));
+
 			/*
 			 * This is not a great place for this test, but there's no other
 			 * convenient place to filter the option out. As WITH (oids =
@@ -1303,7 +1312,7 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
 			 * amount of ugly.
 			 */
 			if (acceptOidsOff && def->defnamespace == NULL &&
-				strcmp(def->defname, "oids") == 0)
+				strcmp(name, "oids") == 0)
 			{
 				if (defGetBoolean(def))
 					ereport(ERROR,
@@ -1313,11 +1322,11 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
 				continue;
 			}
 
-			len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value);
+			len = VARHDRSZ + strlen(name) + 1 + strlen(value);
 			/* +1 leaves room for sprintf's trailing null */
 			t = (text *) palloc(len + 1);
 			SET_VARSIZE(t, len);
-			sprintf(VARDATA(t), "%s=%s", def->defname, value);
+			sprintf(VARDATA(t), "%s=%s", name, value);
 
 			astate = accumArrayResult(astate, PointerGetDatum(t),
 									  false, TEXTOID,
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index a6b701943d3..c0aa7d0222f 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -1058,11 +1058,11 @@ gistGetFakeLSN(Relation rel)
 }
 
 /*
- * This is a stratnum support function for GiST opclasses that use the
- * RT*StrategyNumber constants.
+ * This is a stratnum translation support function for GiST opclasses that use
+ * the RT*StrategyNumber constants.
  */
 Datum
-gist_stratnum_common(PG_FUNCTION_ARGS)
+gist_translate_cmptype_common(PG_FUNCTION_ARGS)
 {
 	CompareType cmptype = PG_GETARG_INT32(0);
 
@@ -1090,9 +1090,9 @@ gist_stratnum_common(PG_FUNCTION_ARGS)
 /*
  * Returns the opclass's private stratnum used for the given compare type.
  *
- * Calls the opclass's GIST_STRATNUM_PROC support function, if any,
- * and returns the result.
- * Returns InvalidStrategy if the function is not defined.
+ * Calls the opclass's GIST_TRANSLATE_CMPTYPE_PROC support function, if any,
+ * and returns the result.  Returns InvalidStrategy if the function is not
+ * defined.
  */
 StrategyNumber
 gisttranslatecmptype(CompareType cmptype, Oid opfamily)
@@ -1101,7 +1101,7 @@ gisttranslatecmptype(CompareType cmptype, Oid opfamily)
 	Datum		result;
 
 	/* Check whether the function is provided. */
-	funcid = get_opfamily_proc(opfamily, ANYOID, ANYOID, GIST_STRATNUM_PROC);
+	funcid = get_opfamily_proc(opfamily, ANYOID, ANYOID, GIST_TRANSLATE_CMPTYPE_PROC);
 	if (!OidIsValid(funcid))
 		return InvalidStrategy;
 
diff --git a/src/backend/access/gist/gistvalidate.c b/src/backend/access/gist/gistvalidate.c
index 2a49e6d20f0..2ed6f74fce9 100644
--- a/src/backend/access/gist/gistvalidate.c
+++ b/src/backend/access/gist/gistvalidate.c
@@ -138,7 +138,7 @@ gistvalidate(Oid opclassoid)
 				ok = check_amproc_signature(procform->amproc, VOIDOID, true,
 											1, 1, INTERNALOID);
 				break;
-			case GIST_STRATNUM_PROC:
+			case GIST_TRANSLATE_CMPTYPE_PROC:
 				ok = check_amproc_signature(procform->amproc, INT2OID, true,
 											1, 1, INT4OID) &&
 					procform->amproclefttype == ANYOID &&
@@ -265,7 +265,7 @@ gistvalidate(Oid opclassoid)
 		if (i == GIST_DISTANCE_PROC || i == GIST_FETCH_PROC ||
 			i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC ||
 			i == GIST_OPTIONS_PROC || i == GIST_SORTSUPPORT_PROC ||
-			i == GIST_STRATNUM_PROC)
+			i == GIST_TRANSLATE_CMPTYPE_PROC)
 			continue;			/* optional methods */
 		ereport(INFO,
 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
@@ -336,7 +336,7 @@ gistadjustmembers(Oid opfamilyoid,
 			case GIST_FETCH_PROC:
 			case GIST_OPTIONS_PROC:
 			case GIST_SORTSUPPORT_PROC:
-			case GIST_STRATNUM_PROC:
+			case GIST_TRANSLATE_CMPTYPE_PROC:
 				/* Optional, so force it to be a soft family dependency */
 				op->ref_is_hard = false;
 				op->ref_is_family = true;
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 9ec8cda1c68..0dcd6ee817e 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -213,6 +213,27 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] =
 #define TUPLOCK_from_mxstatus(status) \
 			(MultiXactStatusLock[(status)])
 
+/*
+ * Assert-only check that a snapshot is held if we might need TOAST access.
+ */
+static inline void
+AssertHasSnapshotForToast(Relation rel)
+{
+#ifdef USE_ASSERT_CHECKING
+
+	/* skip outside normal processing; bootstrap mode breaks this rule */
+	if (!IsNormalProcessingMode())
+		return;
+
+	/* if the relation doesn't have a TOAST table, we are good */
+	if (!OidIsValid(rel->rd_rel->reltoastrelid))
+		return;
+
+	Assert(HaveRegisteredOrActiveSnapshot());
+
+#endif							/* USE_ASSERT_CHECKING */
+}
+
 /* ----------------------------------------------------------------
  *						 heap support routines
  * ----------------------------------------------------------------
@@ -2066,6 +2087,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 	Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
 		   RelationGetNumberOfAttributes(relation));
 
+	AssertHasSnapshotForToast(relation);
+
 	/*
 	 * Fill in tuple header fields and toast the tuple if necessary.
 	 *
@@ -2343,6 +2366,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
 	/* currently not needed (thus unsupported) for heap_multi_insert() */
 	Assert(!(options & HEAP_INSERT_NO_LOGICAL));
 
+	AssertHasSnapshotForToast(relation);
+
 	needwal = RelationNeedsWAL(relation);
 	saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
 												   HEAP_DEFAULT_FILLFACTOR);
@@ -2765,6 +2790,8 @@ heap_delete(Relation relation, ItemPointer tid,
 
 	Assert(ItemPointerIsValid(tid));
 
+	AssertHasSnapshotForToast(relation);
+
 	/*
 	 * Forbid this during a parallel operation, lest it allocate a combo CID.
 	 * Other workers might need that combo CID for visibility checks, and we
@@ -3260,6 +3287,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	Assert(HeapTupleHeaderGetNatts(newtup->t_data) <=
 		   RelationGetNumberOfAttributes(relation));
 
+	AssertHasSnapshotForToast(relation);
+
 	/*
 	 * Forbid this during a parallel operation, lest it allocate a combo CID.
 	 * Other workers might need that combo CID for visibility checks, and we
@@ -4953,7 +4982,7 @@ l3:
 					case LockWaitError:
 						if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
 														status, infomask, relation,
-														NULL, log_lock_failure))
+														NULL, log_lock_failures))
 							ereport(ERROR,
 									(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
 									 errmsg("could not obtain lock on row in relation \"%s\"",
@@ -4991,7 +5020,7 @@ l3:
 						}
 						break;
 					case LockWaitError:
-						if (!ConditionalXactLockTableWait(xwait, log_lock_failure))
+						if (!ConditionalXactLockTableWait(xwait, log_lock_failures))
 							ereport(ERROR,
 									(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
 									 errmsg("could not obtain lock on row in relation \"%s\"",
@@ -5256,7 +5285,7 @@ heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode,
 			break;
 
 		case LockWaitError:
-			if (!ConditionalLockTupleTuplock(relation, tid, mode, log_lock_failure))
+			if (!ConditionalLockTupleTuplock(relation, tid, mode, log_lock_failures))
 				ereport(ERROR,
 						(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
 						 errmsg("could not obtain lock on row in relation \"%s\"",
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index ac082fefa77..cb4bc35c93e 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -464,7 +464,7 @@ tuple_lock_retry:
 									return TM_WouldBlock;
 								break;
 							case LockWaitError:
-								if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failure))
+								if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failures))
 									ereport(ERROR,
 											(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
 											 errmsg("could not obtain lock on row in relation \"%s\"",
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 708674d8fcf..09416450af9 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -757,7 +757,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	vacrel->vm_new_visible_pages = 0;
 	vacrel->vm_new_visible_frozen_pages = 0;
 	vacrel->vm_new_frozen_pages = 0;
-	vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
 
 	/*
 	 * Get cutoffs that determine which deleted tuples are considered DEAD,
@@ -776,7 +775,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	 * to increase the number of dead tuples it can prune away.)
 	 */
 	vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
+	vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
 	vacrel->vistest = GlobalVisTestFor(rel);
+
 	/* Initialize state used to track oldest extant XID/MXID */
 	vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
 	vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 765659887af..03a1d7b027a 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -228,6 +228,8 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
 	BTScanOpaque so = (BTScanOpaque) scan->opaque;
 	bool		res;
 
+	Assert(scan->heapRelation != NULL);
+
 	/* btree indexes are never lossy */
 	scan->xs_recheck = false;
 
@@ -289,6 +291,8 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 	int64		ntids = 0;
 	ItemPointer heapTid;
 
+	Assert(scan->heapRelation == NULL);
+
 	/* Each loop iteration performs another primitive index scan */
 	do
 	{
@@ -393,6 +397,32 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
 		BTScanPosInvalidate(so->currPos);
 	}
 
+	/*
+	 * We prefer to eagerly drop leaf page pins before btgettuple returns.
+	 * This avoids making VACUUM wait to acquire a cleanup lock on the page.
+	 *
+	 * We cannot safely drop leaf page pins during index-only scans due to a
+	 * race condition involving VACUUM setting pages all-visible in the VM.
+	 * It's also unsafe for plain index scans that use a non-MVCC snapshot.
+	 *
+	 * When we drop pins eagerly, the mechanism that marks so->killedItems[]
+	 * index tuples LP_DEAD has to deal with concurrent TID recycling races.
+	 * The scheme used to detect unsafe TID recycling won't work when scanning
+	 * unlogged relations (since it involves saving an affected page's LSN).
+	 * Opt out of eager pin dropping during unlogged relation scans for now
+	 * (this is preferable to opting out of kill_prior_tuple LP_DEAD setting).
+	 *
+	 * Also opt out of dropping leaf page pins eagerly during bitmap scans.
+	 * Pins cannot be held for more than an instant during bitmap scans either
+	 * way, so we might as well avoid wasting cycles on acquiring page LSNs.
+	 *
+	 * See nbtree/README section on making concurrent TID recycling safe.
+	 */
+	so->dropPin = (!scan->xs_want_itup &&
+				   IsMVCCSnapshot(scan->xs_snapshot) &&
+				   RelationNeedsWAL(scan->indexRelation) &&
+				   scan->heapRelation != NULL);
+
 	so->markItemIndex = -1;
 	so->needPrimScan = false;
 	so->scanBehind = false;
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index fe9a3886913..070f14c8b91 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -25,7 +25,7 @@
 #include "utils/rel.h"
 
 
-static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp);
+static inline void _bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so);
 static Buffer _bt_moveright(Relation rel, Relation heaprel, BTScanInsert key,
 							Buffer buf, bool forupdate, BTStack stack,
 							int access);
@@ -57,24 +57,29 @@ static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
 /*
  *	_bt_drop_lock_and_maybe_pin()
  *
- * Unlock the buffer; and if it is safe to release the pin, do that, too.
- * This will prevent vacuum from stalling in a blocked state trying to read a
- * page when a cursor is sitting on it.
- *
- * See nbtree/README section on making concurrent TID recycling safe.
+ * Unlock so->currPos.buf.  During so->dropPin scans, drop the pin, too.
+ * Dropping the pin prevents VACUUM from blocking on acquiring a cleanup lock.
  */
-static void
-_bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
+static inline void
+_bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so)
 {
-	_bt_unlockbuf(scan->indexRelation, sp->buf);
-
-	if (IsMVCCSnapshot(scan->xs_snapshot) &&
-		RelationNeedsWAL(scan->indexRelation) &&
-		!scan->xs_want_itup)
+	if (!so->dropPin)
 	{
-		ReleaseBuffer(sp->buf);
-		sp->buf = InvalidBuffer;
+		/* Just drop the lock (not the pin) */
+		_bt_unlockbuf(rel, so->currPos.buf);
+		return;
 	}
+
+	/*
+	 * Drop both the lock and the pin.
+	 *
+	 * Have to set so->currPos.lsn so that _bt_killitems has a way to detect
+	 * when concurrent heap TID recycling by VACUUM might have taken place.
+	 */
+	Assert(RelationNeedsWAL(rel));
+	so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf);
+	_bt_relbuf(rel, so->currPos.buf);
+	so->currPos.buf = InvalidBuffer;
 }
 
 /*
@@ -866,8 +871,8 @@ _bt_compare(Relation rel,
  *		if backwards scan, the last item) in the tree that satisfies the
  *		qualifications in the scan key.  On success exit, data about the
  *		matching tuple(s) on the page has been loaded into so->currPos.  We'll
- *		drop all locks and hold onto a pin on page's buffer, except when
- *		_bt_drop_lock_and_maybe_pin dropped the pin to avoid blocking VACUUM.
+ *		drop all locks and hold onto a pin on page's buffer, except during
+ *		so->dropPin scans, when we drop both the lock and the pin.
  *		_bt_returnitem sets the next item to return to scan on success exit.
  *
  * If there are no matching items in the index, we return false, with no
@@ -1610,7 +1615,13 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
 	so->currPos.currPage = BufferGetBlockNumber(so->currPos.buf);
 	so->currPos.prevPage = opaque->btpo_prev;
 	so->currPos.nextPage = opaque->btpo_next;
+	/* delay setting so->currPos.lsn until _bt_drop_lock_and_maybe_pin */
+	so->currPos.dir = dir;
+	so->currPos.nextTupleOffset = 0;
 
+	/* either moreRight or moreLeft should be set now (may be unset later) */
+	Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight :
+		   so->currPos.moreLeft);
 	Assert(!P_IGNORE(opaque));
 	Assert(BTScanPosIsPinned(so->currPos));
 	Assert(!so->needPrimScan);
@@ -1626,14 +1637,6 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
 								 so->currPos.currPage);
 	}
 
-	/* initialize remaining currPos fields related to current page */
-	so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf);
-	so->currPos.dir = dir;
-	so->currPos.nextTupleOffset = 0;
-	/* either moreLeft or moreRight should be set now (may be unset later) */
-	Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight :
-		   so->currPos.moreLeft);
-
 	PredicateLockPage(rel, so->currPos.currPage, scan->xs_snapshot);
 
 	/* initialize local variables */
@@ -2107,10 +2110,9 @@ _bt_returnitem(IndexScanDesc scan, BTScanOpaque so)
  *
  * Wrapper on _bt_readnextpage that performs final steps for the current page.
  *
- * On entry, if so->currPos.buf is valid the buffer is pinned but not locked.
- * If there's no pin held, it's because _bt_drop_lock_and_maybe_pin dropped
- * the pin eagerly earlier on.  The scan must have so->currPos.currPage set to
- * a valid block, in any case.
+ * On entry, so->currPos must be valid.  Its buffer will be pinned, though
+ * never locked.  (Actually, during so->dropPin scans no pin will be held,
+ * though so->currPos.currPage must still be set to a valid block number.)
  */
 static bool
 _bt_steppage(IndexScanDesc scan, ScanDirection dir)
@@ -2251,12 +2253,14 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
 	 */
 	if (_bt_readpage(scan, dir, offnum, true))
 	{
+		Relation	rel = scan->indexRelation;
+
 		/*
 		 * _bt_readpage succeeded.  Drop the lock (and maybe the pin) on
 		 * so->currPos.buf in preparation for btgettuple returning tuples.
 		 */
 		Assert(BTScanPosIsPinned(so->currPos));
-		_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+		_bt_drop_lock_and_maybe_pin(rel, so);
 		return true;
 	}
 
@@ -2294,8 +2298,8 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
  *
  * On success exit, so->currPos is updated to contain data from the next
  * interesting page, and we return true.  We hold a pin on the buffer on
- * success exit, except when _bt_drop_lock_and_maybe_pin decided it was safe
- * to eagerly drop the pin (to avoid blocking VACUUM).
+ * success exit (except during so->dropPin index scans, when we drop the pin
+ * eagerly to avoid blocking VACUUM).
  *
  * If there are no more matching records in the given direction, we drop all
  * locks and pins, invalidate so->currPos, and return false.
@@ -2413,7 +2417,7 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno,
 	 */
 	Assert(so->currPos.currPage == blkno);
 	Assert(BTScanPosIsPinned(so->currPos));
-	_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+	_bt_drop_lock_and_maybe_pin(rel, so);
 
 	return true;
 }
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 3794cc924ad..9d70e89c1f3 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -105,7 +105,7 @@ typedef struct BTShared
 	int			scantuplesortstates;
 
 	/* Query ID, for report in worker processes */
-	uint64		queryid;
+	int64		queryid;
 
 	/*
 	 * workersdonecv is used to monitor the progress of workers.  All parallel
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 1a15dfcb7d3..29f0dca1b08 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -63,7 +63,7 @@ static bool _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
 							  bool *continuescan, int *ikey);
 static bool _bt_check_rowcompare(ScanKey skey,
 								 IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
-								 ScanDirection dir, bool forcenonrequired, bool *continuescan);
+								 ScanDirection dir, bool *continuescan);
 static void _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
 									 int tupnatts, TupleDesc tupdesc);
 static int	_bt_keep_natts(Relation rel, IndexTuple lastleft,
@@ -2902,8 +2902,10 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
 		/* row-comparison keys need special processing */
 		if (key->sk_flags & SK_ROW_HEADER)
 		{
+			Assert(!forcenonrequired);	/* forbidden by _bt_set_startikey */
+
 			if (_bt_check_rowcompare(key, tuple, tupnatts, tupdesc, dir,
-									 forcenonrequired, continuescan))
+									 continuescan))
 				continue;
 			return false;
 		}
@@ -3060,8 +3062,7 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
  */
 static bool
 _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
-					 TupleDesc tupdesc, ScanDirection dir,
-					 bool forcenonrequired, bool *continuescan)
+					 TupleDesc tupdesc, ScanDirection dir, bool *continuescan)
 {
 	ScanKey		subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
 	int32		cmpresult = 0;
@@ -3101,11 +3102,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 
 		if (isNull)
 		{
-			if (forcenonrequired)
-			{
-				/* treating scan's keys as non-required */
-			}
-			else if (subkey->sk_flags & SK_BT_NULLS_FIRST)
+			if (subkey->sk_flags & SK_BT_NULLS_FIRST)
 			{
 				/*
 				 * Since NULLs are sorted before non-NULLs, we know we have
@@ -3159,12 +3156,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 			 */
 			Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument));
 			subkey--;
-			if (forcenonrequired)
-			{
-				/* treating scan's keys as non-required */
-			}
-			else if ((subkey->sk_flags & SK_BT_REQFWD) &&
-					 ScanDirectionIsForward(dir))
+			if ((subkey->sk_flags & SK_BT_REQFWD) &&
+				ScanDirectionIsForward(dir))
 				*continuescan = false;
 			else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
 					 ScanDirectionIsBackward(dir))
@@ -3216,7 +3209,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
 			break;
 	}
 
-	if (!result && !forcenonrequired)
+	if (!result)
 	{
 		/*
 		 * Tuple fails this qual.  If it's a required qual for the current
@@ -3342,75 +3335,71 @@ _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
  *
  * Note that if we hold a pin on the target page continuously from initially
  * reading the items until applying this function, VACUUM cannot have deleted
- * any items from the page, and so there is no need to search left from the
- * recorded offset.  (This observation also guarantees that the item is still
- * the right one to delete, which might otherwise be questionable since heap
- * TIDs can get recycled.)	This holds true even if the page has been modified
- * by inserts and page splits, so there is no need to consult the LSN.
- *
- * If the pin was released after reading the page, then we re-read it.  If it
- * has been modified since we read it (as determined by the LSN), we dare not
- * flag any entries because it is possible that the old entry was vacuumed
- * away and the TID was re-used by a completely different heap tuple.
+ * any items on the page, so the page's TIDs can't have been recycled by now.
+ * There's no risk that we'll confuse a new index tuple that happens to use a
+ * recycled TID with a now-removed tuple with the same TID (that used to be on
+ * this same page).  We can't rely on that during scans that drop pins eagerly
+ * (so->dropPin scans), though, so we must condition setting LP_DEAD bits on
+ * the page LSN having not changed since back when _bt_readpage saw the page.
  */
 void
 _bt_killitems(IndexScanDesc scan)
 {
+	Relation	rel = scan->indexRelation;
 	BTScanOpaque so = (BTScanOpaque) scan->opaque;
 	Page		page;
 	BTPageOpaque opaque;
 	OffsetNumber minoff;
 	OffsetNumber maxoff;
-	int			i;
 	int			numKilled = so->numKilled;
 	bool		killedsomething = false;
-	bool		droppedpin PG_USED_FOR_ASSERTS_ONLY;
 
+	Assert(numKilled > 0);
 	Assert(BTScanPosIsValid(so->currPos));
+	Assert(scan->heapRelation != NULL); /* can't be a bitmap index scan */
 
-	/*
-	 * Always reset the scan state, so we don't look for same items on other
-	 * pages.
-	 */
+	/* Always invalidate so->killedItems[] before leaving so->currPos */
 	so->numKilled = 0;
 
-	if (BTScanPosIsPinned(so->currPos))
+	if (!so->dropPin)
 	{
 		/*
 		 * We have held the pin on this page since we read the index tuples,
 		 * so all we need to do is lock it.  The pin will have prevented
-		 * re-use of any TID on the page, so there is no need to check the
-		 * LSN.
+		 * concurrent VACUUMs from recycling any of the TIDs on the page.
 		 */
-		droppedpin = false;
-		_bt_lockbuf(scan->indexRelation, so->currPos.buf, BT_READ);
-
-		page = BufferGetPage(so->currPos.buf);
+		Assert(BTScanPosIsPinned(so->currPos));
+		_bt_lockbuf(rel, so->currPos.buf, BT_READ);
 	}
 	else
 	{
 		Buffer		buf;
+		XLogRecPtr	latestlsn;
 
-		droppedpin = true;
-		/* Attempt to re-read the buffer, getting pin and lock. */
-		buf = _bt_getbuf(scan->indexRelation, so->currPos.currPage, BT_READ);
+		Assert(!BTScanPosIsPinned(so->currPos));
+		Assert(RelationNeedsWAL(rel));
+		buf = _bt_getbuf(rel, so->currPos.currPage, BT_READ);
 
-		page = BufferGetPage(buf);
-		if (BufferGetLSNAtomic(buf) == so->currPos.lsn)
-			so->currPos.buf = buf;
-		else
+		latestlsn = BufferGetLSNAtomic(buf);
+		Assert(!XLogRecPtrIsInvalid(so->currPos.lsn));
+		Assert(so->currPos.lsn <= latestlsn);
+		if (so->currPos.lsn != latestlsn)
 		{
-			/* Modified while not pinned means hinting is not safe. */
-			_bt_relbuf(scan->indexRelation, buf);
+			/* Modified, give up on hinting */
+			_bt_relbuf(rel, buf);
 			return;
 		}
+
+		/* Unmodified, hinting is safe */
+		so->currPos.buf = buf;
 	}
 
+	page = BufferGetPage(so->currPos.buf);
 	opaque = BTPageGetOpaque(page);
 	minoff = P_FIRSTDATAKEY(opaque);
 	maxoff = PageGetMaxOffsetNumber(page);
 
-	for (i = 0; i < numKilled; i++)
+	for (int i = 0; i < numKilled; i++)
 	{
 		int			itemIndex = so->killedItems[i];
 		BTScanPosItem *kitem = &so->currPos.items[itemIndex];
@@ -3442,7 +3431,7 @@ _bt_killitems(IndexScanDesc scan)
 				 * correctness.
 				 *
 				 * Note that the page may have been modified in almost any way
-				 * since we first read it (in the !droppedpin case), so it's
+				 * since we first read it (in the !so->dropPin case), so it's
 				 * possible that this posting list tuple wasn't a posting list
 				 * tuple when we first encountered its heap TIDs.
 				 */
@@ -3458,7 +3447,7 @@ _bt_killitems(IndexScanDesc scan)
 					 * though only in the common case where the page can't
 					 * have been concurrently modified
 					 */
-					Assert(kitem->indexOffset == offnum || !droppedpin);
+					Assert(kitem->indexOffset == offnum || !so->dropPin);
 
 					/*
 					 * Read-ahead to later kitems here.
@@ -3525,7 +3514,7 @@ _bt_killitems(IndexScanDesc scan)
 		MarkBufferDirtyHint(so->currPos.buf, true);
 	}
 
-	_bt_unlockbuf(scan->indexRelation, so->currPos.buf);
+	_bt_unlockbuf(rel, so->currPos.buf);
 }
 
 
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index b885513f765..2e67e998adb 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -1045,6 +1045,34 @@ TransactionStartedDuringRecovery(void)
 }
 
 /*
+ *	GetTopReadOnlyTransactionNestLevel
+ *
+ * Returns the nesting level of the outermost transaction level that is
+ * read-only: zero when not inside any transaction or when no level is
+ * read-only, one when the top-level transaction is read-only, two when the
+ * first level of subtransaction is the outermost read-only one, etc.
+ *
+ * Note: subtransactions of the topmost read-only transaction are also
+ * read-only, because they inherit read-only mode from the transaction, and
+ * thus can't change to read-write mode.  See check_transaction_read_only().
+ */
+int
+GetTopReadOnlyTransactionNestLevel(void)
+{
+	TransactionState s = CurrentTransactionState;
+
+	if (!XactReadOnly)
+		return 0;
+	while (s->nestingLevel > 1)
+	{
+		if (!s->prevXactReadOnly)
+			return s->nestingLevel;
+		s = s->parent;
+	}
+	return s->nestingLevel;
+}
+
+/*
  *	EnterParallelMode
  */
 void
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 5fbbcdaabb1..c95eb945016 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1065,16 +1065,41 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
 
 	/* Check that the chosen locales are valid, and get canonical spellings */
 	if (!check_locale(LC_COLLATE, dbcollate, &canonname))
-		ereport(ERROR,
-				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
-				 errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
-				 errhint("If the locale name is specific to ICU, use ICU_LOCALE.")));
+	{
+		if (dblocprovider == COLLPROVIDER_BUILTIN)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
+					 errhint("If the locale name is specific to the builtin provider, use BUILTIN_LOCALE.")));
+		else if (dblocprovider == COLLPROVIDER_ICU)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
+					 errhint("If the locale name is specific to the ICU provider, use ICU_LOCALE.")));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate)));
+	}
 	dbcollate = canonname;
 	if (!check_locale(LC_CTYPE, dbctype, &canonname))
-		ereport(ERROR,
-				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
-				 errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
-				 errhint("If the locale name is specific to ICU, use ICU_LOCALE.")));
+	{
+		if (dblocprovider == COLLPROVIDER_BUILTIN)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
+					 errhint("If the locale name is specific to the builtin provider, use BUILTIN_LOCALE.")));
+		else if (dblocprovider == COLLPROVIDER_ICU)
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
+					 errhint("If the locale name is specific to the ICU provider, use ICU_LOCALE.")));
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+					 errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype)));
+	}
+
 	dbctype = canonname;
 
 	check_encoding_locale_matches(encoding, dbcollate, dbctype);
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index bfa83fbc3fe..7e2792ead71 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -811,14 +811,10 @@ ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc)
 	 * the queryid in any of the EXPLAIN plans to keep stable the results
 	 * generated by regression test suites.
 	 */
-	if (es->verbose && queryDesc->plannedstmt->queryId != UINT64CONST(0) &&
+	if (es->verbose && queryDesc->plannedstmt->queryId != INT64CONST(0) &&
 		compute_query_id != COMPUTE_QUERY_ID_REGRESS)
 	{
-		/*
-		 * Output the queryid as an int64 rather than a uint64 so we match
-		 * what would be seen in the BIGINT pg_stat_statements.queryid column.
-		 */
-		ExplainPropertyInteger("Query Identifier", NULL, (int64)
+		ExplainPropertyInteger("Query Identifier", NULL,
 							   queryDesc->plannedstmt->queryId, es);
 	}
 }
diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c
index c14e038d54f..8d2d7431544 100644
--- a/src/backend/commands/foreigncmds.c
+++ b/src/backend/commands/foreigncmds.c
@@ -71,15 +71,26 @@ optionListToArray(List *options)
 	foreach(cell, options)
 	{
 		DefElem    *def = lfirst(cell);
+		const char *name;
 		const char *value;
 		Size		len;
 		text	   *t;
 
+		name = def->defname;
 		value = defGetString(def);
-		len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value);
+
+		/* Insist that name not contain "=", else "a=b=c" is ambiguous */
+		if (strchr(name, '=') != NULL)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("invalid option name \"%s\": must not contain \"=\"",
+							name)));
+
+		len = VARHDRSZ + strlen(name) + 1 + strlen(value);
+		/* +1 leaves room for sprintf's trailing null */
 		t = palloc(len + 1);
 		SET_VARSIZE(t, len);
-		sprintf(VARDATA(t), "%s=%s", def->defname, value);
+		sprintf(VARDATA(t), "%s=%s", name, value);
 
 		astate = accumArrayResult(astate, PointerGetDatum(t),
 								  false, TEXTOID,
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index d962fe392cd..c3ec2076a52 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -4226,7 +4226,7 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein
 									 false);
 
 		/*
-		 * Updating pg_index might involve TOAST table access, so ensure we
+		 * Swapping the indexes might involve TOAST table access, so ensure we
 		 * have a valid snapshot.
 		 */
 		PushActiveSnapshot(GetTransactionSnapshot());
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 54ad38247aa..ea96947d813 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -430,8 +430,8 @@ static void AlterConstrUpdateConstraintEntry(ATAlterConstraint *cmdcon, Relation
 static ObjectAddress ATExecValidateConstraint(List **wqueue,
 											  Relation rel, char *constrName,
 											  bool recurse, bool recursing, LOCKMODE lockmode);
-static void QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
-										HeapTuple contuple, LOCKMODE lockmode);
+static void QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation fkrel,
+										Oid pkrelid, HeapTuple contuple, LOCKMODE lockmode);
 static void QueueCheckConstraintValidation(List **wqueue, Relation conrel, Relation rel,
 										   char *constrName, HeapTuple contuple,
 										   bool recurse, bool recursing, LOCKMODE lockmode);
@@ -11858,6 +11858,7 @@ AttachPartitionForeignKey(List **wqueue,
 	if (queueValidation)
 	{
 		Relation	conrel;
+		Oid			confrelid;
 
 		conrel = table_open(ConstraintRelationId, RowExclusiveLock);
 
@@ -11865,9 +11866,11 @@ AttachPartitionForeignKey(List **wqueue,
 		if (!HeapTupleIsValid(partcontup))
 			elog(ERROR, "cache lookup failed for constraint %u", partConstrOid);
 
+		confrelid = ((Form_pg_constraint) GETSTRUCT(partcontup))->confrelid;
+
 		/* Use the same lock as for AT_ValidateConstraint */
-		QueueFKConstraintValidation(wqueue, conrel, partition, partcontup,
-									ShareUpdateExclusiveLock);
+		QueueFKConstraintValidation(wqueue, conrel, partition, confrelid,
+									partcontup, ShareUpdateExclusiveLock);
 		ReleaseSysCache(partcontup);
 		table_close(conrel, RowExclusiveLock);
 	}
@@ -12463,9 +12466,12 @@ ATExecAlterConstrEnforceability(List **wqueue, ATAlterConstraint *cmdcon,
 
 		/*
 		 * Tell Phase 3 to check that the constraint is satisfied by existing
-		 * rows.
+		 * rows.  Only applies to leaf partitions, and (for constraints that
+		 * reference a partitioned table) only if this is not one of the
+		 * pg_constraint rows that exist solely to support action triggers.
 		 */
-		if (rel->rd_rel->relkind == RELKIND_RELATION)
+		if (rel->rd_rel->relkind == RELKIND_RELATION &&
+			currcon->confrelid == pkrelid)
 		{
 			AlteredTableInfo *tab;
 			NewConstraint *newcon;
@@ -12919,7 +12925,8 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
 	{
 		if (con->contype == CONSTRAINT_FOREIGN)
 		{
-			QueueFKConstraintValidation(wqueue, conrel, rel, tuple, lockmode);
+			QueueFKConstraintValidation(wqueue, conrel, rel, con->confrelid,
+										tuple, lockmode);
 		}
 		else if (con->contype == CONSTRAINT_CHECK)
 		{
@@ -12952,8 +12959,8 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
  * for the specified relation and all its children.
  */
 static void
-QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
-							HeapTuple contuple, LOCKMODE lockmode)
+QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation fkrel,
+							Oid pkrelid, HeapTuple contuple, LOCKMODE lockmode)
 {
 	Form_pg_constraint con;
 	AlteredTableInfo *tab;
@@ -12964,7 +12971,17 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
 	Assert(con->contype == CONSTRAINT_FOREIGN);
 	Assert(!con->convalidated);
 
-	if (rel->rd_rel->relkind == RELKIND_RELATION)
+	/*
+	 * Add the validation to phase 3's queue; not needed for partitioned
+	 * tables themselves, only for their partitions.
+	 *
+	 * When the referenced table (pkrelid) is partitioned, the referencing
+	 * table (fkrel) has one pg_constraint row pointing to each partition
+	 * thereof.  These rows are there only to support action triggers and no
+	 * table scan is needed, therefore skip this for them as well.
+	 */
+	if (fkrel->rd_rel->relkind == RELKIND_RELATION &&
+		con->confrelid == pkrelid)
 	{
 		NewConstraint *newcon;
 		Constraint *fkconstraint;
@@ -12983,15 +13000,16 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
 		newcon->qual = (Node *) fkconstraint;
 
 		/* Find or create work queue entry for this table */
-		tab = ATGetQueueEntry(wqueue, rel);
+		tab = ATGetQueueEntry(wqueue, fkrel);
 		tab->constraints = lappend(tab->constraints, newcon);
 	}
 
 	/*
 	 * If the table at either end of the constraint is partitioned, we need to
-	 * recurse and handle every constraint that is a child of this constraint.
+	 * recurse and handle every unvalidated constraint that is a child of this
+	 * constraint.
 	 */
-	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+	if (fkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
 		get_rel_relkind(con->confrelid) == RELKIND_PARTITIONED_TABLE)
 	{
 		ScanKeyData pkey;
@@ -13023,8 +13041,12 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
 
 			childrel = table_open(childcon->conrelid, lockmode);
 
-			QueueFKConstraintValidation(wqueue, conrel, childrel, childtup,
-										lockmode);
+			/*
+			 * NB: pkrelid should be passed as-is during recursion,
+			 * as it is required to identify the root referenced table.
+			 */
+			QueueFKConstraintValidation(wqueue, conrel, childrel, pkrelid,
+										childtup, lockmode);
 			table_close(childrel, NoLock);
 		}
 
@@ -13032,7 +13054,11 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
 	}
 
 	/*
-	 * Now update the catalog, while we have the door open.
+	 * Now mark the pg_constraint row as validated (even if we didn't check,
+	 * notably the ones for partitions on the referenced side).
+	 *
+	 * We rely on transaction abort to roll back this change if phase 3
+	 * ultimately finds violating rows.  This is a bit ugly.
 	 */
 	copyTuple = heap_copytuple(contuple);
 	copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple);
@@ -20964,9 +20990,17 @@ ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel,
 		tab->rel = rel;
 	}
 
+	/*
+	 * Detaching the partition might involve TOAST table access, so ensure we
+	 * have a valid snapshot.
+	 */
+	PushActiveSnapshot(GetTransactionSnapshot());
+
 	/* Do the final part of detaching */
 	DetachPartitionFinalize(rel, partRel, concurrent, defaultPartOid);
 
+	PopActiveSnapshot();
+
 	ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel));
 
 	/* keep our lock until commit */
diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c
index 2b9d548cdeb..0feea1d30ec 100644
--- a/src/backend/commands/vacuumparallel.c
+++ b/src/backend/commands/vacuumparallel.c
@@ -63,7 +63,7 @@ typedef struct PVShared
 	 */
 	Oid			relid;
 	int			elevel;
-	uint64		queryid;
+	int64		queryid;
 
 	/*
 	 * Fields for both index vacuum and cleanup.
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 2bc89bf84dc..54da8e7995b 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -64,6 +64,7 @@
 #include "nodes/nodeFuncs.h"
 #include "optimizer/optimizer.h"
 #include "rewrite/rewriteHandler.h"
+#include "rewrite/rewriteManip.h"
 #include "storage/lmgr.h"
 #include "utils/builtins.h"
 #include "utils/datum.h"
@@ -3735,6 +3736,7 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate)
 			switch (action->commandType)
 			{
 				case CMD_INSERT:
+					/* INSERT actions always use rootRelInfo */
 					ExecCheckPlanOutput(rootRelInfo->ri_RelationDesc,
 										action->targetList);
 
@@ -3774,9 +3776,23 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate)
 					}
 					else
 					{
-						/* not partitioned? use the stock relation and slot */
-						tgtslot = resultRelInfo->ri_newTupleSlot;
-						tgtdesc = RelationGetDescr(resultRelInfo->ri_RelationDesc);
+						/*
+						 * If the MERGE targets an inherited table, we insert
+						 * into the root table, so we must initialize its
+						 * "new" tuple slot, if not already done, and use its
+						 * relation descriptor for the projection.
+						 *
+						 * For non-inherited tables, rootRelInfo and
+						 * resultRelInfo are the same, and the "new" tuple
+						 * slot will already have been initialized.
+						 */
+						if (rootRelInfo->ri_newTupleSlot == NULL)
+							rootRelInfo->ri_newTupleSlot =
+								table_slot_create(rootRelInfo->ri_RelationDesc,
+												  &estate->es_tupleTable);
+
+						tgtslot = rootRelInfo->ri_newTupleSlot;
+						tgtdesc = RelationGetDescr(rootRelInfo->ri_RelationDesc);
 					}
 
 					action_state->mas_proj =
@@ -3809,6 +3825,114 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate)
 			}
 		}
 	}
+
+	/*
+	 * If the MERGE targets an inherited table, any INSERT actions will use
+	 * rootRelInfo, and rootRelInfo will not be in the resultRelInfo array.
+	 * Therefore we must initialize its WITH CHECK OPTION constraints and
+	 * RETURNING projection, as ExecInitModifyTable did for the resultRelInfo
+	 * entries.
+	 *
+	 * Note that the planner does not build a withCheckOptionList or
+	 * returningList for the root relation, but as in ExecInitPartitionInfo,
+	 * we can use the first resultRelInfo entry as a reference to calculate
+	 * the attno's for the root table.
+	 */
+	if (rootRelInfo != mtstate->resultRelInfo &&
+		rootRelInfo->ri_RelationDesc->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
+		(mtstate->mt_merge_subcommands & MERGE_INSERT) != 0)
+	{
+		ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
+		Relation	rootRelation = rootRelInfo->ri_RelationDesc;
+		Relation	firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
+		int			firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
+		AttrMap    *part_attmap = NULL;
+		bool		found_whole_row;
+
+		if (node->withCheckOptionLists != NIL)
+		{
+			List	   *wcoList;
+			List	   *wcoExprs = NIL;
+
+			/* There should be as many WCO lists as result rels */
+			Assert(list_length(node->withCheckOptionLists) ==
+				   list_length(node->resultRelations));
+
+			/*
+			 * Use the first WCO list as a reference. In the most common case,
+			 * this will be for the same relation as rootRelInfo, and so there
+			 * will be no need to adjust its attno's.
+			 */
+			wcoList = linitial(node->withCheckOptionLists);
+			if (rootRelation != firstResultRel)
+			{
+				/* Convert any Vars in it to contain the root's attno's */
+				part_attmap =
+					build_attrmap_by_name(RelationGetDescr(rootRelation),
+										  RelationGetDescr(firstResultRel),
+										  false);
+
+				wcoList = (List *)
+					map_variable_attnos((Node *) wcoList,
+										firstVarno, 0,
+										part_attmap,
+										RelationGetForm(rootRelation)->reltype,
+										&found_whole_row);
+			}
+
+			foreach(lc, wcoList)
+			{
+				WithCheckOption *wco = lfirst_node(WithCheckOption, lc);
+				ExprState  *wcoExpr = ExecInitQual(castNode(List, wco->qual),
+												   &mtstate->ps);
+
+				wcoExprs = lappend(wcoExprs, wcoExpr);
+			}
+
+			rootRelInfo->ri_WithCheckOptions = wcoList;
+			rootRelInfo->ri_WithCheckOptionExprs = wcoExprs;
+		}
+
+		if (node->returningLists != NIL)
+		{
+			List	   *returningList;
+
+			/* There should be as many returning lists as result rels */
+			Assert(list_length(node->returningLists) ==
+				   list_length(node->resultRelations));
+
+			/*
+			 * Use the first returning list as a reference. In the most common
+			 * case, this will be for the same relation as rootRelInfo, and so
+			 * there will be no need to adjust its attno's.
+			 */
+			returningList = linitial(node->returningLists);
+			if (rootRelation != firstResultRel)
+			{
+				/* Convert any Vars in it to contain the root's attno's */
+				if (part_attmap == NULL)
+					part_attmap =
+						build_attrmap_by_name(RelationGetDescr(rootRelation),
+											  RelationGetDescr(firstResultRel),
+											  false);
+
+				returningList = (List *)
+					map_variable_attnos((Node *) returningList,
+										firstVarno, 0,
+										part_attmap,
+										RelationGetForm(rootRelation)->reltype,
+										&found_whole_row);
+			}
+			rootRelInfo->ri_returningList = returningList;
+
+			/* Initialize the RETURNING projection */
+			rootRelInfo->ri_projectReturning =
+				ExecBuildProjectionInfo(returningList, econtext,
+										mtstate->ps.ps_ResultTupleSlot,
+										&mtstate->ps,
+										RelationGetDescr(rootRelation));
+		}
+	}
 }
 
 /*
diff --git a/src/backend/libpq/be-secure-gssapi.c b/src/backend/libpq/be-secure-gssapi.c
index 717ba9824f9..3534f0b8111 100644
--- a/src/backend/libpq/be-secure-gssapi.c
+++ b/src/backend/libpq/be-secure-gssapi.c
@@ -46,11 +46,18 @@
  * don't want the other side to send arbitrarily huge packets as we
  * would have to allocate memory for them to then pass them to GSSAPI.
  *
- * Therefore, these two #define's are effectively part of the protocol
+ * Therefore, this #define is effectively part of the protocol
  * spec and can't ever be changed.
  */
-#define PQ_GSS_SEND_BUFFER_SIZE 16384
-#define PQ_GSS_RECV_BUFFER_SIZE 16384
+#define PQ_GSS_MAX_PACKET_SIZE 16384	/* includes uint32 header word */
+
+/*
+ * However, during the authentication exchange we must cope with whatever
+ * message size the GSSAPI library wants to send (because our protocol
+ * doesn't support splitting those messages).  Depending on configuration
+ * those messages might be as much as 64kB.
+ */
+#define PQ_GSS_AUTH_BUFFER_SIZE 65536	/* includes uint32 header word */
 
 /*
  * Since we manage at most one GSS-encrypted connection per backend,
@@ -210,12 +217,12 @@ be_gssapi_write(Port *port, const void *ptr, size_t len)
 			errno = ECONNRESET;
 			return -1;
 		}
-		if (output.length > PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))
+		if (output.length > PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))
 		{
 			ereport(COMMERROR,
 					(errmsg("server tried to send oversize GSSAPI packet (%zu > %zu)",
 							(size_t) output.length,
-							PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))));
+							PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))));
 			errno = ECONNRESET;
 			return -1;
 		}
@@ -346,12 +353,12 @@ be_gssapi_read(Port *port, void *ptr, size_t len)
 		/* Decode the packet length and check for overlength packet */
 		input.length = pg_ntoh32(*(uint32 *) PqGSSRecvBuffer);
 
-		if (input.length > PQ_GSS_RECV_BUFFER_SIZE - sizeof(uint32))
+		if (input.length > PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))
 		{
 			ereport(COMMERROR,
 					(errmsg("oversize GSSAPI packet sent by the client (%zu > %zu)",
 							(size_t) input.length,
-							PQ_GSS_RECV_BUFFER_SIZE - sizeof(uint32))));
+							PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))));
 			errno = ECONNRESET;
 			return -1;
 		}
@@ -517,10 +524,13 @@ secure_open_gssapi(Port *port)
 	 * that will never use them, and we ensure that the buffers are
 	 * sufficiently aligned for the length-word accesses that we do in some
 	 * places in this file.
+	 *
+	 * We'll use PQ_GSS_AUTH_BUFFER_SIZE-sized buffers until transport
+	 * negotiation is complete, then switch to PQ_GSS_MAX_PACKET_SIZE.
 	 */
-	PqGSSSendBuffer = malloc(PQ_GSS_SEND_BUFFER_SIZE);
-	PqGSSRecvBuffer = malloc(PQ_GSS_RECV_BUFFER_SIZE);
-	PqGSSResultBuffer = malloc(PQ_GSS_RECV_BUFFER_SIZE);
+	PqGSSSendBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE);
+	PqGSSRecvBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE);
+	PqGSSResultBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE);
 	if (!PqGSSSendBuffer || !PqGSSRecvBuffer || !PqGSSResultBuffer)
 		ereport(FATAL,
 				(errcode(ERRCODE_OUT_OF_MEMORY),
@@ -568,16 +578,16 @@ secure_open_gssapi(Port *port)
 
 		/*
 		 * During initialization, packets are always fully consumed and
-		 * shouldn't ever be over PQ_GSS_RECV_BUFFER_SIZE in length.
+		 * shouldn't ever be over PQ_GSS_AUTH_BUFFER_SIZE in total length.
 		 *
 		 * Verify on our side that the client doesn't do something funny.
 		 */
-		if (input.length > PQ_GSS_RECV_BUFFER_SIZE)
+		if (input.length > PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))
 		{
 			ereport(COMMERROR,
-					(errmsg("oversize GSSAPI packet sent by the client (%zu > %d)",
+					(errmsg("oversize GSSAPI packet sent by the client (%zu > %zu)",
 							(size_t) input.length,
-							PQ_GSS_RECV_BUFFER_SIZE)));
+							PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))));
 			return -1;
 		}
 
@@ -631,12 +641,12 @@ secure_open_gssapi(Port *port)
 		{
 			uint32		netlen = pg_hton32(output.length);
 
-			if (output.length > PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))
+			if (output.length > PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))
 			{
 				ereport(COMMERROR,
 						(errmsg("server tried to send oversize GSSAPI packet (%zu > %zu)",
 								(size_t) output.length,
-								PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))));
+								PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))));
 				gss_release_buffer(&minor, &output);
 				return -1;
 			}
@@ -692,11 +702,28 @@ secure_open_gssapi(Port *port)
 	}
 
 	/*
+	 * Release the large authentication buffers and allocate the ones we want
+	 * for normal operation.
+	 */
+	free(PqGSSSendBuffer);
+	free(PqGSSRecvBuffer);
+	free(PqGSSResultBuffer);
+	PqGSSSendBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE);
+	PqGSSRecvBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE);
+	PqGSSResultBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE);
+	if (!PqGSSSendBuffer || !PqGSSRecvBuffer || !PqGSSResultBuffer)
+		ereport(FATAL,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory")));
+	PqGSSSendLength = PqGSSSendNext = PqGSSSendConsumed = 0;
+	PqGSSRecvLength = PqGSSResultLength = PqGSSResultNext = 0;
+
+	/*
 	 * Determine the max packet size which will fit in our buffer, after
 	 * accounting for the length.  be_gssapi_write will need this.
 	 */
 	major = gss_wrap_size_limit(&minor, port->gss->ctx, 1, GSS_C_QOP_DEFAULT,
-								PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32),
+								PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32),
 								&PqGSSMaxPktSize);
 
 	if (GSS_ERROR(major))
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 77659b0f760..c8595109b0e 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1039,6 +1039,11 @@ _read${n}(void)
 			print $off "\tWRITE_UINT_FIELD($f);\n";
 			print $rff "\tREAD_UINT_FIELD($f);\n" unless $no_read;
 		}
+		elsif ($t eq 'int64')
+		{
+			print $off "\tWRITE_INT64_FIELD($f);\n";
+			print $rff "\tREAD_INT64_FIELD($f);\n" unless $no_read;
+		}
 		elsif ($t eq 'uint64'
 			|| $t eq 'AclMode')
 		{
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index ceac3fd8620..25e08ba3426 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -51,6 +51,12 @@ static void outDouble(StringInfo str, double d);
 #define WRITE_UINT_FIELD(fldname) \
 	appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname)
 
+/* Write a signed integer field (anything written with INT64_FORMAT) */
+#define WRITE_INT64_FIELD(fldname) \
+	appendStringInfo(str, \
+					 " :" CppAsString(fldname) " " INT64_FORMAT, \
+					 node->fldname)
+
 /* Write an unsigned integer field (anything written with UINT64_FORMAT) */
 #define WRITE_UINT64_FIELD(fldname) \
 	appendStringInfo(str, " :" CppAsString(fldname) " " UINT64_FORMAT, \
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index d1e82a63f09..ac3cb3d9caf 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -56,7 +56,7 @@ int			compute_query_id = COMPUTE_QUERY_ID_AUTO;
 bool		query_id_enabled = false;
 
 static JumbleState *InitJumble(void);
-static uint64 DoJumble(JumbleState *jstate, Node *node);
+static int64 DoJumble(JumbleState *jstate, Node *node);
 static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *value, Size size);
 static void FlushPendingNulls(JumbleState *jstate);
@@ -141,12 +141,12 @@ JumbleQuery(Query *query)
 	 * If we are unlucky enough to get a hash of zero, use 1 instead for
 	 * normal statements and 2 for utility queries.
 	 */
-	if (query->queryId == UINT64CONST(0))
+	if (query->queryId == INT64CONST(0))
 	{
 		if (query->utilityStmt)
-			query->queryId = UINT64CONST(2);
+			query->queryId = INT64CONST(2);
 		else
-			query->queryId = UINT64CONST(1);
+			query->queryId = INT64CONST(1);
 	}
 
 	return jstate;
@@ -197,7 +197,7 @@ InitJumble(void)
  *		Jumble the given Node using the given JumbleState and return the resulting
  *		jumble hash.
  */
-static uint64
+static int64
 DoJumble(JumbleState *jstate, Node *node)
 {
 	/* Jumble the given node */
@@ -208,9 +208,9 @@ DoJumble(JumbleState *jstate, Node *node)
 		FlushPendingNulls(jstate);
 
 	/* Process the jumble buffer and produce the hash value */
-	return DatumGetUInt64(hash_any_extended(jstate->jumble,
-											jstate->jumble_len,
-											0));
+	return DatumGetInt64(hash_any_extended(jstate->jumble,
+										   jstate->jumble_len,
+										   0));
 }
 
 /*
@@ -256,10 +256,10 @@ AppendJumbleInternal(JumbleState *jstate, const unsigned char *item,
 
 		if (unlikely(jumble_len >= JUMBLE_SIZE))
 		{
-			uint64		start_hash;
+			int64		start_hash;
 
-			start_hash = DatumGetUInt64(hash_any_extended(jumble,
-														  JUMBLE_SIZE, 0));
+			start_hash = DatumGetInt64(hash_any_extended(jumble,
+														 JUMBLE_SIZE, 0));
 			memcpy(jumble, &start_hash, sizeof(start_hash));
 			jumble_len = sizeof(start_hash);
 		}
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 64d3a09f765..8c90ab54af8 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -68,6 +68,12 @@
 	token = pg_strtok(&length);		/* get field value */ \
 	local_node->fldname = atoui(token)
 
+/* Read a signed integer field (anything written using INT64_FORMAT) */
+#define READ_INT64_FIELD(fldname) \
+	token = pg_strtok(&length); /* skip :fldname */ \
+	token = pg_strtok(&length); /* get field value */ \
+	local_node->fldname = strtoi64(token, NULL, 10)
+
 /* Read an unsigned integer field (anything written using UINT64_FORMAT) */
 #define READ_UINT64_FIELD(fldname) \
 	token = pg_strtok(&length);		/* skip :fldname */ \
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 981be42e3af..451fb90a610 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -2234,6 +2234,12 @@ do_autovacuum(void)
 						get_namespace_name(classForm->relnamespace),
 						NameStr(classForm->relname))));
 
+		/*
+		 * Deletion might involve TOAST table access, so ensure we have a
+		 * valid snapshot.
+		 */
+		PushActiveSnapshot(GetTransactionSnapshot());
+
 		object.classId = RelationRelationId;
 		object.objectId = relid;
 		object.objectSubId = 0;
@@ -2246,6 +2252,7 @@ do_autovacuum(void)
 		 * To commit the deletion, end current transaction and start a new
 		 * one.  Note this also releases the locks we took.
 		 */
+		PopActiveSnapshot();
 		CommitTransactionCommand();
 		StartTransactionCommand();
 
diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c
index 10677da56b2..1c3c051403d 100644
--- a/src/backend/replication/logical/launcher.c
+++ b/src/backend/replication/logical/launcher.c
@@ -1016,7 +1016,7 @@ logicalrep_launcher_attach_dshmem(void)
 		last_start_times_dsa = dsa_attach(LogicalRepCtx->last_start_dsa);
 		dsa_pin_mapping(last_start_times_dsa);
 		last_start_times = dshash_attach(last_start_times_dsa, &dsh_params,
-										 LogicalRepCtx->last_start_dsh, 0);
+										 LogicalRepCtx->last_start_dsh, NULL);
 	}
 
 	MemoryContextSwitchTo(oldcontext);
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 4151a4b2a96..a23262957ac 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -4626,8 +4626,16 @@ run_apply_worker()
 		walrcv_startstreaming(LogRepWorkerWalRcvConn, &options);
 
 		StartTransactionCommand();
+
+		/*
+		 * Updating pg_subscription might involve TOAST table access, so
+		 * ensure we have a valid snapshot.
+		 */
+		PushActiveSnapshot(GetTransactionSnapshot());
+
 		UpdateTwoPhaseState(MySubscription->oid, LOGICALREP_TWOPHASE_STATE_ENABLED);
 		MySubscription->twophasestate = LOGICALREP_TWOPHASE_STATE_ENABLED;
+		PopActiveSnapshot();
 		CommitTransactionCommand();
 	}
 	else
@@ -4843,7 +4851,15 @@ DisableSubscriptionAndExit(void)
 
 	/* Disable the subscription */
 	StartTransactionCommand();
+
+	/*
+	 * Updating pg_subscription might involve TOAST table access, so ensure we
+	 * have a valid snapshot.
+	 */
+	PushActiveSnapshot(GetTransactionSnapshot());
+
 	DisableSubscription(MySubscription->oid);
+	PopActiveSnapshot();
 	CommitTransactionCommand();
 
 	/* Ensure we remove no-longer-useful entry for worker's start time */
@@ -4948,6 +4964,12 @@ clear_subscription_skip_lsn(XLogRecPtr finish_lsn)
 	}
 
 	/*
+	 * Updating pg_subscription might involve TOAST table access, so ensure we
+	 * have a valid snapshot.
+	 */
+	PushActiveSnapshot(GetTransactionSnapshot());
+
+	/*
 	 * Protect subskiplsn of pg_subscription from being concurrently updated
 	 * while clearing it.
 	 */
@@ -5005,6 +5027,8 @@ clear_subscription_skip_lsn(XLogRecPtr finish_lsn)
 	heap_freetuple(tup);
 	table_close(rel, NoLock);
 
+	PopActiveSnapshot();
+
 	if (started_tx)
 		CommitTransactionCommand();
 }
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 9fa8beb6103..f2c33250e8b 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -3449,8 +3449,16 @@ XLogSendLogical(void)
 	if (flushPtr == InvalidXLogRecPtr ||
 		logical_decoding_ctx->reader->EndRecPtr >= flushPtr)
 	{
+		/*
+		 * For cascading logical WAL senders, we use the replay LSN instead of
+		 * the flush LSN, since logical decoding on a standby only processes
+		 * WAL that has been replayed.  This distinction becomes particularly
+		 * important during shutdown, as new WAL is no longer replayed and the
+		 * last replayed LSN marks the furthest point up to which decoding can
+		 * proceed.
+		 */
 		if (am_cascading_walsender)
-			flushPtr = GetStandbyFlushRecPtr(NULL);
+			flushPtr = GetXLogReplayRecPtr(NULL);
 		else
 			flushPtr = GetFlushRecPtr(NULL);
 	}
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index f0bce5f9ed9..2ef0e7fbf3a 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -4544,7 +4544,7 @@ build_generation_expression(Relation rel, int attrno)
 List *
 QueryRewrite(Query *parsetree)
 {
-	uint64		input_query_id = parsetree->queryId;
+	int64		input_query_id = parsetree->queryId;
 	List	   *querylist;
 	List	   *results;
 	ListCell   *l;
diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c
index c64d815ebd1..6c6c0a908e2 100644
--- a/src/backend/storage/aio/aio.c
+++ b/src/backend/storage/aio/aio.c
@@ -752,7 +752,7 @@ pgaio_io_wait_for_free(void)
 {
 	int			reclaimed = 0;
 
-	pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %d in-flight, %d idle IOs",
+	pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %u in-flight, %u idle IOs",
 				pgaio_my_backend->num_staged_ios,
 				dclist_count(&pgaio_my_backend->in_flight_ios),
 				dclist_count(&pgaio_my_backend->idle_ios));
@@ -797,7 +797,7 @@ pgaio_io_wait_for_free(void)
 	if (dclist_count(&pgaio_my_backend->in_flight_ios) == 0)
 		ereport(ERROR,
 				errmsg_internal("no free IOs despite no in-flight IOs"),
-				errdetail_internal("%d pending, %d in-flight, %d idle IOs",
+				errdetail_internal("%d pending, %u in-flight, %u idle IOs",
 								   pgaio_my_backend->num_staged_ios,
 								   dclist_count(&pgaio_my_backend->in_flight_ios),
 								   dclist_count(&pgaio_my_backend->idle_ios)));
@@ -828,7 +828,7 @@ pgaio_io_wait_for_free(void)
 			case PGAIO_HS_COMPLETED_IO:
 			case PGAIO_HS_SUBMITTED:
 				pgaio_debug_io(DEBUG2, ioh,
-							   "waiting for free io with %d in flight",
+							   "waiting for free io with %u in flight",
 							   dclist_count(&pgaio_my_backend->in_flight_ios));
 
 				/*
@@ -1252,7 +1252,7 @@ pgaio_closing_fd(int fd)
 				break;
 
 			pgaio_debug_io(DEBUG2, ioh,
-						   "waiting for IO before FD %d gets closed, %d in-flight IOs",
+						   "waiting for IO before FD %d gets closed, %u in-flight IOs",
 						   fd, dclist_count(&pgaio_my_backend->in_flight_ios));
 
 			/* see comment in pgaio_io_wait_for_free() about raciness */
@@ -1288,7 +1288,7 @@ pgaio_shutdown(int code, Datum arg)
 		uint64		generation = ioh->generation;
 
 		pgaio_debug_io(DEBUG2, ioh,
-					   "waiting for IO to complete during shutdown, %d in-flight IOs",
+					   "waiting for IO to complete during shutdown, %u in-flight IOs",
 					   dclist_count(&pgaio_my_backend->in_flight_ios));
 
 		/* see comment in pgaio_io_wait_for_free() about raciness */
diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c
index c719ba2727a..cc312b641ca 100644
--- a/src/backend/storage/aio/method_io_uring.c
+++ b/src/backend/storage/aio/method_io_uring.c
@@ -126,7 +126,7 @@ pgaio_uring_shmem_size(void)
 static void
 pgaio_uring_shmem_init(bool first_time)
 {
-	int			TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS - MAX_IO_WORKERS;
+	int			TotalProcs = pgaio_uring_procs();
 	bool		found;
 
 	pgaio_uring_contexts = (PgAioUringContext *)
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f93131a645e..667aa0c0c78 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -7320,7 +7320,7 @@ buffer_readv_report(PgAioResult result, const PgAioTargetData *td,
 				affected_count > 1 ?
 				errdetail("Block %u held first zeroed page.",
 						  first + first_off) : 0,
-				errhint("See server log for details about the other %u invalid block(s).",
+				errhint("See server log for details about the other %d invalid block(s).",
 						affected_count + checkfail_count - 1));
 		return;
 	}
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 63101d56a07..ba26627f7b0 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -629,7 +629,7 @@ InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced)
 	 */
 	if (check_unreferenced &&
 		(LocalRefCount[bufid] != 0 || BUF_STATE_GET_REFCOUNT(buf_state) != 0))
-		elog(ERROR, "block %u of %s is still referenced (local %u)",
+		elog(ERROR, "block %u of %s is still referenced (local %d)",
 			 bufHdr->tag.blockNum,
 			 relpathbackend(BufTagGetRelFileLocator(&bufHdr->tag),
 							MyProcNumber,
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index f50962983c3..3f6bf70bd3c 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -717,7 +717,10 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid,
 		 * through, to avoid slowing down the normal case.)
 		 */
 		if (!first)
+		{
+			CHECK_FOR_INTERRUPTS();
 			pg_usleep(1000L);
+		}
 		first = false;
 		xid = SubTransGetTopmostTransaction(xid);
 	}
@@ -757,7 +760,10 @@ ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
 
 		/* See XactLockTableWait about this case */
 		if (!first)
+		{
+			CHECK_FOR_INTERRUPTS();
 			pg_usleep(1000L);
+		}
 		first = false;
 		xid = SubTransGetTopmostTransaction(xid);
 	}
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 86b06b9223f..2776ceb295b 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -51,7 +51,7 @@
 
 /* GUC variables */
 int			max_locks_per_xact; /* used to set the lock table size */
-bool		log_lock_failure = false;
+bool		log_lock_failures = false;
 
 #define NLOCKENTS() \
 	mul_size(max_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index c242c8170b5..2f8c3d5f918 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -1682,7 +1682,7 @@ exec_bind_message(StringInfo input_message)
 	{
 		Query	   *query = lfirst_node(Query, lc);
 
-		if (query->queryId != UINT64CONST(0))
+		if (query->queryId != INT64CONST(0))
 		{
 			pgstat_report_query_id(query->queryId, false);
 			break;
@@ -2034,7 +2034,7 @@ exec_bind_message(StringInfo input_message)
 	{
 		PlannedStmt *plan = lfirst_node(PlannedStmt, lc);
 
-		if (plan->planId != UINT64CONST(0))
+		if (plan->planId != INT64CONST(0))
 		{
 			pgstat_report_plan_id(plan->planId, false);
 			break;
@@ -2174,7 +2174,7 @@ exec_execute_message(const char *portal_name, long max_rows)
 	{
 		PlannedStmt *stmt = lfirst_node(PlannedStmt, lc);
 
-		if (stmt->queryId != UINT64CONST(0))
+		if (stmt->queryId != INT64CONST(0))
 		{
 			pgstat_report_query_id(stmt->queryId, false);
 			break;
@@ -2185,7 +2185,7 @@ exec_execute_message(const char *portal_name, long max_rows)
 	{
 		PlannedStmt *stmt = lfirst_node(PlannedStmt, lc);
 
-		if (stmt->planId != UINT64CONST(0))
+		if (stmt->planId != INT64CONST(0))
 		{
 			pgstat_report_plan_id(stmt->planId, false);
 			break;
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
index e1576e64b6d..a290cc4c975 100644
--- a/src/backend/utils/activity/backend_status.c
+++ b/src/backend/utils/activity/backend_status.c
@@ -320,8 +320,8 @@ pgstat_bestart_initial(void)
 	lbeentry.st_state = STATE_STARTING;
 	lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID;
 	lbeentry.st_progress_command_target = InvalidOid;
-	lbeentry.st_query_id = UINT64CONST(0);
-	lbeentry.st_plan_id = UINT64CONST(0);
+	lbeentry.st_query_id = INT64CONST(0);
+	lbeentry.st_plan_id = INT64CONST(0);
 
 	/*
 	 * we don't zero st_progress_param here to save cycles; nobody should
@@ -599,8 +599,8 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
 			beentry->st_activity_start_timestamp = 0;
 			/* st_xact_start_timestamp and wait_event_info are also disabled */
 			beentry->st_xact_start_timestamp = 0;
-			beentry->st_query_id = UINT64CONST(0);
-			beentry->st_plan_id = UINT64CONST(0);
+			beentry->st_query_id = INT64CONST(0);
+			beentry->st_plan_id = INT64CONST(0);
 			proc->wait_event_info = 0;
 			PGSTAT_END_WRITE_ACTIVITY(beentry);
 		}
@@ -662,8 +662,8 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
 	 */
 	if (state == STATE_RUNNING)
 	{
-		beentry->st_query_id = UINT64CONST(0);
-		beentry->st_plan_id = UINT64CONST(0);
+		beentry->st_query_id = INT64CONST(0);
+		beentry->st_plan_id = INT64CONST(0);
 	}
 
 	if (cmd_str != NULL)
@@ -683,7 +683,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
  * --------
  */
 void
-pgstat_report_query_id(uint64 query_id, bool force)
+pgstat_report_query_id(int64 query_id, bool force)
 {
 	volatile PgBackendStatus *beentry = MyBEEntry;
 
@@ -702,7 +702,7 @@ pgstat_report_query_id(uint64 query_id, bool force)
 	 * command, so ignore the one provided unless it's an explicit call to
 	 * reset the identifier.
 	 */
-	if (beentry->st_query_id != 0 && !force)
+	if (beentry->st_query_id != INT64CONST(0) && !force)
 		return;
 
 	/*
@@ -722,7 +722,7 @@ pgstat_report_query_id(uint64 query_id, bool force)
  * --------
  */
 void
-pgstat_report_plan_id(uint64 plan_id, bool force)
+pgstat_report_plan_id(int64 plan_id, bool force)
 {
 	volatile PgBackendStatus *beentry = MyBEEntry;
 
@@ -1134,7 +1134,7 @@ pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen)
  *
  * Return current backend's query identifier.
  */
-uint64
+int64
 pgstat_get_my_query_id(void)
 {
 	if (!MyBEEntry)
@@ -1154,7 +1154,7 @@ pgstat_get_my_query_id(void)
  *
  * Return current backend's plan identifier.
  */
-uint64
+int64
 pgstat_get_my_plan_id(void)
 {
 	if (!MyBEEntry)
diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c
index 2e33293b000..53e7d534270 100644
--- a/src/backend/utils/activity/pgstat_shmem.c
+++ b/src/backend/utils/activity/pgstat_shmem.c
@@ -183,7 +183,7 @@ StatsShmemInit(void)
 		p += MAXALIGN(pgstat_dsa_init_size());
 		dsa = dsa_create_in_place(ctl->raw_dsa_area,
 								  pgstat_dsa_init_size(),
-								  LWTRANCHE_PGSTATS_DSA, 0);
+								  LWTRANCHE_PGSTATS_DSA, NULL);
 		dsa_pin(dsa);
 
 		/*
@@ -255,7 +255,8 @@ pgstat_attach_shmem(void)
 	dsa_pin_mapping(pgStatLocal.dsa);
 
 	pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
-											pgStatLocal.shmem->hash_handle, 0);
+											pgStatLocal.shmem->hash_handle,
+											NULL);
 
 	MemoryContextSwitchTo(oldcontext);
 }
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 5d9e04d6823..4da68312b5f 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -401,6 +401,7 @@ SerialSLRU	"Waiting to access the serializable transaction conflict SLRU cache."
 SubtransSLRU	"Waiting to access the sub-transaction SLRU cache."
 XactSLRU	"Waiting to access the transaction status SLRU cache."
 ParallelVacuumDSA	"Waiting for parallel vacuum dynamic shared memory allocation."
+AioUringCompletion	"Waiting for another process to complete IO via io_uring."
 
 # No "ABI_compatibility" region here as WaitEventLWLock has its own C code.
 
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 793d8a9adcc..680fee2a844 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -702,9 +702,18 @@ ParseFraction(char *cp, double *frac)
 	}
 	else
 	{
+		/*
+		 * On the other hand, let's reject anything that's not digits after
+		 * the ".".  strtod is happy with input like ".123e9", but that'd
+		 * break callers' expectation that the result is in 0..1.  (It's quite
+		 * difficult to get here with such input, but not impossible.)
+		 */
+		if (strspn(cp + 1, "0123456789") != strlen(cp + 1))
+			return DTERR_BAD_FORMAT;
+
 		errno = 0;
 		*frac = strtod(cp, &cp);
-		/* check for parse failure */
+		/* check for parse failure (probably redundant given prior check) */
 		if (*cp != '\0' || errno != 0)
 			return DTERR_BAD_FORMAT;
 	}
@@ -2959,30 +2968,27 @@ DecodeNumberField(int len, char *str, int fmask,
 	char	   *cp;
 
 	/*
+	 * This function was originally meant to cope only with DTK_NUMBER fields,
+	 * but we now sometimes abuse it to parse (parts of) DTK_DATE fields,
+	 * which can contain letters and other punctuation.  Reject if it's not a
+	 * valid DTK_NUMBER, that is digits and decimal point(s).  (ParseFraction
+	 * will reject if there's more than one decimal point.)
+	 */
+	if (strspn(str, "0123456789.") != len)
+		return DTERR_BAD_FORMAT;
+
+	/*
 	 * Have a decimal point? Then this is a date or something with a seconds
 	 * field...
 	 */
 	if ((cp = strchr(str, '.')) != NULL)
 	{
-		/*
-		 * Can we use ParseFractionalSecond here?  Not clear whether trailing
-		 * junk should be rejected ...
-		 */
-		if (cp[1] == '\0')
-		{
-			/* avoid assuming that strtod will accept "." */
-			*fsec = 0;
-		}
-		else
-		{
-			double		frac;
+		int			dterr;
 
-			errno = 0;
-			frac = strtod(cp, NULL);
-			if (errno != 0)
-				return DTERR_BAD_FORMAT;
-			*fsec = rint(frac * 1000000);
-		}
+		/* Convert the fraction and store at *fsec */
+		dterr = ParseFractionalSecond(cp, fsec);
+		if (dterr)
+			return dterr;
 		/* Now truncate off the fraction for further processing */
 		*cp = '\0';
 		len = strlen(str);
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 97af7c6554f..1c12ddbae49 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -640,10 +640,10 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 				values[28] = BoolGetDatum(false);	/* GSS credentials not
 													 * delegated */
 			}
-			if (beentry->st_query_id == 0)
+			if (beentry->st_query_id == INT64CONST(0))
 				nulls[30] = true;
 			else
-				values[30] = UInt64GetDatum(beentry->st_query_id);
+				values[30] = Int64GetDatum(beentry->st_query_id);
 		}
 		else
 		{
@@ -1510,7 +1510,7 @@ pg_stat_io_build_tuples(ReturnSetInfo *rsinfo,
 							bktype_stats->bytes[io_obj][io_context][io_op];
 
 						/* Convert to numeric */
-						snprintf(buf, sizeof buf, UINT64_FORMAT, byte);
+						snprintf(buf, sizeof buf, INT64_FORMAT, byte);
 						values[byte_idx] = DirectFunctionCall3(numeric_in,
 															   CStringGetDatum(buf),
 															   ObjectIdGetDatum(0),
diff --git a/src/backend/utils/cache/funccache.c b/src/backend/utils/cache/funccache.c
index 150c502a612..afc048a051e 100644
--- a/src/backend/utils/cache/funccache.c
+++ b/src/backend/utils/cache/funccache.c
@@ -491,6 +491,7 @@ cached_function_compile(FunctionCallInfo fcinfo,
 	CachedFunctionHashKey hashkey;
 	bool		function_valid = false;
 	bool		hashkey_valid = false;
+	bool		new_function = false;
 
 	/*
 	 * Lookup the pg_proc tuple by Oid; we'll need it in any case
@@ -570,13 +571,15 @@ recheck:
 
 		/*
 		 * Create the new function struct, if not done already.  The function
-		 * structs are never thrown away, so keep them in TopMemoryContext.
+		 * cache entry will be kept for the life of the backend, so put it in
+		 * TopMemoryContext.
 		 */
 		Assert(cacheEntrySize >= sizeof(CachedFunction));
 		if (function == NULL)
 		{
 			function = (CachedFunction *)
 				MemoryContextAllocZero(TopMemoryContext, cacheEntrySize);
+			new_function = true;
 		}
 		else
 		{
@@ -585,17 +588,36 @@ recheck:
 		}
 
 		/*
-		 * Fill in the CachedFunction part.  fn_hashkey and use_count remain
-		 * zeroes for now.
+		 * However, if function compilation fails, we'd like not to leak the
+		 * function struct, so use a PG_TRY block to prevent that.  (It's up
+		 * to the compile callback function to avoid its own internal leakage
+		 * in such cases.)  Unfortunately, freeing the struct is only safe if
+		 * we just allocated it: otherwise there are probably fn_extra
+		 * pointers to it.
 		 */
-		function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
-		function->fn_tid = procTup->t_self;
-		function->dcallback = dcallback;
+		PG_TRY();
+		{
+			/*
+			 * Do the hard, language-specific part.
+			 */
+			ccallback(fcinfo, procTup, &hashkey, function, forValidator);
+		}
+		PG_CATCH();
+		{
+			if (new_function)
+				pfree(function);
+			PG_RE_THROW();
+		}
+		PG_END_TRY();
 
 		/*
-		 * Do the hard, language-specific part.
+		 * Fill in the CachedFunction part.  (We do this last to prevent the
+		 * function from looking valid before it's fully built.)  fn_hashkey
+		 * will be set by cfunc_hashtable_insert; use_count remains zero.
 		 */
-		ccallback(fcinfo, procTup, &hashkey, function, forValidator);
+		function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
+		function->fn_tid = procTup->t_self;
+		function->dcallback = dcallback;
 
 		/*
 		 * Add the completed struct to the hash table.
diff --git a/src/backend/utils/fmgr/dfmgr.c b/src/backend/utils/fmgr/dfmgr.c
index 603632581d0..4bb84ff7087 100644
--- a/src/backend/utils/fmgr/dfmgr.c
+++ b/src/backend/utils/fmgr/dfmgr.c
@@ -99,6 +99,14 @@ load_external_function(const char *filename, const char *funcname,
 	void	   *lib_handle;
 	void	   *retval;
 
+	/*
+	 * If the value starts with "$libdir/", strip that.  This is because many
+	 * extensions have hardcoded '$libdir/foo' as their library name, which
+	 * would otherwise prevent searching dynamic_library_path for the library.
+	 */
+	if (strncmp(filename, "$libdir/", 8) == 0)
+		filename += 8;
+
 	/* Expand the possibly-abbreviated filename to an exact path name */
 	fullname = expand_dynamic_library_name(filename);
 
@@ -456,14 +464,6 @@ expand_dynamic_library_name(const char *name)
 
 	Assert(name);
 
-	/*
-	 * If the value starts with "$libdir/", strip that.  This is because many
-	 * extensions have hardcoded '$libdir/foo' as their library name, which
-	 * prevents using the path.
-	 */
-	if (strncmp(name, "$libdir/", 8) == 0)
-		name += 8;
-
 	have_slash = (first_dir_separator(name) != NULL);
 
 	if (!have_slash)
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 2f8cbd86759..f04bfedb2fd 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1602,11 +1602,11 @@ struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 	{
-		{"log_lock_failure", PGC_SUSET, LOGGING_WHAT,
+		{"log_lock_failures", PGC_SUSET, LOGGING_WHAT,
 			gettext_noop("Logs lock failures."),
 			NULL
 		},
-		&log_lock_failure,
+		&log_lock_failures,
 		false,
 		NULL, NULL, NULL
 	},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 87ce76b18f4..341f88adc87 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -624,7 +624,7 @@
 					#   %% = '%'
 					# e.g. '<%u%%%d> '
 #log_lock_waits = off			# log lock waits >= deadlock_timeout
-#log_lock_failure = off		# log lock failures
+#log_lock_failures = off		# log lock failures
 #log_recovery_conflict_waits = off	# log standby recovery conflict waits
 					# >= deadlock_timeout
 #log_parameter_max_length = -1		# when logging statements, limit logged