Diffstat (limited to 'src/backend')
 src/backend/access/brin/brin.c                  |   2
 src/backend/access/common/reloptions.c          |  17
 src/backend/access/gist/gistutil.c              |  14
 src/backend/access/gist/gistvalidate.c          |   6
 src/backend/access/heap/heapam.c                |  35
 src/backend/access/heap/heapam_handler.c        |   2
 src/backend/access/heap/vacuumlazy.c            |   3
 src/backend/access/nbtree/nbtree.c              |  30
 src/backend/access/nbtree/nbtsearch.c           |  70
 src/backend/access/nbtree/nbtsort.c             |   2
 src/backend/access/nbtree/nbtutils.c            |  93
 src/backend/access/transam/xact.c               |  28
 src/backend/commands/dbcommands.c               |  41
 src/backend/commands/explain.c                  |   8
 src/backend/commands/foreigncmds.c              |  15
 src/backend/commands/indexcmds.c                |   2
 src/backend/commands/tablecmds.c                |  66
 src/backend/commands/vacuumparallel.c           |   2
 src/backend/executor/nodeModifyTable.c          | 130
 src/backend/libpq/be-secure-gssapi.c            |  61
 src/backend/nodes/gen_node_support.pl           |   5
 src/backend/nodes/outfuncs.c                    |   6
 src/backend/nodes/queryjumblefuncs.c            |  22
 src/backend/nodes/readfuncs.c                   |   6
 src/backend/postmaster/autovacuum.c             |   7
 src/backend/replication/logical/launcher.c      |   2
 src/backend/replication/logical/worker.c        |  24
 src/backend/replication/walsender.c             |  10
 src/backend/rewrite/rewriteHandler.c            |   2
 src/backend/storage/aio/aio.c                   |  10
 src/backend/storage/aio/method_io_uring.c       |   2
 src/backend/storage/buffer/bufmgr.c             |   2
 src/backend/storage/buffer/localbuf.c           |   2
 src/backend/storage/lmgr/lmgr.c                 |   6
 src/backend/storage/lmgr/lock.c                 |   2
 src/backend/tcop/postgres.c                     |   8
 src/backend/utils/activity/backend_status.c     |  22
 src/backend/utils/activity/pgstat_shmem.c       |   5
 src/backend/utils/activity/wait_event_names.txt |   1
 src/backend/utils/adt/datetime.c                |  44
 src/backend/utils/adt/pgstatfuncs.c             |   6
 src/backend/utils/cache/funccache.c             |  38
 src/backend/utils/fmgr/dfmgr.c                  |  16
 src/backend/utils/misc/guc_tables.c             |   4
 src/backend/utils/misc/postgresql.conf.sample   |   2
45 files changed, 640 insertions(+), 241 deletions(-)
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 01e1db7f856..4204088fa0d 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -68,7 +68,7 @@ typedef struct BrinShared
int scantuplesortstates;
/* Query ID, for report in worker processes */
- uint64 queryid;
+ int64 queryid;
/*
* workersdonecv is used to monitor the progress of workers. All parallel
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 46c1dce222d..50747c16396 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -1243,8 +1243,9 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
}
else
{
- text *t;
+ const char *name;
const char *value;
+ text *t;
Size len;
/*
@@ -1291,11 +1292,19 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
* have just "name", assume "name=true" is meant. Note: the
* namespace is not output.
*/
+ name = def->defname;
if (def->arg != NULL)
value = defGetString(def);
else
value = "true";
+ /* Insist that name not contain "=", else "a=b=c" is ambiguous */
+ if (strchr(name, '=') != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid option name \"%s\": must not contain \"=\"",
+ name)));
+
/*
* This is not a great place for this test, but there's no other
* convenient place to filter the option out. As WITH (oids =
@@ -1303,7 +1312,7 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
* amount of ugly.
*/
if (acceptOidsOff && def->defnamespace == NULL &&
- strcmp(def->defname, "oids") == 0)
+ strcmp(name, "oids") == 0)
{
if (defGetBoolean(def))
ereport(ERROR,
@@ -1313,11 +1322,11 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
continue;
}
- len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value);
+ len = VARHDRSZ + strlen(name) + 1 + strlen(value);
/* +1 leaves room for sprintf's trailing null */
t = (text *) palloc(len + 1);
SET_VARSIZE(t, len);
- sprintf(VARDATA(t), "%s=%s", def->defname, value);
+ sprintf(VARDATA(t), "%s=%s", name, value);
astate = accumArrayResult(astate, PointerGetDatum(t),
false, TEXTOID,
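
The strchr() guard added above closes a real round-trip ambiguity: options are stored as flat "name=value" text, so a name containing "=" cannot be decoded unambiguously later. A minimal standalone sketch of the problem (plain C, nothing PostgreSQL-specific; pack_option is an illustrative stand-in for the sprintf in transformRelOptions):

#include <stdio.h>
#include <string.h>

/* Pack an option the way the patched code does: "name=value" */
static void
pack_option(char *buf, size_t bufsize, const char *name, const char *value)
{
	snprintf(buf, bufsize, "%s=%s", name, value);
}

int
main(void)
{
	char		a[64],
				b[64];

	pack_option(a, sizeof(a), "a=b", "c");	/* name "a=b", value "c" */
	pack_option(b, sizeof(b), "a", "b=c");	/* name "a", value "b=c" */

	/* Both pairs collapse to the same stored text: "a=b=c" */
	printf("identical: %s\n", strcmp(a, b) == 0 ? "yes" : "no");

	/* Hence the new rule: reject any option name containing '=' */
	if (strchr("a=b", '=') != NULL)
		printf("rejected: option name must not contain \"=\"\n");
	return 0;
}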
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index a6b701943d3..c0aa7d0222f 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -1058,11 +1058,11 @@ gistGetFakeLSN(Relation rel)
}
/*
- * This is a stratnum support function for GiST opclasses that use the
- * RT*StrategyNumber constants.
+ * This is a stratnum translation support function for GiST opclasses that use
+ * the RT*StrategyNumber constants.
*/
Datum
-gist_stratnum_common(PG_FUNCTION_ARGS)
+gist_translate_cmptype_common(PG_FUNCTION_ARGS)
{
CompareType cmptype = PG_GETARG_INT32(0);
@@ -1090,9 +1090,9 @@ gist_stratnum_common(PG_FUNCTION_ARGS)
/*
* Returns the opclass's private stratnum used for the given compare type.
*
- * Calls the opclass's GIST_STRATNUM_PROC support function, if any,
- * and returns the result.
- * Returns InvalidStrategy if the function is not defined.
+ * Calls the opclass's GIST_TRANSLATE_CMPTYPE_PROC support function, if any,
+ * and returns the result. Returns InvalidStrategy if the function is not
+ * defined.
*/
StrategyNumber
gisttranslatecmptype(CompareType cmptype, Oid opfamily)
@@ -1101,7 +1101,7 @@ gisttranslatecmptype(CompareType cmptype, Oid opfamily)
Datum result;
/* Check whether the function is provided. */
- funcid = get_opfamily_proc(opfamily, ANYOID, ANYOID, GIST_STRATNUM_PROC);
+ funcid = get_opfamily_proc(opfamily, ANYOID, ANYOID, GIST_TRANSLATE_CMPTYPE_PROC);
if (!OidIsValid(funcid))
return InvalidStrategy;
diff --git a/src/backend/access/gist/gistvalidate.c b/src/backend/access/gist/gistvalidate.c
index 2a49e6d20f0..2ed6f74fce9 100644
--- a/src/backend/access/gist/gistvalidate.c
+++ b/src/backend/access/gist/gistvalidate.c
@@ -138,7 +138,7 @@ gistvalidate(Oid opclassoid)
ok = check_amproc_signature(procform->amproc, VOIDOID, true,
1, 1, INTERNALOID);
break;
- case GIST_STRATNUM_PROC:
+ case GIST_TRANSLATE_CMPTYPE_PROC:
ok = check_amproc_signature(procform->amproc, INT2OID, true,
1, 1, INT4OID) &&
procform->amproclefttype == ANYOID &&
@@ -265,7 +265,7 @@ gistvalidate(Oid opclassoid)
if (i == GIST_DISTANCE_PROC || i == GIST_FETCH_PROC ||
i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC ||
i == GIST_OPTIONS_PROC || i == GIST_SORTSUPPORT_PROC ||
- i == GIST_STRATNUM_PROC)
+ i == GIST_TRANSLATE_CMPTYPE_PROC)
continue; /* optional methods */
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
@@ -336,7 +336,7 @@ gistadjustmembers(Oid opfamilyoid,
case GIST_FETCH_PROC:
case GIST_OPTIONS_PROC:
case GIST_SORTSUPPORT_PROC:
- case GIST_STRATNUM_PROC:
+ case GIST_TRANSLATE_CMPTYPE_PROC:
/* Optional, so force it to be a soft family dependency */
op->ref_is_hard = false;
op->ref_is_family = true;
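
The rename makes the support function's job explicit: it translates a CompareType into the opclass's private strategy number, returning InvalidStrategy when no translation exists. A sketch of that shape (plain C; the enum values and the strategy numbers here are illustrative stand-ins, the real constants live in the PostgreSQL headers and the real lookup goes through the opclass's GIST_TRANSLATE_CMPTYPE_PROC support function):

#include <stdio.h>

typedef enum CompareType
{
	CMP_EQ,
	CMP_OVERLAP,
	CMP_CONTAINED_BY
} CompareType;

#define InvalidStrategy 0

static int
translate_cmptype(CompareType cmptype)
{
	switch (cmptype)
	{
		case CMP_EQ:
			return 18;			/* e.g. RTEqualStrategyNumber */
		case CMP_OVERLAP:
			return 3;			/* e.g. RTOverlapStrategyNumber */
		default:
			return InvalidStrategy; /* no translation defined */
	}
}

int
main(void)
{
	printf("eq -> %d\n", translate_cmptype(CMP_EQ));
	printf("contained-by -> %d\n", translate_cmptype(CMP_CONTAINED_BY));
	return 0;
}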
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 9ec8cda1c68..0dcd6ee817e 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -213,6 +213,27 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] =
#define TUPLOCK_from_mxstatus(status) \
(MultiXactStatusLock[(status)])
+/*
+ * Check that we have a valid snapshot if we might need TOAST access.
+ */
+static inline void
+AssertHasSnapshotForToast(Relation rel)
+{
+#ifdef USE_ASSERT_CHECKING
+
+ /* bootstrap mode in particular breaks this rule */
+ if (!IsNormalProcessingMode())
+ return;
+
+ /* if the relation doesn't have a TOAST table, we are good */
+ if (!OidIsValid(rel->rd_rel->reltoastrelid))
+ return;
+
+ Assert(HaveRegisteredOrActiveSnapshot());
+
+#endif /* USE_ASSERT_CHECKING */
+}
+
/* ----------------------------------------------------------------
* heap support routines
* ----------------------------------------------------------------
@@ -2066,6 +2087,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
RelationGetNumberOfAttributes(relation));
+ AssertHasSnapshotForToast(relation);
+
/*
* Fill in tuple header fields and toast the tuple if necessary.
*
@@ -2343,6 +2366,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
/* currently not needed (thus unsupported) for heap_multi_insert() */
Assert(!(options & HEAP_INSERT_NO_LOGICAL));
+ AssertHasSnapshotForToast(relation);
+
needwal = RelationNeedsWAL(relation);
saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
HEAP_DEFAULT_FILLFACTOR);
@@ -2765,6 +2790,8 @@ heap_delete(Relation relation, ItemPointer tid,
Assert(ItemPointerIsValid(tid));
+ AssertHasSnapshotForToast(relation);
+
/*
* Forbid this during a parallel operation, lest it allocate a combo CID.
* Other workers might need that combo CID for visibility checks, and we
@@ -3260,6 +3287,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
Assert(HeapTupleHeaderGetNatts(newtup->t_data) <=
RelationGetNumberOfAttributes(relation));
+ AssertHasSnapshotForToast(relation);
+
/*
* Forbid this during a parallel operation, lest it allocate a combo CID.
* Other workers might need that combo CID for visibility checks, and we
@@ -4953,7 +4982,7 @@ l3:
case LockWaitError:
if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
status, infomask, relation,
- NULL, log_lock_failure))
+ NULL, log_lock_failures))
ereport(ERROR,
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
errmsg("could not obtain lock on row in relation \"%s\"",
@@ -4991,7 +5020,7 @@ l3:
}
break;
case LockWaitError:
- if (!ConditionalXactLockTableWait(xwait, log_lock_failure))
+ if (!ConditionalXactLockTableWait(xwait, log_lock_failures))
ereport(ERROR,
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
errmsg("could not obtain lock on row in relation \"%s\"",
@@ -5256,7 +5285,7 @@ heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode,
break;
case LockWaitError:
- if (!ConditionalLockTupleTuplock(relation, tid, mode, log_lock_failure))
+ if (!ConditionalLockTupleTuplock(relation, tid, mode, log_lock_failures))
ereport(ERROR,
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
errmsg("could not obtain lock on row in relation \"%s\"",
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index ac082fefa77..cb4bc35c93e 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -464,7 +464,7 @@ tuple_lock_retry:
return TM_WouldBlock;
break;
case LockWaitError:
- if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failure))
+ if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failures))
ereport(ERROR,
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
errmsg("could not obtain lock on row in relation \"%s\"",
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 708674d8fcf..09416450af9 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -757,7 +757,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
vacrel->vm_new_visible_pages = 0;
vacrel->vm_new_visible_frozen_pages = 0;
vacrel->vm_new_frozen_pages = 0;
- vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
/*
* Get cutoffs that determine which deleted tuples are considered DEAD,
@@ -776,7 +775,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
* to increase the number of dead tuples it can prune away.)
*/
vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
+ vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
vacrel->vistest = GlobalVisTestFor(rel);
+
/* Initialize state used to track oldest extant XID/MXID */
vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 765659887af..03a1d7b027a 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -228,6 +228,8 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
BTScanOpaque so = (BTScanOpaque) scan->opaque;
bool res;
+ Assert(scan->heapRelation != NULL);
+
/* btree indexes are never lossy */
scan->xs_recheck = false;
@@ -289,6 +291,8 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
int64 ntids = 0;
ItemPointer heapTid;
+ Assert(scan->heapRelation == NULL);
+
/* Each loop iteration performs another primitive index scan */
do
{
@@ -393,6 +397,32 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
BTScanPosInvalidate(so->currPos);
}
+ /*
+ * We prefer to eagerly drop leaf page pins before btgettuple returns.
+ * This avoids making VACUUM wait to acquire a cleanup lock on the page.
+ *
+ * We cannot safely drop leaf page pins during index-only scans due to a
+ * race condition involving VACUUM setting pages all-visible in the VM.
+ * It's also unsafe for plain index scans that use a non-MVCC snapshot.
+ *
+ * When we drop pins eagerly, the mechanism that marks so->killedItems[]
+ * index tuples LP_DEAD has to deal with concurrent TID recycling races.
+ * The scheme used to detect unsafe TID recycling won't work when scanning
+ * unlogged relations (since it involves saving an affected page's LSN).
+ * Opt out of eager pin dropping during unlogged relation scans for now
+ * (this is preferable to opting out of kill_prior_tuple LP_DEAD setting).
+ *
+ * Also opt out of dropping leaf page pins eagerly during bitmap scans.
+ * Pins cannot be held for more than an instant during bitmap scans either
+ * way, so we might as well avoid wasting cycles on acquiring page LSNs.
+ *
+ * See nbtree/README section on making concurrent TID recycling safe.
+ */
+ so->dropPin = (!scan->xs_want_itup &&
+ IsMVCCSnapshot(scan->xs_snapshot) &&
+ RelationNeedsWAL(scan->indexRelation) &&
+ scan->heapRelation != NULL);
+
so->markItemIndex = -1;
so->needPrimScan = false;
so->scanBehind = false;
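
The so->dropPin assignment above condenses the long comment into four conditions that must all hold. A minimal sketch with the inputs made explicit (plain C; the parameter names are illustrative, not the actual BTScanOpaque fields):

#include <stdbool.h>
#include <stdio.h>

static bool
compute_drop_pin(bool index_only_scan, bool mvcc_snapshot,
				 bool index_is_logged, bool plain_index_scan)
{
	/* Drop leaf page pins eagerly only when all four conditions hold */
	return !index_only_scan &&	/* VM race makes IOS unsafe */
		mvcc_snapshot &&		/* non-MVCC snapshots are unsafe */
		index_is_logged &&		/* the LSN scheme needs WAL */
		plain_index_scan;		/* bitmap scans opt out */
}

int
main(void)
{
	printf("plain logged MVCC scan: %d\n",
		   compute_drop_pin(false, true, true, true));	/* 1 */
	printf("bitmap scan: %d\n",
		   compute_drop_pin(false, true, true, false));	/* 0 */
	printf("unlogged index: %d\n",
		   compute_drop_pin(false, true, false, true));	/* 0 */
	return 0;
}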
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index fe9a3886913..070f14c8b91 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -25,7 +25,7 @@
#include "utils/rel.h"
-static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp);
+static inline void _bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so);
static Buffer _bt_moveright(Relation rel, Relation heaprel, BTScanInsert key,
Buffer buf, bool forupdate, BTStack stack,
int access);
@@ -57,24 +57,29 @@ static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
/*
* _bt_drop_lock_and_maybe_pin()
*
- * Unlock the buffer; and if it is safe to release the pin, do that, too.
- * This will prevent vacuum from stalling in a blocked state trying to read a
- * page when a cursor is sitting on it.
- *
- * See nbtree/README section on making concurrent TID recycling safe.
+ * Unlock so->currPos.buf. If scan is so->dropPin, drop the pin, too.
+ * Dropping the pin prevents VACUUM from blocking on acquiring a cleanup lock.
*/
-static void
-_bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
+static inline void
+_bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so)
{
- _bt_unlockbuf(scan->indexRelation, sp->buf);
-
- if (IsMVCCSnapshot(scan->xs_snapshot) &&
- RelationNeedsWAL(scan->indexRelation) &&
- !scan->xs_want_itup)
+ if (!so->dropPin)
{
- ReleaseBuffer(sp->buf);
- sp->buf = InvalidBuffer;
+ /* Just drop the lock (not the pin) */
+ _bt_unlockbuf(rel, so->currPos.buf);
+ return;
}
+
+ /*
+ * Drop both the lock and the pin.
+ *
+ * Have to set so->currPos.lsn so that _bt_killitems has a way to detect
+ * when concurrent heap TID recycling by VACUUM might have taken place.
+ */
+ Assert(RelationNeedsWAL(rel));
+ so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf);
+ _bt_relbuf(rel, so->currPos.buf);
+ so->currPos.buf = InvalidBuffer;
}
/*
@@ -866,8 +871,8 @@ _bt_compare(Relation rel,
* if backwards scan, the last item) in the tree that satisfies the
* qualifications in the scan key. On success exit, data about the
* matching tuple(s) on the page has been loaded into so->currPos. We'll
- * drop all locks and hold onto a pin on page's buffer, except when
- * _bt_drop_lock_and_maybe_pin dropped the pin to avoid blocking VACUUM.
+ * drop all locks and hold onto a pin on page's buffer, except during
+ * so->dropPin scans, when we drop both the lock and the pin.
* _bt_returnitem sets the next item to return to scan on success exit.
*
* If there are no matching items in the index, we return false, with no
@@ -1610,7 +1615,13 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
so->currPos.currPage = BufferGetBlockNumber(so->currPos.buf);
so->currPos.prevPage = opaque->btpo_prev;
so->currPos.nextPage = opaque->btpo_next;
+ /* delay setting so->currPos.lsn until _bt_drop_lock_and_maybe_pin */
+ so->currPos.dir = dir;
+ so->currPos.nextTupleOffset = 0;
+ /* either moreRight or moreLeft should be set now (may be unset later) */
+ Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight :
+ so->currPos.moreLeft);
Assert(!P_IGNORE(opaque));
Assert(BTScanPosIsPinned(so->currPos));
Assert(!so->needPrimScan);
@@ -1626,14 +1637,6 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
so->currPos.currPage);
}
- /* initialize remaining currPos fields related to current page */
- so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf);
- so->currPos.dir = dir;
- so->currPos.nextTupleOffset = 0;
- /* either moreLeft or moreRight should be set now (may be unset later) */
- Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight :
- so->currPos.moreLeft);
-
PredicateLockPage(rel, so->currPos.currPage, scan->xs_snapshot);
/* initialize local variables */
@@ -2107,10 +2110,9 @@ _bt_returnitem(IndexScanDesc scan, BTScanOpaque so)
*
* Wrapper on _bt_readnextpage that performs final steps for the current page.
*
- * On entry, if so->currPos.buf is valid the buffer is pinned but not locked.
- * If there's no pin held, it's because _bt_drop_lock_and_maybe_pin dropped
- * the pin eagerly earlier on. The scan must have so->currPos.currPage set to
- * a valid block, in any case.
+ * On entry, so->currPos must be valid. Its buffer will be pinned, though
+ * never locked. (Actually, when so->dropPin there won't even be a pin held,
+ * though so->currPos.currPage must still be set to a valid block number.)
*/
static bool
_bt_steppage(IndexScanDesc scan, ScanDirection dir)
@@ -2251,12 +2253,14 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
*/
if (_bt_readpage(scan, dir, offnum, true))
{
+ Relation rel = scan->indexRelation;
+
/*
* _bt_readpage succeeded. Drop the lock (and maybe the pin) on
* so->currPos.buf in preparation for btgettuple returning tuples.
*/
Assert(BTScanPosIsPinned(so->currPos));
- _bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+ _bt_drop_lock_and_maybe_pin(rel, so);
return true;
}
@@ -2294,8 +2298,8 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
*
* On success exit, so->currPos is updated to contain data from the next
* interesting page, and we return true. We hold a pin on the buffer on
- * success exit, except when _bt_drop_lock_and_maybe_pin decided it was safe
- * to eagerly drop the pin (to avoid blocking VACUUM).
+ * success exit (except during so->dropPin index scans, when we drop the pin
+ * eagerly to avoid blocking VACUUM).
*
* If there are no more matching records in the given direction, we drop all
* locks and pins, invalidate so->currPos, and return false.
@@ -2413,7 +2417,7 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno,
*/
Assert(so->currPos.currPage == blkno);
Assert(BTScanPosIsPinned(so->currPos));
- _bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+ _bt_drop_lock_and_maybe_pin(rel, so);
return true;
}
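
The rewritten _bt_drop_lock_and_maybe_pin has two paths: keep the pin (so VACUUM cannot recycle the page's TIDs), or drop it after recording the page LSN so that _bt_killitems can later detect concurrent changes. A sketch of that control flow under stated assumptions (plain C; the buffer and LSN types and the unlock helpers are stand-ins for PostgreSQL's buffer manager):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t XLogRecPtr;	/* stand-in for PostgreSQL's type */

typedef struct ScanPos
{
	int			buf;			/* pinned buffer, 0 = none */
	XLogRecPtr	lsn;			/* page LSN, only set when dropping pin */
} ScanPos;

/* Stand-ins for the buffer-manager calls used in the real function */
static XLogRecPtr fake_page_lsn = 1234;
static void unlock_buf(int buf) { (void) buf; }
static void unlock_and_unpin_buf(int buf) { (void) buf; }

static void
drop_lock_and_maybe_pin(ScanPos *pos, bool dropPin)
{
	if (!dropPin)
	{
		unlock_buf(pos->buf);	/* keep the pin to block TID recycling */
		return;
	}

	/* Remember the page LSN so hinting can detect concurrent changes */
	pos->lsn = fake_page_lsn;
	unlock_and_unpin_buf(pos->buf);
	pos->buf = 0;
}

int
main(void)
{
	ScanPos		pos = {.buf = 7, .lsn = 0};

	drop_lock_and_maybe_pin(&pos, true);
	printf("pin held: %s, saved lsn: %llu\n",
		   pos.buf ? "yes" : "no", (unsigned long long) pos.lsn);
	return 0;
}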
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 3794cc924ad..9d70e89c1f3 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -105,7 +105,7 @@ typedef struct BTShared
int scantuplesortstates;
/* Query ID, for report in worker processes */
- uint64 queryid;
+ int64 queryid;
/*
* workersdonecv is used to monitor the progress of workers. All parallel
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 1a15dfcb7d3..29f0dca1b08 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -63,7 +63,7 @@ static bool _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
bool *continuescan, int *ikey);
static bool _bt_check_rowcompare(ScanKey skey,
IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
- ScanDirection dir, bool forcenonrequired, bool *continuescan);
+ ScanDirection dir, bool *continuescan);
static void _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
int tupnatts, TupleDesc tupdesc);
static int _bt_keep_natts(Relation rel, IndexTuple lastleft,
@@ -2902,8 +2902,10 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
/* row-comparison keys need special processing */
if (key->sk_flags & SK_ROW_HEADER)
{
+ Assert(!forcenonrequired); /* forbidden by _bt_set_startikey */
+
if (_bt_check_rowcompare(key, tuple, tupnatts, tupdesc, dir,
- forcenonrequired, continuescan))
+ continuescan))
continue;
return false;
}
@@ -3060,8 +3062,7 @@ _bt_check_compare(IndexScanDesc scan, ScanDirection dir,
*/
static bool
_bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
- TupleDesc tupdesc, ScanDirection dir,
- bool forcenonrequired, bool *continuescan)
+ TupleDesc tupdesc, ScanDirection dir, bool *continuescan)
{
ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
int32 cmpresult = 0;
@@ -3101,11 +3102,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
if (isNull)
{
- if (forcenonrequired)
- {
- /* treating scan's keys as non-required */
- }
- else if (subkey->sk_flags & SK_BT_NULLS_FIRST)
+ if (subkey->sk_flags & SK_BT_NULLS_FIRST)
{
/*
* Since NULLs are sorted before non-NULLs, we know we have
@@ -3159,12 +3156,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
*/
Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument));
subkey--;
- if (forcenonrequired)
- {
- /* treating scan's keys as non-required */
- }
- else if ((subkey->sk_flags & SK_BT_REQFWD) &&
- ScanDirectionIsForward(dir))
+ if ((subkey->sk_flags & SK_BT_REQFWD) &&
+ ScanDirectionIsForward(dir))
*continuescan = false;
else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
ScanDirectionIsBackward(dir))
@@ -3216,7 +3209,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
break;
}
- if (!result && !forcenonrequired)
+ if (!result)
{
/*
* Tuple fails this qual. If it's a required qual for the current
@@ -3342,75 +3335,71 @@ _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
*
* Note that if we hold a pin on the target page continuously from initially
* reading the items until applying this function, VACUUM cannot have deleted
- * any items from the page, and so there is no need to search left from the
- * recorded offset. (This observation also guarantees that the item is still
- * the right one to delete, which might otherwise be questionable since heap
- * TIDs can get recycled.) This holds true even if the page has been modified
- * by inserts and page splits, so there is no need to consult the LSN.
- *
- * If the pin was released after reading the page, then we re-read it. If it
- * has been modified since we read it (as determined by the LSN), we dare not
- * flag any entries because it is possible that the old entry was vacuumed
- * away and the TID was re-used by a completely different heap tuple.
+ * any items on the page, so the page's TIDs can't have been recycled by now.
+ * There's no risk that we'll confuse a new index tuple that happens to use a
+ * recycled TID with a now-removed tuple with the same TID (that used to be on
+ * this same page). We can't rely on that during scans that drop pins eagerly
+ * (so->dropPin scans), though, so we must condition setting LP_DEAD bits on
+ * the page LSN having not changed since back when _bt_readpage saw the page.
*/
void
_bt_killitems(IndexScanDesc scan)
{
+ Relation rel = scan->indexRelation;
BTScanOpaque so = (BTScanOpaque) scan->opaque;
Page page;
BTPageOpaque opaque;
OffsetNumber minoff;
OffsetNumber maxoff;
- int i;
int numKilled = so->numKilled;
bool killedsomething = false;
- bool droppedpin PG_USED_FOR_ASSERTS_ONLY;
+ Assert(numKilled > 0);
Assert(BTScanPosIsValid(so->currPos));
+ Assert(scan->heapRelation != NULL); /* can't be a bitmap index scan */
- /*
- * Always reset the scan state, so we don't look for same items on other
- * pages.
- */
+ /* Always invalidate so->killedItems[] before leaving so->currPos */
so->numKilled = 0;
- if (BTScanPosIsPinned(so->currPos))
+ if (!so->dropPin)
{
/*
* We have held the pin on this page since we read the index tuples,
* so all we need to do is lock it. The pin will have prevented
- * re-use of any TID on the page, so there is no need to check the
- * LSN.
+ * concurrent VACUUMs from recycling any of the TIDs on the page.
*/
- droppedpin = false;
- _bt_lockbuf(scan->indexRelation, so->currPos.buf, BT_READ);
-
- page = BufferGetPage(so->currPos.buf);
+ Assert(BTScanPosIsPinned(so->currPos));
+ _bt_lockbuf(rel, so->currPos.buf, BT_READ);
}
else
{
Buffer buf;
+ XLogRecPtr latestlsn;
- droppedpin = true;
- /* Attempt to re-read the buffer, getting pin and lock. */
- buf = _bt_getbuf(scan->indexRelation, so->currPos.currPage, BT_READ);
+ Assert(!BTScanPosIsPinned(so->currPos));
+ Assert(RelationNeedsWAL(rel));
+ buf = _bt_getbuf(rel, so->currPos.currPage, BT_READ);
- page = BufferGetPage(buf);
- if (BufferGetLSNAtomic(buf) == so->currPos.lsn)
- so->currPos.buf = buf;
- else
+ latestlsn = BufferGetLSNAtomic(buf);
+ Assert(!XLogRecPtrIsInvalid(so->currPos.lsn));
+ Assert(so->currPos.lsn <= latestlsn);
+ if (so->currPos.lsn != latestlsn)
{
- /* Modified while not pinned means hinting is not safe. */
- _bt_relbuf(scan->indexRelation, buf);
+ /* Modified, give up on hinting */
+ _bt_relbuf(rel, buf);
return;
}
+
+ /* Unmodified, hinting is safe */
+ so->currPos.buf = buf;
}
+ page = BufferGetPage(so->currPos.buf);
opaque = BTPageGetOpaque(page);
minoff = P_FIRSTDATAKEY(opaque);
maxoff = PageGetMaxOffsetNumber(page);
- for (i = 0; i < numKilled; i++)
+ for (int i = 0; i < numKilled; i++)
{
int itemIndex = so->killedItems[i];
BTScanPosItem *kitem = &so->currPos.items[itemIndex];
@@ -3442,7 +3431,7 @@ _bt_killitems(IndexScanDesc scan)
* correctness.
*
* Note that the page may have been modified in almost any way
- * since we first read it (in the !droppedpin case), so it's
+ * since we first read it (in the !so->dropPin case), so it's
* possible that this posting list tuple wasn't a posting list
* tuple when we first encountered its heap TIDs.
*/
@@ -3458,7 +3447,7 @@ _bt_killitems(IndexScanDesc scan)
* though only in the common case where the page can't
* have been concurrently modified
*/
- Assert(kitem->indexOffset == offnum || !droppedpin);
+ Assert(kitem->indexOffset == offnum || !so->dropPin);
/*
* Read-ahead to later kitems here.
@@ -3525,7 +3514,7 @@ _bt_killitems(IndexScanDesc scan)
MarkBufferDirtyHint(so->currPos.buf, true);
}
- _bt_unlockbuf(scan->indexRelation, so->currPos.buf);
+ _bt_unlockbuf(rel, so->currPos.buf);
}
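
The validation side of the scheme: _bt_killitems may set LP_DEAD hints either because the pin was held throughout (VACUUM could not have recycled any TID), or because the page LSN is unchanged since _bt_readpage saw it. A minimal sketch of that decision (plain C; types and names are stand-ins):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t XLogRecPtr;

/* Returns true when it is safe to set LP_DEAD hint bits */
static bool
hinting_is_safe(bool held_pin_throughout,
				XLogRecPtr lsn_at_read, XLogRecPtr lsn_now)
{
	if (held_pin_throughout)
		return true;			/* pin blocked VACUUM's TID recycling */
	return lsn_at_read == lsn_now;	/* unchanged page => no recycling */
}

int
main(void)
{
	printf("%d\n", hinting_is_safe(false, 100, 100));	/* 1: safe */
	printf("%d\n", hinting_is_safe(false, 100, 108));	/* 0: give up */
	return 0;
}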
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index b885513f765..2e67e998adb 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -1045,6 +1045,34 @@ TransactionStartedDuringRecovery(void)
}
/*
+ * GetTopReadOnlyTransactionNestLevel
+ *
+ * Note: this will return zero when not inside any transaction or when neither
+ * a top-level transaction nor subtransactions are read-only, one when the
+ * top-level transaction is read-only, two when one level of subtransaction is
+ * read-only, etc.
+ *
+ * Note: subtransactions of the topmost read-only transaction are also
+ * read-only, because they inherit read-only mode from the transaction, and
+ * thus can't change to read-write mode. See check_transaction_read_only().
+ */
+int
+GetTopReadOnlyTransactionNestLevel(void)
+{
+ TransactionState s = CurrentTransactionState;
+
+ if (!XactReadOnly)
+ return 0;
+ while (s->nestingLevel > 1)
+ {
+ if (!s->prevXactReadOnly)
+ return s->nestingLevel;
+ s = s->parent;
+ }
+ return s->nestingLevel;
+}
+
+/*
* EnterParallelMode
*/
void
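
GetTopReadOnlyTransactionNestLevel walks the transaction-state stack from the current (innermost) level outward, stopping at the first level whose parent was not read-only. A standalone sketch of that walk with a simplified state struct (plain C; illustrative only):

#include <stdbool.h>
#include <stdio.h>

typedef struct XactState
{
	int			nestingLevel;	/* 1 = top-level transaction */
	bool		prevXactReadOnly;	/* enclosing level's read-only flag */
	struct XactState *parent;
} XactState;

static int
top_read_only_nest_level(const XactState *s, bool xact_read_only)
{
	if (!xact_read_only)
		return 0;				/* nothing is read-only right now */
	while (s->nestingLevel > 1)
	{
		if (!s->prevXactReadOnly)
			return s->nestingLevel; /* this subxact started read-only */
		s = s->parent;
	}
	return s->nestingLevel;		/* read-only since the top level */
}

int
main(void)
{
	/* top-level read-write, one subtransaction switched to read-only */
	XactState	top = {1, false, NULL};
	XactState	sub = {2, false, &top}; /* parent was read-write */

	printf("%d\n", top_read_only_nest_level(&sub, true));	/* 2 */
	return 0;
}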
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 5fbbcdaabb1..c95eb945016 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1065,16 +1065,41 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
/* Check that the chosen locales are valid, and get canonical spellings */
if (!check_locale(LC_COLLATE, dbcollate, &canonname))
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
- errhint("If the locale name is specific to ICU, use ICU_LOCALE.")));
+ {
+ if (dblocprovider == COLLPROVIDER_BUILTIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
+ errhint("If the locale name is specific to the builtin provider, use BUILTIN_LOCALE.")));
+ else if (dblocprovider == COLLPROVIDER_ICU)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
+ errhint("If the locale name is specific to the ICU provider, use ICU_LOCALE.")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate)));
+ }
dbcollate = canonname;
if (!check_locale(LC_CTYPE, dbctype, &canonname))
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
- errhint("If the locale name is specific to ICU, use ICU_LOCALE.")));
+ {
+ if (dblocprovider == COLLPROVIDER_BUILTIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
+ errhint("If the locale name is specific to the builtin provider, use BUILTIN_LOCALE.")));
+ else if (dblocprovider == COLLPROVIDER_ICU)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
+ errhint("If the locale name is specific to the ICU provider, use ICU_LOCALE.")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype)));
+ }
+
dbctype = canonname;
check_encoding_locale_matches(encoding, dbcollate, dbctype);
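
The change above replaces a one-size-fits-all ICU hint with a hint chosen by the database's locale provider. A sketch of the selection logic (plain C; the single-character provider codes follow the catalog convention, 'b' = builtin, 'i' = ICU, anything else libc, used here illustratively):

#include <stdio.h>

static const char *
locale_hint(char provider)
{
	switch (provider)
	{
		case 'b':
			return "If the locale name is specific to the builtin provider, use BUILTIN_LOCALE.";
		case 'i':
			return "If the locale name is specific to the ICU provider, use ICU_LOCALE.";
		default:
			return NULL;		/* libc: no extra hint applies */
	}
}

int
main(void)
{
	const char *hint = locale_hint('i');

	printf("%s\n", hint ? hint : "(no hint)");
	return 0;
}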
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index bfa83fbc3fe..7e2792ead71 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -811,14 +811,10 @@ ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc)
* the queryid in any of the EXPLAIN plans to keep stable the results
* generated by regression test suites.
*/
- if (es->verbose && queryDesc->plannedstmt->queryId != UINT64CONST(0) &&
+ if (es->verbose && queryDesc->plannedstmt->queryId != INT64CONST(0) &&
compute_query_id != COMPUTE_QUERY_ID_REGRESS)
{
- /*
- * Output the queryid as an int64 rather than a uint64 so we match
- * what would be seen in the BIGINT pg_stat_statements.queryid column.
- */
- ExplainPropertyInteger("Query Identifier", NULL, (int64)
+ ExplainPropertyInteger("Query Identifier", NULL,
queryDesc->plannedstmt->queryId, es);
}
}
diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c
index c14e038d54f..8d2d7431544 100644
--- a/src/backend/commands/foreigncmds.c
+++ b/src/backend/commands/foreigncmds.c
@@ -71,15 +71,26 @@ optionListToArray(List *options)
foreach(cell, options)
{
DefElem *def = lfirst(cell);
+ const char *name;
const char *value;
Size len;
text *t;
+ name = def->defname;
value = defGetString(def);
- len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value);
+
+ /* Insist that name not contain "=", else "a=b=c" is ambiguous */
+ if (strchr(name, '=') != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid option name \"%s\": must not contain \"=\"",
+ name)));
+
+ len = VARHDRSZ + strlen(name) + 1 + strlen(value);
+ /* +1 leaves room for sprintf's trailing null */
t = palloc(len + 1);
SET_VARSIZE(t, len);
- sprintf(VARDATA(t), "%s=%s", def->defname, value);
+ sprintf(VARDATA(t), "%s=%s", name, value);
astate = accumArrayResult(astate, PointerGetDatum(t),
false, TEXTOID,
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index d962fe392cd..c3ec2076a52 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -4226,7 +4226,7 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein
false);
/*
- * Updating pg_index might involve TOAST table access, so ensure we
+ * Swapping the indexes might involve TOAST table access, so ensure we
* have a valid snapshot.
*/
PushActiveSnapshot(GetTransactionSnapshot());
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 54ad38247aa..ea96947d813 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -430,8 +430,8 @@ static void AlterConstrUpdateConstraintEntry(ATAlterConstraint *cmdcon, Relation
static ObjectAddress ATExecValidateConstraint(List **wqueue,
Relation rel, char *constrName,
bool recurse, bool recursing, LOCKMODE lockmode);
-static void QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
- HeapTuple contuple, LOCKMODE lockmode);
+static void QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation fkrel,
+ Oid pkrelid, HeapTuple contuple, LOCKMODE lockmode);
static void QueueCheckConstraintValidation(List **wqueue, Relation conrel, Relation rel,
char *constrName, HeapTuple contuple,
bool recurse, bool recursing, LOCKMODE lockmode);
@@ -11858,6 +11858,7 @@ AttachPartitionForeignKey(List **wqueue,
if (queueValidation)
{
Relation conrel;
+ Oid confrelid;
conrel = table_open(ConstraintRelationId, RowExclusiveLock);
@@ -11865,9 +11866,11 @@ AttachPartitionForeignKey(List **wqueue,
if (!HeapTupleIsValid(partcontup))
elog(ERROR, "cache lookup failed for constraint %u", partConstrOid);
+ confrelid = ((Form_pg_constraint) GETSTRUCT(partcontup))->confrelid;
+
/* Use the same lock as for AT_ValidateConstraint */
- QueueFKConstraintValidation(wqueue, conrel, partition, partcontup,
- ShareUpdateExclusiveLock);
+ QueueFKConstraintValidation(wqueue, conrel, partition, confrelid,
+ partcontup, ShareUpdateExclusiveLock);
ReleaseSysCache(partcontup);
table_close(conrel, RowExclusiveLock);
}
@@ -12463,9 +12466,12 @@ ATExecAlterConstrEnforceability(List **wqueue, ATAlterConstraint *cmdcon,
/*
* Tell Phase 3 to check that the constraint is satisfied by existing
- * rows.
+ * rows. Only applies to leaf partitions, and (for constraints that
+ * reference a partitioned table) only if this is not one of the
+ * pg_constraint rows that exist solely to support action triggers.
*/
- if (rel->rd_rel->relkind == RELKIND_RELATION)
+ if (rel->rd_rel->relkind == RELKIND_RELATION &&
+ currcon->confrelid == pkrelid)
{
AlteredTableInfo *tab;
NewConstraint *newcon;
@@ -12919,7 +12925,8 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
{
if (con->contype == CONSTRAINT_FOREIGN)
{
- QueueFKConstraintValidation(wqueue, conrel, rel, tuple, lockmode);
+ QueueFKConstraintValidation(wqueue, conrel, rel, con->confrelid,
+ tuple, lockmode);
}
else if (con->contype == CONSTRAINT_CHECK)
{
@@ -12952,8 +12959,8 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
* for the specified relation and all its children.
*/
static void
-QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
- HeapTuple contuple, LOCKMODE lockmode)
+QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation fkrel,
+ Oid pkrelid, HeapTuple contuple, LOCKMODE lockmode)
{
Form_pg_constraint con;
AlteredTableInfo *tab;
@@ -12964,7 +12971,17 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
Assert(con->contype == CONSTRAINT_FOREIGN);
Assert(!con->convalidated);
- if (rel->rd_rel->relkind == RELKIND_RELATION)
+ /*
+ * Add the validation to phase 3's queue; not needed for partitioned
+ * tables themselves, only for their partitions.
+ *
+ * When the referenced table (pkrelid) is partitioned, the referencing
+ * table (fkrel) has one pg_constraint row pointing to each partition
+ * thereof. These rows are there only to support action triggers and no
+ * table scan is needed, therefore skip this for them as well.
+ */
+ if (fkrel->rd_rel->relkind == RELKIND_RELATION &&
+ con->confrelid == pkrelid)
{
NewConstraint *newcon;
Constraint *fkconstraint;
@@ -12983,15 +13000,16 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
newcon->qual = (Node *) fkconstraint;
/* Find or create work queue entry for this table */
- tab = ATGetQueueEntry(wqueue, rel);
+ tab = ATGetQueueEntry(wqueue, fkrel);
tab->constraints = lappend(tab->constraints, newcon);
}
/*
* If the table at either end of the constraint is partitioned, we need to
- * recurse and handle every constraint that is a child of this constraint.
+ * recurse and handle every unvalidated constraint that is a child of this
+ * constraint.
*/
- if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+ if (fkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
get_rel_relkind(con->confrelid) == RELKIND_PARTITIONED_TABLE)
{
ScanKeyData pkey;
@@ -13023,8 +13041,12 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
childrel = table_open(childcon->conrelid, lockmode);
- QueueFKConstraintValidation(wqueue, conrel, childrel, childtup,
- lockmode);
+ /*
+ * NB: Note that pkrelid should be passed as-is during recursion,
+ * as it is required to identify the root referenced table.
+ */
+ QueueFKConstraintValidation(wqueue, conrel, childrel, pkrelid,
+ childtup, lockmode);
table_close(childrel, NoLock);
}
@@ -13032,7 +13054,11 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
}
/*
- * Now update the catalog, while we have the door open.
+ * Now mark the pg_constraint row as validated (even if we didn't check,
+ * notably the ones for partitions on the referenced side).
+ *
+ * We rely on transaction abort to roll back this change if phase 3
+ * ultimately finds violating rows. This is a bit ugly.
*/
copyTuple = heap_copytuple(contuple);
copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple);
@@ -20964,9 +20990,17 @@ ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel,
tab->rel = rel;
}
+ /*
+ * Detaching the partition might involve TOAST table access, so ensure we
+ * have a valid snapshot.
+ */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
/* Do the final part of detaching */
DetachPartitionFinalize(rel, partRel, concurrent, defaultPartOid);
+ PopActiveSnapshot();
+
ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel));
/* keep our lock until commit */
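
The new pkrelid parameter exists so QueueFKConstraintValidation can tell "real" constraint rows from the clone rows that only support action triggers: a table scan is queued only for a leaf table whose pg_constraint row references the root referenced table. A sketch of that predicate (plain C; relkind codes as in pg_class, OIDs as plain unsigned ints):

#include <stdbool.h>
#include <stdio.h>

#define RELKIND_RELATION 'r'
#define RELKIND_PARTITIONED_TABLE 'p'

static bool
needs_validation_scan(char fkrel_relkind, unsigned confrelid,
					  unsigned pkrelid)
{
	return fkrel_relkind == RELKIND_RELATION && confrelid == pkrelid;
}

int
main(void)
{
	/* leaf table whose constraint references the root pk table */
	printf("%d\n", needs_validation_scan('r', 1000, 1000));	/* 1 */
	/* clone row referencing one partition of the pk table: skip */
	printf("%d\n", needs_validation_scan('r', 1001, 1000));	/* 0 */
	/* partitioned child: recursion handles it, no scan here */
	printf("%d\n", needs_validation_scan('p', 1000, 1000));	/* 0 */
	return 0;
}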
diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c
index 2b9d548cdeb..0feea1d30ec 100644
--- a/src/backend/commands/vacuumparallel.c
+++ b/src/backend/commands/vacuumparallel.c
@@ -63,7 +63,7 @@ typedef struct PVShared
*/
Oid relid;
int elevel;
- uint64 queryid;
+ int64 queryid;
/*
* Fields for both index vacuum and cleanup.
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 2bc89bf84dc..54da8e7995b 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -64,6 +64,7 @@
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
#include "rewrite/rewriteHandler.h"
+#include "rewrite/rewriteManip.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/datum.h"
@@ -3735,6 +3736,7 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate)
switch (action->commandType)
{
case CMD_INSERT:
+ /* INSERT actions always use rootRelInfo */
ExecCheckPlanOutput(rootRelInfo->ri_RelationDesc,
action->targetList);
@@ -3774,9 +3776,23 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate)
}
else
{
- /* not partitioned? use the stock relation and slot */
- tgtslot = resultRelInfo->ri_newTupleSlot;
- tgtdesc = RelationGetDescr(resultRelInfo->ri_RelationDesc);
+ /*
+ * If the MERGE targets an inherited table, we insert
+ * into the root table, so we must initialize its
+ * "new" tuple slot, if not already done, and use its
+ * relation descriptor for the projection.
+ *
+ * For non-inherited tables, rootRelInfo and
+ * resultRelInfo are the same, and the "new" tuple
+ * slot will already have been initialized.
+ */
+ if (rootRelInfo->ri_newTupleSlot == NULL)
+ rootRelInfo->ri_newTupleSlot =
+ table_slot_create(rootRelInfo->ri_RelationDesc,
+ &estate->es_tupleTable);
+
+ tgtslot = rootRelInfo->ri_newTupleSlot;
+ tgtdesc = RelationGetDescr(rootRelInfo->ri_RelationDesc);
}
action_state->mas_proj =
@@ -3809,6 +3825,114 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate)
}
}
}
+
+ /*
+ * If the MERGE targets an inherited table, any INSERT actions will use
+ * rootRelInfo, and rootRelInfo will not be in the resultRelInfo array.
+ * Therefore we must initialize its WITH CHECK OPTION constraints and
+ * RETURNING projection, as ExecInitModifyTable did for the resultRelInfo
+ * entries.
+ *
+ * Note that the planner does not build a withCheckOptionList or
+ * returningList for the root relation, but as in ExecInitPartitionInfo,
+ * we can use the first resultRelInfo entry as a reference to calculate
+ * the attno's for the root table.
+ */
+ if (rootRelInfo != mtstate->resultRelInfo &&
+ rootRelInfo->ri_RelationDesc->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
+ (mtstate->mt_merge_subcommands & MERGE_INSERT) != 0)
+ {
+ ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
+ Relation rootRelation = rootRelInfo->ri_RelationDesc;
+ Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
+ int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
+ AttrMap *part_attmap = NULL;
+ bool found_whole_row;
+
+ if (node->withCheckOptionLists != NIL)
+ {
+ List *wcoList;
+ List *wcoExprs = NIL;
+
+ /* There should be as many WCO lists as result rels */
+ Assert(list_length(node->withCheckOptionLists) ==
+ list_length(node->resultRelations));
+
+ /*
+ * Use the first WCO list as a reference. In the most common case,
+ * this will be for the same relation as rootRelInfo, and so there
+ * will be no need to adjust its attno's.
+ */
+ wcoList = linitial(node->withCheckOptionLists);
+ if (rootRelation != firstResultRel)
+ {
+ /* Convert any Vars in it to contain the root's attno's */
+ part_attmap =
+ build_attrmap_by_name(RelationGetDescr(rootRelation),
+ RelationGetDescr(firstResultRel),
+ false);
+
+ wcoList = (List *)
+ map_variable_attnos((Node *) wcoList,
+ firstVarno, 0,
+ part_attmap,
+ RelationGetForm(rootRelation)->reltype,
+ &found_whole_row);
+ }
+
+ foreach(lc, wcoList)
+ {
+ WithCheckOption *wco = lfirst_node(WithCheckOption, lc);
+ ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
+ &mtstate->ps);
+
+ wcoExprs = lappend(wcoExprs, wcoExpr);
+ }
+
+ rootRelInfo->ri_WithCheckOptions = wcoList;
+ rootRelInfo->ri_WithCheckOptionExprs = wcoExprs;
+ }
+
+ if (node->returningLists != NIL)
+ {
+ List *returningList;
+
+ /* There should be as many returning lists as result rels */
+ Assert(list_length(node->returningLists) ==
+ list_length(node->resultRelations));
+
+ /*
+ * Use the first returning list as a reference. In the most common
+ * case, this will be for the same relation as rootRelInfo, and so
+ * there will be no need to adjust its attno's.
+ */
+ returningList = linitial(node->returningLists);
+ if (rootRelation != firstResultRel)
+ {
+ /* Convert any Vars in it to contain the root's attno's */
+ if (part_attmap == NULL)
+ part_attmap =
+ build_attrmap_by_name(RelationGetDescr(rootRelation),
+ RelationGetDescr(firstResultRel),
+ false);
+
+ returningList = (List *)
+ map_variable_attnos((Node *) returningList,
+ firstVarno, 0,
+ part_attmap,
+ RelationGetForm(rootRelation)->reltype,
+ &found_whole_row);
+ }
+ rootRelInfo->ri_returningList = returningList;
+
+ /* Initialize the RETURNING projection */
+ rootRelInfo->ri_projectReturning =
+ ExecBuildProjectionInfo(returningList, econtext,
+ mtstate->ps.ps_ResultTupleSlot,
+ &mtstate->ps,
+ RelationGetDescr(rootRelation));
+ }
+ }
}
/*
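
The core of the slot handling above is lazy initialization: the root relation's "new" tuple slot is created once, on first use by a MERGE INSERT action, then reused. A much-reduced sketch of that idiom (plain C; both structs are stand-ins for the executor's types):

#include <stdio.h>
#include <stdlib.h>

typedef struct TupleSlot
{
	int			natts;			/* stand-in for a real slot */
} TupleSlot;

typedef struct ResultRelInfo
{
	TupleSlot  *ri_newTupleSlot;	/* created on demand */
	int			natts;
} ResultRelInfo;

static TupleSlot *
get_merge_target_slot(ResultRelInfo *rootRelInfo)
{
	if (rootRelInfo->ri_newTupleSlot == NULL)
	{
		rootRelInfo->ri_newTupleSlot = malloc(sizeof(TupleSlot));
		rootRelInfo->ri_newTupleSlot->natts = rootRelInfo->natts;
	}
	return rootRelInfo->ri_newTupleSlot;
}

int
main(void)
{
	ResultRelInfo root = {NULL, 3};
	TupleSlot  *s1 = get_merge_target_slot(&root);
	TupleSlot  *s2 = get_merge_target_slot(&root);

	printf("same slot reused: %s\n", s1 == s2 ? "yes" : "no");
	return 0;
}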
diff --git a/src/backend/libpq/be-secure-gssapi.c b/src/backend/libpq/be-secure-gssapi.c
index 717ba9824f9..3534f0b8111 100644
--- a/src/backend/libpq/be-secure-gssapi.c
+++ b/src/backend/libpq/be-secure-gssapi.c
@@ -46,11 +46,18 @@
* don't want the other side to send arbitrarily huge packets as we
* would have to allocate memory for them to then pass them to GSSAPI.
*
- * Therefore, these two #define's are effectively part of the protocol
+ * Therefore, this #define is effectively part of the protocol
* spec and can't ever be changed.
*/
-#define PQ_GSS_SEND_BUFFER_SIZE 16384
-#define PQ_GSS_RECV_BUFFER_SIZE 16384
+#define PQ_GSS_MAX_PACKET_SIZE 16384 /* includes uint32 header word */
+
+/*
+ * However, during the authentication exchange we must cope with whatever
+ * message size the GSSAPI library wants to send (because our protocol
+ * doesn't support splitting those messages). Depending on configuration
+ * those messages might be as much as 64kB.
+ */
+#define PQ_GSS_AUTH_BUFFER_SIZE 65536 /* includes uint32 header word */
/*
* Since we manage at most one GSS-encrypted connection per backend,
@@ -210,12 +217,12 @@ be_gssapi_write(Port *port, const void *ptr, size_t len)
errno = ECONNRESET;
return -1;
}
- if (output.length > PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))
+ if (output.length > PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))
{
ereport(COMMERROR,
(errmsg("server tried to send oversize GSSAPI packet (%zu > %zu)",
(size_t) output.length,
- PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))));
+ PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))));
errno = ECONNRESET;
return -1;
}
@@ -346,12 +353,12 @@ be_gssapi_read(Port *port, void *ptr, size_t len)
/* Decode the packet length and check for overlength packet */
input.length = pg_ntoh32(*(uint32 *) PqGSSRecvBuffer);
- if (input.length > PQ_GSS_RECV_BUFFER_SIZE - sizeof(uint32))
+ if (input.length > PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))
{
ereport(COMMERROR,
(errmsg("oversize GSSAPI packet sent by the client (%zu > %zu)",
(size_t) input.length,
- PQ_GSS_RECV_BUFFER_SIZE - sizeof(uint32))));
+ PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))));
errno = ECONNRESET;
return -1;
}
@@ -517,10 +524,13 @@ secure_open_gssapi(Port *port)
* that will never use them, and we ensure that the buffers are
* sufficiently aligned for the length-word accesses that we do in some
* places in this file.
+ *
+ * We'll use PQ_GSS_AUTH_BUFFER_SIZE-sized buffers until transport
+ * negotiation is complete, then switch to PQ_GSS_MAX_PACKET_SIZE.
*/
- PqGSSSendBuffer = malloc(PQ_GSS_SEND_BUFFER_SIZE);
- PqGSSRecvBuffer = malloc(PQ_GSS_RECV_BUFFER_SIZE);
- PqGSSResultBuffer = malloc(PQ_GSS_RECV_BUFFER_SIZE);
+ PqGSSSendBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE);
+ PqGSSRecvBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE);
+ PqGSSResultBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE);
if (!PqGSSSendBuffer || !PqGSSRecvBuffer || !PqGSSResultBuffer)
ereport(FATAL,
(errcode(ERRCODE_OUT_OF_MEMORY),
@@ -568,16 +578,16 @@ secure_open_gssapi(Port *port)
/*
* During initialization, packets are always fully consumed and
- * shouldn't ever be over PQ_GSS_RECV_BUFFER_SIZE in length.
+ * shouldn't ever be over PQ_GSS_AUTH_BUFFER_SIZE in total length.
*
* Verify on our side that the client doesn't do something funny.
*/
- if (input.length > PQ_GSS_RECV_BUFFER_SIZE)
+ if (input.length > PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))
{
ereport(COMMERROR,
- (errmsg("oversize GSSAPI packet sent by the client (%zu > %d)",
+ (errmsg("oversize GSSAPI packet sent by the client (%zu > %zu)",
(size_t) input.length,
- PQ_GSS_RECV_BUFFER_SIZE)));
+ PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))));
return -1;
}
@@ -631,12 +641,12 @@ secure_open_gssapi(Port *port)
{
uint32 netlen = pg_hton32(output.length);
- if (output.length > PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))
+ if (output.length > PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))
{
ereport(COMMERROR,
(errmsg("server tried to send oversize GSSAPI packet (%zu > %zu)",
(size_t) output.length,
- PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))));
+ PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))));
gss_release_buffer(&minor, &output);
return -1;
}
@@ -692,11 +702,28 @@ secure_open_gssapi(Port *port)
}
/*
+ * Release the large authentication buffers and allocate the ones we want
+ * for normal operation.
+ */
+ free(PqGSSSendBuffer);
+ free(PqGSSRecvBuffer);
+ free(PqGSSResultBuffer);
+ PqGSSSendBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE);
+ PqGSSRecvBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE);
+ PqGSSResultBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE);
+ if (!PqGSSSendBuffer || !PqGSSRecvBuffer || !PqGSSResultBuffer)
+ ereport(FATAL,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
+ PqGSSSendLength = PqGSSSendNext = PqGSSSendConsumed = 0;
+ PqGSSRecvLength = PqGSSResultLength = PqGSSResultNext = 0;
+
+ /*
* Determine the max packet size which will fit in our buffer, after
* accounting for the length. be_gssapi_write will need this.
*/
major = gss_wrap_size_limit(&minor, port->gss->ctx, 1, GSS_C_QOP_DEFAULT,
- PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32),
+ PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32),
&PqGSSMaxPktSize);
if (GSS_ERROR(major))
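
The two constants give two different payload limits, each leaving room for the uint32 length word at the front of the buffer. A sketch of the size check in both phases (plain C; packet_fits is an illustrative condensation of the ereport checks above):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PQ_GSS_MAX_PACKET_SIZE	16384	/* includes uint32 header word */
#define PQ_GSS_AUTH_BUFFER_SIZE 65536	/* includes uint32 header word */

static int
packet_fits(size_t payload_len, int authenticating)
{
	size_t		limit = (authenticating ? PQ_GSS_AUTH_BUFFER_SIZE
						 : PQ_GSS_MAX_PACKET_SIZE) - sizeof(uint32_t);

	return payload_len <= limit;
}

int
main(void)
{
	printf("%d\n", packet_fits(40000, 1));	/* 1: OK during auth */
	printf("%d\n", packet_fits(40000, 0));	/* 0: oversize afterwards */
	return 0;
}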
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 77659b0f760..c8595109b0e 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1039,6 +1039,11 @@ _read${n}(void)
print $off "\tWRITE_UINT_FIELD($f);\n";
print $rff "\tREAD_UINT_FIELD($f);\n" unless $no_read;
}
+ elsif ($t eq 'int64')
+ {
+ print $off "\tWRITE_INT64_FIELD($f);\n";
+ print $rff "\tREAD_INT64_FIELD($f);\n" unless $no_read;
+ }
elsif ($t eq 'uint64'
|| $t eq 'AclMode')
{
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index ceac3fd8620..25e08ba3426 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -51,6 +51,12 @@ static void outDouble(StringInfo str, double d);
#define WRITE_UINT_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname)
+/* Write a signed integer field (anything written with INT64_FORMAT) */
+#define WRITE_INT64_FIELD(fldname) \
+ appendStringInfo(str, \
+ " :" CppAsString(fldname) " " INT64_FORMAT, \
+ node->fldname)
+
/* Write an unsigned integer field (anything written with UINT64_FORMAT) */
#define WRITE_UINT64_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " " UINT64_FORMAT, \
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index d1e82a63f09..ac3cb3d9caf 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -56,7 +56,7 @@ int compute_query_id = COMPUTE_QUERY_ID_AUTO;
bool query_id_enabled = false;
static JumbleState *InitJumble(void);
-static uint64 DoJumble(JumbleState *jstate, Node *node);
+static int64 DoJumble(JumbleState *jstate, Node *node);
static void AppendJumble(JumbleState *jstate,
const unsigned char *value, Size size);
static void FlushPendingNulls(JumbleState *jstate);
@@ -141,12 +141,12 @@ JumbleQuery(Query *query)
* If we are unlucky enough to get a hash of zero, use 1 instead for
* normal statements and 2 for utility queries.
*/
- if (query->queryId == UINT64CONST(0))
+ if (query->queryId == INT64CONST(0))
{
if (query->utilityStmt)
- query->queryId = UINT64CONST(2);
+ query->queryId = INT64CONST(2);
else
- query->queryId = UINT64CONST(1);
+ query->queryId = INT64CONST(1);
}
return jstate;
@@ -197,7 +197,7 @@ InitJumble(void)
* Jumble the given Node using the given JumbleState and return the resulting
* jumble hash.
*/
-static uint64
+static int64
DoJumble(JumbleState *jstate, Node *node)
{
/* Jumble the given node */
@@ -208,9 +208,9 @@ DoJumble(JumbleState *jstate, Node *node)
FlushPendingNulls(jstate);
/* Process the jumble buffer and produce the hash value */
- return DatumGetUInt64(hash_any_extended(jstate->jumble,
- jstate->jumble_len,
- 0));
+ return DatumGetInt64(hash_any_extended(jstate->jumble,
+ jstate->jumble_len,
+ 0));
}
/*
@@ -256,10 +256,10 @@ AppendJumbleInternal(JumbleState *jstate, const unsigned char *item,
if (unlikely(jumble_len >= JUMBLE_SIZE))
{
- uint64 start_hash;
+ int64 start_hash;
- start_hash = DatumGetUInt64(hash_any_extended(jumble,
- JUMBLE_SIZE, 0));
+ start_hash = DatumGetInt64(hash_any_extended(jumble,
+ JUMBLE_SIZE, 0));
memcpy(jumble, &start_hash, sizeof(start_hash));
jumble_len = sizeof(start_hash);
}
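
Zero is reserved to mean "no query ID computed", so a genuinely-zero hash must be remapped, as the comment above describes. A minimal sketch of that rule (plain C):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static int64_t
finalize_query_id(int64_t hash, bool is_utility)
{
	if (hash == INT64_C(0))
		return is_utility ? INT64_C(2) : INT64_C(1);
	return hash;
}

int
main(void)
{
	printf("%lld\n", (long long) finalize_query_id(0, false));	/* 1 */
	printf("%lld\n", (long long) finalize_query_id(0, true));	/* 2 */
	return 0;
}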
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 64d3a09f765..8c90ab54af8 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -68,6 +68,12 @@
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = atoui(token)
+/* Read a signed integer field (anything written using INT64_FORMAT) */
+#define READ_INT64_FIELD(fldname) \
+ token = pg_strtok(&length); /* skip :fldname */ \
+ token = pg_strtok(&length); /* get field value */ \
+ local_node->fldname = strtoi64(token, NULL, 10)
+
/* Read an unsigned integer field (anything written using UINT64_FORMAT) */
#define READ_UINT64_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
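
The new WRITE_INT64_FIELD/READ_INT64_FIELD pair round-trips a signed 64-bit value through the node text format. A standalone sketch of that round trip (plain C; PostgreSQL's strtoi64 and INT64_FORMAT are spelled strtoll and PRId64 here):

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	int64_t		queryid = -4822732162132152913LL;
	char		buf[32];
	int64_t		parsed;

	/* write side: emit the value with a signed 64-bit format */
	snprintf(buf, sizeof(buf), "%" PRId64, queryid);

	/* read side: parse it back with a signed 64-bit strtol variant */
	parsed = strtoll(buf, NULL, 10);
	printf("%s -> %" PRId64 "\n", buf, parsed);
	return 0;
}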
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 981be42e3af..451fb90a610 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -2234,6 +2234,12 @@ do_autovacuum(void)
get_namespace_name(classForm->relnamespace),
NameStr(classForm->relname))));
+ /*
+ * Deletion might involve TOAST table access, so ensure we have a
+ * valid snapshot.
+ */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
object.classId = RelationRelationId;
object.objectId = relid;
object.objectSubId = 0;
@@ -2246,6 +2252,7 @@ do_autovacuum(void)
* To commit the deletion, end current transaction and start a new
* one. Note this also releases the locks we took.
*/
+ PopActiveSnapshot();
CommitTransactionCommand();
StartTransactionCommand();
diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c
index 10677da56b2..1c3c051403d 100644
--- a/src/backend/replication/logical/launcher.c
+++ b/src/backend/replication/logical/launcher.c
@@ -1016,7 +1016,7 @@ logicalrep_launcher_attach_dshmem(void)
last_start_times_dsa = dsa_attach(LogicalRepCtx->last_start_dsa);
dsa_pin_mapping(last_start_times_dsa);
last_start_times = dshash_attach(last_start_times_dsa, &dsh_params,
- LogicalRepCtx->last_start_dsh, 0);
+ LogicalRepCtx->last_start_dsh, NULL);
}
MemoryContextSwitchTo(oldcontext);
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 4151a4b2a96..a23262957ac 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -4626,8 +4626,16 @@ run_apply_worker()
walrcv_startstreaming(LogRepWorkerWalRcvConn, &options);
StartTransactionCommand();
+
+ /*
+ * Updating pg_subscription might involve TOAST table access, so
+ * ensure we have a valid snapshot.
+ */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
UpdateTwoPhaseState(MySubscription->oid, LOGICALREP_TWOPHASE_STATE_ENABLED);
MySubscription->twophasestate = LOGICALREP_TWOPHASE_STATE_ENABLED;
+ PopActiveSnapshot();
CommitTransactionCommand();
}
else
@@ -4843,7 +4851,15 @@ DisableSubscriptionAndExit(void)
/* Disable the subscription */
StartTransactionCommand();
+
+ /*
+ * Updating pg_subscription might involve TOAST table access, so ensure we
+ * have a valid snapshot.
+ */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
DisableSubscription(MySubscription->oid);
+ PopActiveSnapshot();
CommitTransactionCommand();
/* Ensure we remove no-longer-useful entry for worker's start time */
@@ -4948,6 +4964,12 @@ clear_subscription_skip_lsn(XLogRecPtr finish_lsn)
}
/*
+ * Updating pg_subscription might involve TOAST table access, so ensure we
+ * have a valid snapshot.
+ */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
+ /*
* Protect subskiplsn of pg_subscription from being concurrently updated
* while clearing it.
*/
@@ -5005,6 +5027,8 @@ clear_subscription_skip_lsn(XLogRecPtr finish_lsn)
heap_freetuple(tup);
table_close(rel, NoLock);
+ PopActiveSnapshot();
+
if (started_tx)
CommitTransactionCommand();
}
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 9fa8beb6103..f2c33250e8b 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -3449,8 +3449,16 @@ XLogSendLogical(void)
if (flushPtr == InvalidXLogRecPtr ||
logical_decoding_ctx->reader->EndRecPtr >= flushPtr)
{
+ /*
+ * For cascading logical WAL senders, we use the replay LSN instead of
+ * the flush LSN, since logical decoding on a standby only processes
+ * WAL that has been replayed. This distinction becomes particularly
+ * important during shutdown, as new WAL is no longer replayed and the
+ * last replayed LSN marks the furthest point up to which decoding can
+ * proceed.
+ */
if (am_cascading_walsender)
- flushPtr = GetStandbyFlushRecPtr(NULL);
+ flushPtr = GetXLogReplayRecPtr(NULL);
else
flushPtr = GetFlushRecPtr(NULL);
}
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index f0bce5f9ed9..2ef0e7fbf3a 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -4544,7 +4544,7 @@ build_generation_expression(Relation rel, int attrno)
List *
QueryRewrite(Query *parsetree)
{
- uint64 input_query_id = parsetree->queryId;
+ int64 input_query_id = parsetree->queryId;
List *querylist;
List *results;
ListCell *l;
diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c
index c64d815ebd1..6c6c0a908e2 100644
--- a/src/backend/storage/aio/aio.c
+++ b/src/backend/storage/aio/aio.c
@@ -752,7 +752,7 @@ pgaio_io_wait_for_free(void)
{
int reclaimed = 0;
- pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %d in-flight, %d idle IOs",
+ pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %u in-flight, %u idle IOs",
pgaio_my_backend->num_staged_ios,
dclist_count(&pgaio_my_backend->in_flight_ios),
dclist_count(&pgaio_my_backend->idle_ios));
@@ -797,7 +797,7 @@ pgaio_io_wait_for_free(void)
if (dclist_count(&pgaio_my_backend->in_flight_ios) == 0)
ereport(ERROR,
errmsg_internal("no free IOs despite no in-flight IOs"),
- errdetail_internal("%d pending, %d in-flight, %d idle IOs",
+ errdetail_internal("%d pending, %u in-flight, %u idle IOs",
pgaio_my_backend->num_staged_ios,
dclist_count(&pgaio_my_backend->in_flight_ios),
dclist_count(&pgaio_my_backend->idle_ios)));
@@ -828,7 +828,7 @@ pgaio_io_wait_for_free(void)
case PGAIO_HS_COMPLETED_IO:
case PGAIO_HS_SUBMITTED:
pgaio_debug_io(DEBUG2, ioh,
- "waiting for free io with %d in flight",
+ "waiting for free io with %u in flight",
dclist_count(&pgaio_my_backend->in_flight_ios));
/*
@@ -1252,7 +1252,7 @@ pgaio_closing_fd(int fd)
break;
pgaio_debug_io(DEBUG2, ioh,
- "waiting for IO before FD %d gets closed, %d in-flight IOs",
+ "waiting for IO before FD %d gets closed, %u in-flight IOs",
fd, dclist_count(&pgaio_my_backend->in_flight_ios));
/* see comment in pgaio_io_wait_for_free() about raciness */
@@ -1288,7 +1288,7 @@ pgaio_shutdown(int code, Datum arg)
uint64 generation = ioh->generation;
pgaio_debug_io(DEBUG2, ioh,
- "waiting for IO to complete during shutdown, %d in-flight IOs",
+ "waiting for IO to complete during shutdown, %u in-flight IOs",
dclist_count(&pgaio_my_backend->in_flight_ios));
/* see comment in pgaio_io_wait_for_free() about raciness */
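Note: the %d-to-%u switches in this file match the return type of dclist_count(), which is uint32; checkers such as gcc's -Wformat-signedness flag the old mismatch. Reduced to a sketch:

    uint32 n = dclist_count(&pgaio_my_backend->in_flight_ios);

    elog(DEBUG2, "%u in-flight IOs", n);    /* %u is the uint32 conversion */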
diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c
index c719ba2727a..cc312b641ca 100644
--- a/src/backend/storage/aio/method_io_uring.c
+++ b/src/backend/storage/aio/method_io_uring.c
@@ -126,7 +126,7 @@ pgaio_uring_shmem_size(void)
static void
pgaio_uring_shmem_init(bool first_time)
{
- int TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS - MAX_IO_WORKERS;
+ int TotalProcs = pgaio_uring_procs();
bool found;
pgaio_uring_contexts = (PgAioUringContext *)
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f93131a645e..667aa0c0c78 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -7320,7 +7320,7 @@ buffer_readv_report(PgAioResult result, const PgAioTargetData *td,
affected_count > 1 ?
errdetail("Block %u held first zeroed page.",
first + first_off) : 0,
- errhint("See server log for details about the other %u invalid block(s).",
+ errhint("See server log for details about the other %d invalid block(s).",
affected_count + checkfail_count - 1));
return;
}
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 63101d56a07..ba26627f7b0 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -629,7 +629,7 @@ InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced)
*/
if (check_unreferenced &&
(LocalRefCount[bufid] != 0 || BUF_STATE_GET_REFCOUNT(buf_state) != 0))
- elog(ERROR, "block %u of %s is still referenced (local %u)",
+ elog(ERROR, "block %u of %s is still referenced (local %d)",
bufHdr->tag.blockNum,
relpathbackend(BufTagGetRelFileLocator(&bufHdr->tag),
MyProcNumber,
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index f50962983c3..3f6bf70bd3c 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -717,7 +717,10 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid,
* through, to avoid slowing down the normal case.)
*/
if (!first)
+ {
+ CHECK_FOR_INTERRUPTS();
pg_usleep(1000L);
+ }
first = false;
xid = SubTransGetTopmostTransaction(xid);
}
@@ -757,7 +760,10 @@ ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
/* See XactLockTableWait about this case */
if (!first)
+ {
+ CHECK_FOR_INTERRUPTS();
pg_usleep(1000L);
+ }
first = false;
xid = SubTransGetTopmostTransaction(xid);
}
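Note: with the new CHECK_FOR_INTERRUPTS() call, a backend stuck waiting on a long chain of subtransactions responds to cancel/die requests between 1 ms sleeps instead of only at the next lock acquisition. The loop shape after the change, simplified:

    bool first = true;

    for (;;)
    {
        /* ... try to acquire the xact's lock; break out once it is gone ... */
        if (!first)
        {
            CHECK_FOR_INTERRUPTS();    /* honor pending cancel/die */
            pg_usleep(1000L);          /* 1 ms between retries */
        }
        first = false;
        xid = SubTransGetTopmostTransaction(xid);
    }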
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 86b06b9223f..2776ceb295b 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -51,7 +51,7 @@
/* GUC variables */
int max_locks_per_xact; /* used to set the lock table size */
-bool log_lock_failure = false;
+bool log_lock_failures = false;
#define NLOCKENTS() \
mul_size(max_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index c242c8170b5..2f8c3d5f918 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -1682,7 +1682,7 @@ exec_bind_message(StringInfo input_message)
{
Query *query = lfirst_node(Query, lc);
- if (query->queryId != UINT64CONST(0))
+ if (query->queryId != INT64CONST(0))
{
pgstat_report_query_id(query->queryId, false);
break;
@@ -2034,7 +2034,7 @@ exec_bind_message(StringInfo input_message)
{
PlannedStmt *plan = lfirst_node(PlannedStmt, lc);
- if (plan->planId != UINT64CONST(0))
+ if (plan->planId != INT64CONST(0))
{
pgstat_report_plan_id(plan->planId, false);
break;
@@ -2174,7 +2174,7 @@ exec_execute_message(const char *portal_name, long max_rows)
{
PlannedStmt *stmt = lfirst_node(PlannedStmt, lc);
- if (stmt->queryId != UINT64CONST(0))
+ if (stmt->queryId != INT64CONST(0))
{
pgstat_report_query_id(stmt->queryId, false);
break;
@@ -2185,7 +2185,7 @@ exec_execute_message(const char *portal_name, long max_rows)
{
PlannedStmt *stmt = lfirst_node(PlannedStmt, lc);
- if (stmt->planId != UINT64CONST(0))
+ if (stmt->planId != INT64CONST(0))
{
pgstat_report_plan_id(stmt->planId, false);
break;
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
index e1576e64b6d..a290cc4c975 100644
--- a/src/backend/utils/activity/backend_status.c
+++ b/src/backend/utils/activity/backend_status.c
@@ -320,8 +320,8 @@ pgstat_bestart_initial(void)
lbeentry.st_state = STATE_STARTING;
lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID;
lbeentry.st_progress_command_target = InvalidOid;
- lbeentry.st_query_id = UINT64CONST(0);
- lbeentry.st_plan_id = UINT64CONST(0);
+ lbeentry.st_query_id = INT64CONST(0);
+ lbeentry.st_plan_id = INT64CONST(0);
/*
* we don't zero st_progress_param here to save cycles; nobody should
@@ -599,8 +599,8 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
beentry->st_activity_start_timestamp = 0;
/* st_xact_start_timestamp and wait_event_info are also disabled */
beentry->st_xact_start_timestamp = 0;
- beentry->st_query_id = UINT64CONST(0);
- beentry->st_plan_id = UINT64CONST(0);
+ beentry->st_query_id = INT64CONST(0);
+ beentry->st_plan_id = INT64CONST(0);
proc->wait_event_info = 0;
PGSTAT_END_WRITE_ACTIVITY(beentry);
}
@@ -662,8 +662,8 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
*/
if (state == STATE_RUNNING)
{
- beentry->st_query_id = UINT64CONST(0);
- beentry->st_plan_id = UINT64CONST(0);
+ beentry->st_query_id = INT64CONST(0);
+ beentry->st_plan_id = INT64CONST(0);
}
if (cmd_str != NULL)
@@ -683,7 +683,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
* --------
*/
void
-pgstat_report_query_id(uint64 query_id, bool force)
+pgstat_report_query_id(int64 query_id, bool force)
{
volatile PgBackendStatus *beentry = MyBEEntry;
@@ -702,7 +702,7 @@ pgstat_report_query_id(uint64 query_id, bool force)
* command, so ignore the one provided unless it's an explicit call to
* reset the identifier.
*/
- if (beentry->st_query_id != 0 && !force)
+ if (beentry->st_query_id != INT64CONST(0) && !force)
return;
/*
@@ -722,7 +722,7 @@ pgstat_report_query_id(uint64 query_id, bool force)
* --------
*/
void
-pgstat_report_plan_id(uint64 plan_id, bool force)
+pgstat_report_plan_id(int64 plan_id, bool force)
{
volatile PgBackendStatus *beentry = MyBEEntry;
@@ -1134,7 +1134,7 @@ pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen)
*
* Return current backend's query identifier.
*/
-uint64
+int64
pgstat_get_my_query_id(void)
{
if (!MyBEEntry)
@@ -1154,7 +1154,7 @@ pgstat_get_my_query_id(void)
*
* Return current backend's plan identifier.
*/
-uint64
+int64
pgstat_get_my_plan_id(void)
{
if (!MyBEEntry)
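Note: query and plan identifiers are reported to SQL as bigint (pg_stat_activity.query_id, for instance), so storing them as int64 end to end removes the unsigned-to-signed hop at that boundary. Consumer-side sketch (the Datum variable is illustrative; compare the pgstatfuncs.c hunk below):

    int64 qid = pgstat_get_my_query_id();
    Datum d;

    if (qid != INT64CONST(0))
        d = Int64GetDatum(qid);    /* matches the bigint exposed to SQL */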
diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c
index 2e33293b000..53e7d534270 100644
--- a/src/backend/utils/activity/pgstat_shmem.c
+++ b/src/backend/utils/activity/pgstat_shmem.c
@@ -183,7 +183,7 @@ StatsShmemInit(void)
p += MAXALIGN(pgstat_dsa_init_size());
dsa = dsa_create_in_place(ctl->raw_dsa_area,
pgstat_dsa_init_size(),
- LWTRANCHE_PGSTATS_DSA, 0);
+ LWTRANCHE_PGSTATS_DSA, NULL);
dsa_pin(dsa);
/*
@@ -255,7 +255,8 @@ pgstat_attach_shmem(void)
dsa_pin_mapping(pgStatLocal.dsa);
pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
- pgStatLocal.shmem->hash_handle, 0);
+ pgStatLocal.shmem->hash_handle,
+ NULL);
MemoryContextSwitchTo(oldcontext);
}
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 5d9e04d6823..4da68312b5f 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -401,6 +401,7 @@ SerialSLRU "Waiting to access the serializable transaction conflict SLRU cache."
SubtransSLRU "Waiting to access the sub-transaction SLRU cache."
XactSLRU "Waiting to access the transaction status SLRU cache."
ParallelVacuumDSA "Waiting for parallel vacuum dynamic shared memory allocation."
+AioUringCompletion "Waiting for another process to complete IO via io_uring."
# No "ABI_compatibility" region here as WaitEventLWLock has its own C code.
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 793d8a9adcc..680fee2a844 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -702,9 +702,18 @@ ParseFraction(char *cp, double *frac)
}
else
{
+ /*
+ * On the other hand, let's reject anything that's not digits after
+ * the ".". strtod is happy with input like ".123e9", but that'd
+ * break callers' expectation that the result is in 0..1. (It's quite
+ * difficult to get here with such input, but not impossible.)
+ */
+ if (strspn(cp + 1, "0123456789") != strlen(cp + 1))
+ return DTERR_BAD_FORMAT;
+
errno = 0;
*frac = strtod(cp, &cp);
- /* check for parse failure */
+ /* check for parse failure (probably redundant given prior check) */
if (*cp != '\0' || errno != 0)
return DTERR_BAD_FORMAT;
}
@@ -2959,30 +2968,27 @@ DecodeNumberField(int len, char *str, int fmask,
char *cp;
/*
+ * This function was originally meant to cope only with DTK_NUMBER fields,
+ * but we now sometimes abuse it to parse (parts of) DTK_DATE fields,
+ * which can contain letters and other punctuation. Reject if it's not a
+ * valid DTK_NUMBER, that is digits and decimal point(s). (ParseFraction
+ * will reject if there's more than one decimal point.)
+ */
+ if (strspn(str, "0123456789.") != len)
+ return DTERR_BAD_FORMAT;
+
+ /*
* Have a decimal point? Then this is a date or something with a seconds
* field...
*/
if ((cp = strchr(str, '.')) != NULL)
{
- /*
- * Can we use ParseFractionalSecond here? Not clear whether trailing
- * junk should be rejected ...
- */
- if (cp[1] == '\0')
- {
- /* avoid assuming that strtod will accept "." */
- *fsec = 0;
- }
- else
- {
- double frac;
+ int dterr;
- errno = 0;
- frac = strtod(cp, NULL);
- if (errno != 0)
- return DTERR_BAD_FORMAT;
- *fsec = rint(frac * 1000000);
- }
+ /* Convert the fraction and store at *fsec */
+ dterr = ParseFractionalSecond(cp, fsec);
+ if (dterr)
+ return dterr;
/* Now truncate off the fraction for further processing */
*cp = '\0';
len = strlen(str);
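Note: the new guard admits only digits between the '.' and the end of the string, since strtod() by itself would also accept exponent forms. A standalone restatement, with an invented helper name:

    #include <stdbool.h>
    #include <string.h>

    /* true if everything after the '.' at *cp is a plain ASCII digit */
    static bool
    fraction_digits_only(const char *cp)
    {
        return strspn(cp + 1, "0123456789") == strlen(cp + 1);
    }

Under this check ".123" still parses, while ".123e9" fails with DTERR_BAD_FORMAT before strtod() is reached.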
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 97af7c6554f..1c12ddbae49 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -640,10 +640,10 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
values[28] = BoolGetDatum(false); /* GSS credentials not
* delegated */
}
- if (beentry->st_query_id == 0)
+ if (beentry->st_query_id == INT64CONST(0))
nulls[30] = true;
else
- values[30] = UInt64GetDatum(beentry->st_query_id);
+ values[30] = Int64GetDatum(beentry->st_query_id);
}
else
{
@@ -1510,7 +1510,7 @@ pg_stat_io_build_tuples(ReturnSetInfo *rsinfo,
bktype_stats->bytes[io_obj][io_context][io_op];
/* Convert to numeric */
- snprintf(buf, sizeof buf, UINT64_FORMAT, byte);
+ snprintf(buf, sizeof buf, INT64_FORMAT, byte);
values[byte_idx] = DirectFunctionCall3(numeric_in,
CStringGetDatum(buf),
ObjectIdGetDatum(0),
diff --git a/src/backend/utils/cache/funccache.c b/src/backend/utils/cache/funccache.c
index 150c502a612..afc048a051e 100644
--- a/src/backend/utils/cache/funccache.c
+++ b/src/backend/utils/cache/funccache.c
@@ -491,6 +491,7 @@ cached_function_compile(FunctionCallInfo fcinfo,
CachedFunctionHashKey hashkey;
bool function_valid = false;
bool hashkey_valid = false;
+ bool new_function = false;
/*
* Lookup the pg_proc tuple by Oid; we'll need it in any case
@@ -570,13 +571,15 @@ recheck:
/*
* Create the new function struct, if not done already. The function
- * structs are never thrown away, so keep them in TopMemoryContext.
+ * cache entry will be kept for the life of the backend, so put it in
+ * TopMemoryContext.
*/
Assert(cacheEntrySize >= sizeof(CachedFunction));
if (function == NULL)
{
function = (CachedFunction *)
MemoryContextAllocZero(TopMemoryContext, cacheEntrySize);
+ new_function = true;
}
else
{
@@ -585,17 +588,36 @@ recheck:
}
/*
- * Fill in the CachedFunction part. fn_hashkey and use_count remain
- * zeroes for now.
+ * However, if function compilation fails, we'd like not to leak the
+ * function struct, so use a PG_TRY block to prevent that. (It's up
+ * to the compile callback function to avoid its own internal leakage
+ * in such cases.) Unfortunately, freeing the struct is only safe if
+ * we just allocated it: otherwise there are probably fn_extra
+ * pointers to it.
*/
- function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
- function->fn_tid = procTup->t_self;
- function->dcallback = dcallback;
+ PG_TRY();
+ {
+ /*
+ * Do the hard, language-specific part.
+ */
+ ccallback(fcinfo, procTup, &hashkey, function, forValidator);
+ }
+ PG_CATCH();
+ {
+ if (new_function)
+ pfree(function);
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
/*
- * Do the hard, language-specific part.
+ * Fill in the CachedFunction part. (We do this last to prevent the
+ * function from looking valid before it's fully built.) fn_hashkey
+ * will be set by cfunc_hashtable_insert; use_count remains zero.
*/
- ccallback(fcinfo, procTup, &hashkey, function, forValidator);
+ function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
+ function->fn_tid = procTup->t_self;
+ function->dcallback = dcallback;
/*
* Add the completed struct to the hash table.
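Note: the PG_CATCH branch frees the struct only when this call allocated it; a struct fetched from the hash table may already be referenced from earlier calls, typically via the fn_extra pointer that language handlers cache (illustrative line, not from this diff):

    fcinfo->flinfo->fn_extra = function;    /* cached on a prior successful call */

Freeing a reused struct would leave such cached pointers dangling, hence the new_function flag.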
diff --git a/src/backend/utils/fmgr/dfmgr.c b/src/backend/utils/fmgr/dfmgr.c
index 603632581d0..4bb84ff7087 100644
--- a/src/backend/utils/fmgr/dfmgr.c
+++ b/src/backend/utils/fmgr/dfmgr.c
@@ -99,6 +99,14 @@ load_external_function(const char *filename, const char *funcname,
void *lib_handle;
void *retval;
+ /*
+ * If the value starts with "$libdir/", strip that. This is because many
+ * extensions have hardcoded '$libdir/foo' as their library name, which
+ * prevents using the path.
+ */
+ if (strncmp(filename, "$libdir/", 8) == 0)
+ filename += 8;
+
/* Expand the possibly-abbreviated filename to an exact path name */
fullname = expand_dynamic_library_name(filename);
@@ -456,14 +464,6 @@ expand_dynamic_library_name(const char *name)
Assert(name);
- /*
- * If the value starts with "$libdir/", strip that. This is because many
- * extensions have hardcoded '$libdir/foo' as their library name, which
- * prevents using the path.
- */
- if (strncmp(name, "$libdir/", 8) == 0)
- name += 8;
-
have_slash = (first_dir_separator(name) != NULL);
if (!have_slash)
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 2f8cbd86759..f04bfedb2fd 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1602,11 +1602,11 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
{
- {"log_lock_failure", PGC_SUSET, LOGGING_WHAT,
+ {"log_lock_failures", PGC_SUSET, LOGGING_WHAT,
gettext_noop("Logs lock failures."),
NULL
},
- &log_lock_failure,
+ &log_lock_failures,
false,
NULL, NULL, NULL
},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 87ce76b18f4..341f88adc87 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -624,7 +624,7 @@
# %% = '%'
# e.g. '<%u%%%d> '
#log_lock_waits = off # log lock waits >= deadlock_timeout
-#log_lock_failure = off # log lock failures
+#log_lock_failures = off # log lock failures
#log_recovery_conflict_waits = off # log standby recovery conflict waits
# >= deadlock_timeout
#log_parameter_max_length = -1 # when logging statements, limit logged