Diffstat (limited to 'src/backend')
-rw-r--r--  src/backend/access/brin/brin.c | 2
-rw-r--r--  src/backend/access/common/reloptions.c | 17
-rw-r--r--  src/backend/access/common/tupdesc.c | 15
-rw-r--r--  src/backend/access/gist/gistutil.c | 14
-rw-r--r--  src/backend/access/gist/gistvalidate.c | 6
-rw-r--r--  src/backend/access/heap/heapam.c | 35
-rw-r--r--  src/backend/access/heap/heapam_handler.c | 2
-rw-r--r--  src/backend/access/heap/heapam_xlog.c | 7
-rw-r--r--  src/backend/access/heap/vacuumlazy.c | 148
-rw-r--r--  src/backend/access/nbtree/nbtpreprocesskeys.c | 412
-rw-r--r--  src/backend/access/nbtree/nbtree.c | 32
-rw-r--r--  src/backend/access/nbtree/nbtsearch.c | 530
-rw-r--r--  src/backend/access/nbtree/nbtsort.c | 2
-rw-r--r--  src/backend/access/nbtree/nbtutils.c | 390
-rw-r--r--  src/backend/access/rmgrdesc/xactdesc.c | 2
-rw-r--r--  src/backend/access/transam/commit_ts.c | 7
-rw-r--r--  src/backend/access/transam/xlog.c | 4
-rw-r--r--  src/backend/access/transam/xlogrecovery.c | 18
-rw-r--r--  src/backend/bootstrap/bootstrap.c | 2
-rw-r--r--  src/backend/catalog/dependency.c | 11
-rw-r--r--  src/backend/catalog/heap.c | 96
-rw-r--r--  src/backend/catalog/index.c | 2
-rw-r--r--  src/backend/catalog/system_views.sql | 7
-rw-r--r--  src/backend/commands/analyze.c | 26
-rw-r--r--  src/backend/commands/cluster.c | 2
-rw-r--r--  src/backend/commands/copy.c | 42
-rw-r--r--  src/backend/commands/copyfromparse.c | 19
-rw-r--r--  src/backend/commands/copyto.c | 7
-rw-r--r--  src/backend/commands/createas.c | 5
-rw-r--r--  src/backend/commands/dbcommands.c | 41
-rw-r--r--  src/backend/commands/explain.c | 34
-rw-r--r--  src/backend/commands/extension.c | 4
-rw-r--r--  src/backend/commands/foreigncmds.c | 15
-rw-r--r--  src/backend/commands/indexcmds.c | 44
-rw-r--r--  src/backend/commands/matview.c | 8
-rw-r--r--  src/backend/commands/portalcmds.c | 1
-rw-r--r--  src/backend/commands/prepare.c | 9
-rw-r--r--  src/backend/commands/publicationcmds.c | 4
-rw-r--r--  src/backend/commands/subscriptioncmds.c | 6
-rw-r--r--  src/backend/commands/tablecmds.c | 148
-rw-r--r--  src/backend/commands/trigger.c | 15
-rw-r--r--  src/backend/commands/typecmds.c | 14
-rw-r--r--  src/backend/commands/vacuum.c | 121
-rw-r--r--  src/backend/commands/vacuumparallel.c | 2
-rw-r--r--  src/backend/executor/README | 35
-rw-r--r--  src/backend/executor/execGrouping.c | 4
-rw-r--r--  src/backend/executor/execIndexing.c | 4
-rw-r--r--  src/backend/executor/execMain.c | 127
-rw-r--r--  src/backend/executor/execParallel.c | 12
-rw-r--r--  src/backend/executor/execPartition.c | 66
-rw-r--r--  src/backend/executor/execUtils.c | 1
-rw-r--r--  src/backend/executor/functions.c | 5
-rw-r--r--  src/backend/executor/nodeModifyTable.c | 140
-rw-r--r--  src/backend/executor/nodeTidrangescan.c | 6
-rw-r--r--  src/backend/executor/spi.c | 29
-rw-r--r--  src/backend/jit/README | 2
-rw-r--r--  src/backend/jit/llvm/meson.build | 2
-rw-r--r--  src/backend/lib/README | 4
-rw-r--r--  src/backend/libpq/be-secure-gssapi.c | 67
-rw-r--r--  src/backend/libpq/be-secure-openssl.c | 4
-rw-r--r--  src/backend/nodes/gen_node_support.pl | 7
-rw-r--r--  src/backend/nodes/outfuncs.c | 8
-rw-r--r--  src/backend/nodes/queryjumblefuncs.c | 302
-rw-r--r--  src/backend/nodes/readfuncs.c | 8
-rw-r--r--  src/backend/optimizer/path/joinpath.c | 56
-rw-r--r--  src/backend/optimizer/path/joinrels.c | 60
-rw-r--r--  src/backend/optimizer/plan/createplan.c | 70
-rw-r--r--  src/backend/optimizer/plan/planner.c | 4
-rw-r--r--  src/backend/optimizer/plan/setrefs.c | 10
-rw-r--r--  src/backend/optimizer/util/clauses.c | 7
-rw-r--r--  src/backend/optimizer/util/paramassign.c | 109
-rw-r--r--  src/backend/optimizer/util/placeholder.c | 40
-rw-r--r--  src/backend/parser/analyze.c | 95
-rw-r--r--  src/backend/parser/gram.y | 276
-rw-r--r--  src/backend/parser/parse_expr.c | 4
-rw-r--r--  src/backend/parser/parse_utilcmd.c | 22
-rw-r--r--  src/backend/postmaster/autovacuum.c | 63
-rw-r--r--  src/backend/postmaster/checkpointer.c | 4
-rw-r--r--  src/backend/postmaster/interrupt.c | 4
-rw-r--r--  src/backend/postmaster/pgarch.c | 16
-rw-r--r--  src/backend/postmaster/startup.c | 4
-rw-r--r--  src/backend/postmaster/walsummarizer.c | 4
-rw-r--r--  src/backend/regex/regc_pg_locale.c | 429
-rw-r--r--  src/backend/replication/logical/launcher.c | 44
-rw-r--r--  src/backend/replication/logical/logical.c | 41
-rw-r--r--  src/backend/replication/logical/reorderbuffer.c | 198
-rw-r--r--  src/backend/replication/logical/slotsync.c | 6
-rw-r--r--  src/backend/replication/logical/snapbuild.c | 12
-rw-r--r--  src/backend/replication/logical/tablesync.c | 19
-rw-r--r--  src/backend/replication/logical/worker.c | 31
-rw-r--r--  src/backend/replication/pgoutput/pgoutput.c | 2
-rw-r--r--  src/backend/replication/slot.c | 64
-rw-r--r--  src/backend/replication/walsender.c | 10
-rw-r--r--  src/backend/rewrite/rewriteHandler.c | 7
-rw-r--r--  src/backend/storage/aio/aio.c | 140
-rw-r--r--  src/backend/storage/aio/aio_callback.c | 7
-rw-r--r--  src/backend/storage/aio/aio_io.c | 4
-rw-r--r--  src/backend/storage/aio/method_io_uring.c | 8
-rw-r--r--  src/backend/storage/aio/method_worker.c | 7
-rw-r--r--  src/backend/storage/buffer/bufmgr.c | 10
-rw-r--r--  src/backend/storage/buffer/localbuf.c | 23
-rw-r--r--  src/backend/storage/file/fd.c | 19
-rw-r--r--  src/backend/storage/ipc/dsm_registry.c | 265
-rw-r--r--  src/backend/storage/ipc/ipci.c | 3
-rw-r--r--  src/backend/storage/ipc/procsignal.c | 3
-rw-r--r--  src/backend/storage/ipc/shmem.c | 4
-rw-r--r--  src/backend/storage/lmgr/lmgr.c | 6
-rw-r--r--  src/backend/storage/lmgr/lock.c | 2
-rw-r--r--  src/backend/storage/lmgr/lwlock.c | 2
-rw-r--r--  src/backend/storage/lmgr/proc.c | 1
-rw-r--r--  src/backend/tcop/backend_startup.c | 6
-rw-r--r--  src/backend/tcop/postgres.c | 17
-rw-r--r--  src/backend/tcop/pquery.c | 51
-rw-r--r--  src/backend/tcop/utility.c | 12
-rw-r--r--  src/backend/utils/activity/backend_status.c | 22
-rw-r--r--  src/backend/utils/activity/pgstat_shmem.c | 5
-rw-r--r--  src/backend/utils/activity/wait_event_names.txt | 2
-rw-r--r--  src/backend/utils/adt/Makefile | 1
-rw-r--r--  src/backend/utils/adt/bytea.c | 1143
-rw-r--r--  src/backend/utils/adt/date.c | 86
-rw-r--r--  src/backend/utils/adt/datetime.c | 44
-rw-r--r--  src/backend/utils/adt/float.c | 22
-rw-r--r--  src/backend/utils/adt/formatting.c | 5
-rw-r--r--  src/backend/utils/adt/inet_net_pton.c | 3
-rw-r--r--  src/backend/utils/adt/like.c | 22
-rw-r--r--  src/backend/utils/adt/like_support.c | 7
-rw-r--r--  src/backend/utils/adt/mcxtfuncs.c | 426
-rw-r--r--  src/backend/utils/adt/meson.build | 1
-rw-r--r--  src/backend/utils/adt/network.c | 2
-rw-r--r--  src/backend/utils/adt/network_spgist.c | 1
-rw-r--r--  src/backend/utils/adt/numeric.c | 20
-rw-r--r--  src/backend/utils/adt/pg_locale.c | 123
-rw-r--r--  src/backend/utils/adt/pg_locale_builtin.c | 113
-rw-r--r--  src/backend/utils/adt/pg_locale_icu.c | 130
-rw-r--r--  src/backend/utils/adt/pg_locale_libc.c | 337
-rw-r--r--  src/backend/utils/adt/pgstatfuncs.c | 6
-rw-r--r--  src/backend/utils/adt/regexp.c | 38
-rw-r--r--  src/backend/utils/adt/regproc.c | 118
-rw-r--r--  src/backend/utils/adt/ri_triggers.c | 2
-rw-r--r--  src/backend/utils/adt/ruleutils.c | 10
-rw-r--r--  src/backend/utils/adt/selfuncs.c | 3
-rw-r--r--  src/backend/utils/adt/timestamp.c | 81
-rw-r--r--  src/backend/utils/adt/varlena.c | 1098
-rw-r--r--  src/backend/utils/adt/xml.c | 99
-rw-r--r--  src/backend/utils/cache/catcache.c | 1
-rw-r--r--  src/backend/utils/cache/funccache.c | 38
-rw-r--r--  src/backend/utils/cache/plancache.c | 197
-rw-r--r--  src/backend/utils/fmgr/dfmgr.c | 16
-rw-r--r--  src/backend/utils/init/globals.c | 1
-rw-r--r--  src/backend/utils/init/postinit.c | 7
-rw-r--r--  src/backend/utils/mb/mbutils.c | 1
-rw-r--r--  src/backend/utils/misc/guc_tables.c | 8
-rw-r--r--  src/backend/utils/misc/injection_point.c | 46
-rw-r--r--  src/backend/utils/misc/postgresql.conf.sample | 12
-rw-r--r--  src/backend/utils/mmgr/alignedalloc.c | 29
-rw-r--r--  src/backend/utils/mmgr/dsa.c | 15
-rw-r--r--  src/backend/utils/mmgr/mcxt.c | 646
-rw-r--r--  src/backend/utils/mmgr/portalmem.c | 4
158 files changed, 5563 insertions(+), 4890 deletions(-)
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c
index 01e1db7f856..4204088fa0d 100644
--- a/src/backend/access/brin/brin.c
+++ b/src/backend/access/brin/brin.c
@@ -68,7 +68,7 @@ typedef struct BrinShared
int scantuplesortstates;
/* Query ID, for report in worker processes */
- uint64 queryid;
+ int64 queryid;
/*
* workersdonecv is used to monitor the progress of workers. All parallel
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 46c1dce222d..50747c16396 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -1243,8 +1243,9 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
}
else
{
- text *t;
+ const char *name;
const char *value;
+ text *t;
Size len;
/*
@@ -1291,11 +1292,19 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
* have just "name", assume "name=true" is meant. Note: the
* namespace is not output.
*/
+ name = def->defname;
if (def->arg != NULL)
value = defGetString(def);
else
value = "true";
+ /* Insist that name not contain "=", else "a=b=c" is ambiguous */
+ if (strchr(name, '=') != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid option name \"%s\": must not contain \"=\"",
+ name)));
+
/*
* This is not a great place for this test, but there's no other
* convenient place to filter the option out. As WITH (oids =
@@ -1303,7 +1312,7 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
* amount of ugly.
*/
if (acceptOidsOff && def->defnamespace == NULL &&
- strcmp(def->defname, "oids") == 0)
+ strcmp(name, "oids") == 0)
{
if (defGetBoolean(def))
ereport(ERROR,
@@ -1313,11 +1322,11 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace,
continue;
}
- len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value);
+ len = VARHDRSZ + strlen(name) + 1 + strlen(value);
/* +1 leaves room for sprintf's trailing null */
t = (text *) palloc(len + 1);
SET_VARSIZE(t, len);
- sprintf(VARDATA(t), "%s=%s", def->defname, value);
+ sprintf(VARDATA(t), "%s=%s", name, value);
astate = accumArrayResult(astate, PointerGetDatum(t),
false, TEXTOID,
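
The new strchr() check above guards the flattened "name=value" reloption representation: once the pair is stored as a single text string, a name containing "=" can no longer be decoded unambiguously. A minimal standalone sketch (illustrative code, not part of PostgreSQL) of how a first-'=' decoder misreads such a name:

#include <stdio.h>
#include <string.h>

/* Split a flattened "name=value" string on the FIRST '=' */
static void
decode_option(const char *flat)
{
	const char *sep = strchr(flat, '=');

	if (sep == NULL)
		return;					/* malformed entry, ignore for this demo */

	printf("name=\"%.*s\" value=\"%s\"\n",
		   (int) (sep - flat), flat, sep + 1);
}

int
main(void)
{
	/* Was this name "a" with value "b=c", or name "a=b" with value "c"? */
	decode_option("a=b=c");
	return 0;
}

Splitting on the last '=' instead would only move the ambiguity to values that contain '=', so rejecting '=' in names up front is the simpler invariant.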
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index ffd0c78f905..020d00cd01c 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -142,11 +142,18 @@ void
verify_compact_attribute(TupleDesc tupdesc, int attnum)
{
#ifdef USE_ASSERT_CHECKING
- CompactAttribute *cattr = &tupdesc->compact_attrs[attnum];
+ CompactAttribute cattr;
Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum);
CompactAttribute tmp;
/*
+ * Make a temp copy of the TupleDesc's CompactAttribute. This may be a
+ * shared TupleDesc and the attcacheoff might get changed by another
+ * backend.
+ */
+ memcpy(&cattr, &tupdesc->compact_attrs[attnum], sizeof(CompactAttribute));
+
+ /*
* Populate the temporary CompactAttribute from the corresponding
* Form_pg_attribute
*/
@@ -156,11 +163,11 @@ verify_compact_attribute(TupleDesc tupdesc, int attnum)
* Make the attcacheoff match since it's been reset to -1 by
* populate_compact_attribute_internal. Same with attnullability.
*/
- tmp.attcacheoff = cattr->attcacheoff;
- tmp.attnullability = cattr->attnullability;
+ tmp.attcacheoff = cattr.attcacheoff;
+ tmp.attnullability = cattr.attnullability;
/* Check the freshly populated CompactAttribute matches the TupleDesc's */
- Assert(memcmp(&tmp, cattr, sizeof(CompactAttribute)) == 0);
+ Assert(memcmp(&tmp, &cattr, sizeof(CompactAttribute)) == 0);
#endif
}
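
The fix above is an instance of a copy-before-compare pattern: the TupleDesc may be shared, and another backend can change attcacheoff between the individual reads the old code performed on the in-place struct. Snapshotting the struct first makes all of the assertion's reads self-consistent. A generic sketch with illustrative names (not PostgreSQL APIs):

#include <string.h>

typedef struct SharedAttr
{
	int			cacheoff;		/* may be updated by another process */
	int			nullability;
} SharedAttr;

static int
attrs_match(const SharedAttr *shared, SharedAttr *expected)
{
	SharedAttr	snap;

	/* Take one local copy, then work only against the copy */
	memcpy(&snap, shared, sizeof(SharedAttr));

	/* Fields that are legitimately mutable are taken from the snapshot */
	expected->cacheoff = snap.cacheoff;

	return memcmp(expected, &snap, sizeof(SharedAttr)) == 0;
}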
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index a6b701943d3..c0aa7d0222f 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -1058,11 +1058,11 @@ gistGetFakeLSN(Relation rel)
}
/*
- * This is a stratnum support function for GiST opclasses that use the
- * RT*StrategyNumber constants.
+ * This is a stratnum translation support function for GiST opclasses that use
+ * the RT*StrategyNumber constants.
*/
Datum
-gist_stratnum_common(PG_FUNCTION_ARGS)
+gist_translate_cmptype_common(PG_FUNCTION_ARGS)
{
CompareType cmptype = PG_GETARG_INT32(0);
@@ -1090,9 +1090,9 @@ gist_stratnum_common(PG_FUNCTION_ARGS)
/*
* Returns the opclass's private stratnum used for the given compare type.
*
- * Calls the opclass's GIST_STRATNUM_PROC support function, if any,
- * and returns the result.
- * Returns InvalidStrategy if the function is not defined.
+ * Calls the opclass's GIST_TRANSLATE_CMPTYPE_PROC support function, if any,
+ * and returns the result. Returns InvalidStrategy if the function is not
+ * defined.
*/
StrategyNumber
gisttranslatecmptype(CompareType cmptype, Oid opfamily)
@@ -1101,7 +1101,7 @@ gisttranslatecmptype(CompareType cmptype, Oid opfamily)
Datum result;
/* Check whether the function is provided. */
- funcid = get_opfamily_proc(opfamily, ANYOID, ANYOID, GIST_STRATNUM_PROC);
+ funcid = get_opfamily_proc(opfamily, ANYOID, ANYOID, GIST_TRANSLATE_CMPTYPE_PROC);
if (!OidIsValid(funcid))
return InvalidStrategy;
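
For reference, a sketch of what an opclass-specific GIST_TRANSLATE_CMPTYPE_PROC support function looks like under the renamed convention. This is an abridged, hypothetical analogue of gist_translate_cmptype_common above; the CompareType and RT*StrategyNumber constants are real, while the function name, reduced switch, and header paths are assumptions of this sketch:

#include "postgres.h"
#include "access/cmptype.h"
#include "access/stratnum.h"
#include "fmgr.h"

PG_FUNCTION_INFO_V1(my_translate_cmptype);

Datum
my_translate_cmptype(PG_FUNCTION_ARGS)
{
	CompareType cmptype = (CompareType) PG_GETARG_INT32(0);

	switch (cmptype)
	{
		case COMPARE_EQ:
			PG_RETURN_UINT16(RTEqualStrategyNumber);
		case COMPARE_LT:
			PG_RETURN_UINT16(RTLessStrategyNumber);
		case COMPARE_GT:
			PG_RETURN_UINT16(RTGreaterStrategyNumber);
		default:
			/* anything we can't translate maps to InvalidStrategy */
			PG_RETURN_UINT16(InvalidStrategy);
	}
}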
diff --git a/src/backend/access/gist/gistvalidate.c b/src/backend/access/gist/gistvalidate.c
index 2a49e6d20f0..2ed6f74fce9 100644
--- a/src/backend/access/gist/gistvalidate.c
+++ b/src/backend/access/gist/gistvalidate.c
@@ -138,7 +138,7 @@ gistvalidate(Oid opclassoid)
ok = check_amproc_signature(procform->amproc, VOIDOID, true,
1, 1, INTERNALOID);
break;
- case GIST_STRATNUM_PROC:
+ case GIST_TRANSLATE_CMPTYPE_PROC:
ok = check_amproc_signature(procform->amproc, INT2OID, true,
1, 1, INT4OID) &&
procform->amproclefttype == ANYOID &&
@@ -265,7 +265,7 @@ gistvalidate(Oid opclassoid)
if (i == GIST_DISTANCE_PROC || i == GIST_FETCH_PROC ||
i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC ||
i == GIST_OPTIONS_PROC || i == GIST_SORTSUPPORT_PROC ||
- i == GIST_STRATNUM_PROC)
+ i == GIST_TRANSLATE_CMPTYPE_PROC)
continue; /* optional methods */
ereport(INFO,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
@@ -336,7 +336,7 @@ gistadjustmembers(Oid opfamilyoid,
case GIST_FETCH_PROC:
case GIST_OPTIONS_PROC:
case GIST_SORTSUPPORT_PROC:
- case GIST_STRATNUM_PROC:
+ case GIST_TRANSLATE_CMPTYPE_PROC:
/* Optional, so force it to be a soft family dependency */
op->ref_is_hard = false;
op->ref_is_family = true;
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 9ec8cda1c68..0dcd6ee817e 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -213,6 +213,27 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] =
#define TUPLOCK_from_mxstatus(status) \
(MultiXactStatusLock[(status)])
+/*
+ * Check that we have a valid snapshot if we might need TOAST access.
+ */
+static inline void
+AssertHasSnapshotForToast(Relation rel)
+{
+#ifdef USE_ASSERT_CHECKING
+
+ /* bootstrap mode in particular breaks this rule */
+ if (!IsNormalProcessingMode())
+ return;
+
+ /* if the relation doesn't have a TOAST table, we are good */
+ if (!OidIsValid(rel->rd_rel->reltoastrelid))
+ return;
+
+ Assert(HaveRegisteredOrActiveSnapshot());
+
+#endif /* USE_ASSERT_CHECKING */
+}
+
/* ----------------------------------------------------------------
* heap support routines
* ----------------------------------------------------------------
@@ -2066,6 +2087,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
Assert(HeapTupleHeaderGetNatts(tup->t_data) <=
RelationGetNumberOfAttributes(relation));
+ AssertHasSnapshotForToast(relation);
+
/*
* Fill in tuple header fields and toast the tuple if necessary.
*
@@ -2343,6 +2366,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
/* currently not needed (thus unsupported) for heap_multi_insert() */
Assert(!(options & HEAP_INSERT_NO_LOGICAL));
+ AssertHasSnapshotForToast(relation);
+
needwal = RelationNeedsWAL(relation);
saveFreeSpace = RelationGetTargetPageFreeSpace(relation,
HEAP_DEFAULT_FILLFACTOR);
@@ -2765,6 +2790,8 @@ heap_delete(Relation relation, ItemPointer tid,
Assert(ItemPointerIsValid(tid));
+ AssertHasSnapshotForToast(relation);
+
/*
* Forbid this during a parallel operation, lest it allocate a combo CID.
* Other workers might need that combo CID for visibility checks, and we
@@ -3260,6 +3287,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
Assert(HeapTupleHeaderGetNatts(newtup->t_data) <=
RelationGetNumberOfAttributes(relation));
+ AssertHasSnapshotForToast(relation);
+
/*
* Forbid this during a parallel operation, lest it allocate a combo CID.
* Other workers might need that combo CID for visibility checks, and we
@@ -4953,7 +4982,7 @@ l3:
case LockWaitError:
if (!ConditionalMultiXactIdWait((MultiXactId) xwait,
status, infomask, relation,
- NULL, log_lock_failure))
+ NULL, log_lock_failures))
ereport(ERROR,
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
errmsg("could not obtain lock on row in relation \"%s\"",
@@ -4991,7 +5020,7 @@ l3:
}
break;
case LockWaitError:
- if (!ConditionalXactLockTableWait(xwait, log_lock_failure))
+ if (!ConditionalXactLockTableWait(xwait, log_lock_failures))
ereport(ERROR,
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
errmsg("could not obtain lock on row in relation \"%s\"",
@@ -5256,7 +5285,7 @@ heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode,
break;
case LockWaitError:
- if (!ConditionalLockTupleTuplock(relation, tid, mode, log_lock_failure))
+ if (!ConditionalLockTupleTuplock(relation, tid, mode, log_lock_failures))
ereport(ERROR,
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
errmsg("could not obtain lock on row in relation \"%s\"",
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index ac082fefa77..cb4bc35c93e 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -464,7 +464,7 @@ tuple_lock_retry:
return TM_WouldBlock;
break;
case LockWaitError:
- if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failure))
+ if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failures))
ereport(ERROR,
(errcode(ERRCODE_LOCK_NOT_AVAILABLE),
errmsg("could not obtain lock on row in relation \"%s\"",
diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c
index 30f4c2d3c67..eb4bd3d6ae3 100644
--- a/src/backend/access/heap/heapam_xlog.c
+++ b/src/backend/access/heap/heapam_xlog.c
@@ -438,6 +438,9 @@ heap_xlog_insert(XLogReaderState *record)
ItemPointerSetBlockNumber(&target_tid, blkno);
ItemPointerSetOffsetNumber(&target_tid, xlrec->offnum);
+ /* No freezing in the heap_insert() code path */
+ Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET));
+
/*
* The visibility map may need to be fixed even if the heap page is
* already up-to-date.
@@ -508,10 +511,6 @@ heap_xlog_insert(XLogReaderState *record)
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page);
- /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
- if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
- PageSetAllVisible(page);
-
MarkBufferDirty(buffer);
}
if (BufferIsValid(buffer))
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index f28326bad09..14036c27e87 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -423,7 +423,7 @@ typedef struct LVSavedErrInfo
/* non-export function prototypes */
static void lazy_scan_heap(LVRelState *vacrel);
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
- VacuumParams *params);
+ const VacuumParams params);
static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
void *callback_private_data,
void *per_buffer_data);
@@ -431,7 +431,7 @@ static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
BlockNumber blkno, Page page,
bool sharelock, Buffer vmbuffer);
-static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
+static int lazy_scan_prune(LVRelState *vacrel, Buffer buf,
BlockNumber blkno, Page page,
Buffer vmbuffer, bool all_visible_according_to_vm,
bool *has_lpdead_items, bool *vm_page_frozen);
@@ -485,7 +485,7 @@ static void restore_vacuum_error_info(LVRelState *vacrel,
* vacuum options or for relfrozenxid/relminmxid advancement.
*/
static void
-heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
+heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params)
{
uint32 randseed;
BlockNumber allvisible;
@@ -504,7 +504,7 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
vacrel->eager_scan_remaining_successes = 0;
/* If eager scanning is explicitly disabled, just return. */
- if (params->max_eager_freeze_failure_rate == 0)
+ if (params.max_eager_freeze_failure_rate == 0)
return;
/*
@@ -581,11 +581,11 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
- Assert(params->max_eager_freeze_failure_rate > 0 &&
- params->max_eager_freeze_failure_rate <= 1);
+ Assert(params.max_eager_freeze_failure_rate > 0 &&
+ params.max_eager_freeze_failure_rate <= 1);
vacrel->eager_scan_max_fails_per_region =
- params->max_eager_freeze_failure_rate *
+ params.max_eager_freeze_failure_rate *
EAGER_SCAN_REGION_SIZE;
/*
@@ -612,7 +612,7 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
* and locked the relation.
*/
void
-heap_vacuum_rel(Relation rel, VacuumParams *params,
+heap_vacuum_rel(Relation rel, const VacuumParams params,
BufferAccessStrategy bstrategy)
{
LVRelState *vacrel;
@@ -634,9 +634,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
ErrorContextCallback errcallback;
char **indnames = NULL;
- verbose = (params->options & VACOPT_VERBOSE) != 0;
+ verbose = (params.options & VACOPT_VERBOSE) != 0;
instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
- params->log_min_duration >= 0));
+ params.log_min_duration >= 0));
if (instrument)
{
pg_rusage_init(&ru0);
@@ -699,9 +699,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
* The truncate param allows the user to avoid attempting relation truncation,
* though it can't force truncation to happen.
*/
- Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED);
- Assert(params->truncate != VACOPTVALUE_UNSPECIFIED &&
- params->truncate != VACOPTVALUE_AUTO);
+ Assert(params.index_cleanup != VACOPTVALUE_UNSPECIFIED);
+ Assert(params.truncate != VACOPTVALUE_UNSPECIFIED &&
+ params.truncate != VACOPTVALUE_AUTO);
/*
* While VacuumFailSafeActive is reset to false before calling this, we
@@ -711,14 +711,14 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
vacrel->consider_bypass_optimization = true;
vacrel->do_index_vacuuming = true;
vacrel->do_index_cleanup = true;
- vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED);
- if (params->index_cleanup == VACOPTVALUE_DISABLED)
+ vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED);
+ if (params.index_cleanup == VACOPTVALUE_DISABLED)
{
/* Force disable index vacuuming up-front */
vacrel->do_index_vacuuming = false;
vacrel->do_index_cleanup = false;
}
- else if (params->index_cleanup == VACOPTVALUE_ENABLED)
+ else if (params.index_cleanup == VACOPTVALUE_ENABLED)
{
/* Force index vacuuming. Note that failsafe can still bypass. */
vacrel->consider_bypass_optimization = false;
@@ -726,7 +726,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
else
{
/* Default/auto, make all decisions dynamically */
- Assert(params->index_cleanup == VACOPTVALUE_AUTO);
+ Assert(params.index_cleanup == VACOPTVALUE_AUTO);
}
/* Initialize page counters explicitly (be tidy) */
@@ -757,7 +757,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
vacrel->vm_new_visible_pages = 0;
vacrel->vm_new_visible_frozen_pages = 0;
vacrel->vm_new_frozen_pages = 0;
- vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
/*
* Get cutoffs that determine which deleted tuples are considered DEAD,
@@ -776,7 +775,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
* to increase the number of dead tuples it can prune away.)
*/
vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
+ vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
vacrel->vistest = GlobalVisTestFor(rel);
+
/* Initialize state used to track oldest extant XID/MXID */
vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
@@ -788,7 +789,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
*/
vacrel->skippedallvis = false;
skipwithvm = true;
- if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
+ if (params.options & VACOPT_DISABLE_PAGE_SKIPPING)
{
/*
* Force aggressive mode, and disable skipping blocks using the
@@ -829,7 +830,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
* is already dangerously old.)
*/
lazy_check_wraparound_failsafe(vacrel);
- dead_items_alloc(vacrel, params->nworkers);
+ dead_items_alloc(vacrel, params.nworkers);
/*
* Call lazy_scan_heap to perform all required heap pruning, index
@@ -946,9 +947,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
{
TimestampTz endtime = GetCurrentTimestamp();
- if (verbose || params->log_min_duration == 0 ||
+ if (verbose || params.log_min_duration == 0 ||
TimestampDifferenceExceeds(starttime, endtime,
- params->log_min_duration))
+ params.log_min_duration))
{
long secs_dur;
int usecs_dur;
@@ -983,10 +984,10 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
* Aggressiveness already reported earlier, in dedicated
* VACUUM VERBOSE ereport
*/
- Assert(!params->is_wraparound);
+ Assert(!params.is_wraparound);
msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n");
}
- else if (params->is_wraparound)
+ else if (params.is_wraparound)
{
/*
* While it's possible for a VACUUM to be both is_wraparound
@@ -1244,6 +1245,7 @@ lazy_scan_heap(LVRelState *vacrel)
Buffer buf;
Page page;
uint8 blk_info = 0;
+ int ndeleted = 0;
bool has_lpdead_items;
void *per_buffer_data = NULL;
bool vm_page_frozen = false;
@@ -1386,10 +1388,10 @@ lazy_scan_heap(LVRelState *vacrel)
* line pointers previously marked LP_DEAD.
*/
if (got_cleanup_lock)
- lazy_scan_prune(vacrel, buf, blkno, page,
- vmbuffer,
- blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM,
- &has_lpdead_items, &vm_page_frozen);
+ ndeleted = lazy_scan_prune(vacrel, buf, blkno, page,
+ vmbuffer,
+ blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM,
+ &has_lpdead_items, &vm_page_frozen);
/*
* Count an eagerly scanned page as a failure or a success.
@@ -1413,12 +1415,26 @@ lazy_scan_heap(LVRelState *vacrel)
if (vm_page_frozen)
{
- Assert(vacrel->eager_scan_remaining_successes > 0);
- vacrel->eager_scan_remaining_successes--;
+ if (vacrel->eager_scan_remaining_successes > 0)
+ vacrel->eager_scan_remaining_successes--;
if (vacrel->eager_scan_remaining_successes == 0)
{
/*
+ * Report only once that we disabled eager scanning. We
+ * may eagerly read ahead blocks in excess of the success
+ * or failure caps before attempting to freeze them, so we
+ * could reach here even after disabling additional eager
+ * scanning.
+ */
+ if (vacrel->eager_scan_max_fails_per_region > 0)
+ ereport(vacrel->verbose ? INFO : DEBUG2,
+ (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"",
+ orig_eager_scan_success_limit,
+ vacrel->dbname, vacrel->relnamespace,
+ vacrel->relname)));
+
+ /*
* If we hit our success cap, permanently disable eager
* scanning by setting the other eager scan management
* fields to their disabled values.
@@ -1426,19 +1442,10 @@ lazy_scan_heap(LVRelState *vacrel)
vacrel->eager_scan_remaining_fails = 0;
vacrel->next_eager_scan_region_start = InvalidBlockNumber;
vacrel->eager_scan_max_fails_per_region = 0;
-
- ereport(vacrel->verbose ? INFO : DEBUG2,
- (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of \"%s.%s.%s\"",
- orig_eager_scan_success_limit,
- vacrel->dbname, vacrel->relnamespace,
- vacrel->relname)));
}
}
- else
- {
- Assert(vacrel->eager_scan_remaining_fails > 0);
+ else if (vacrel->eager_scan_remaining_fails > 0)
vacrel->eager_scan_remaining_fails--;
- }
}
/*
@@ -1475,7 +1482,7 @@ lazy_scan_heap(LVRelState *vacrel)
* table has indexes. There will only be newly-freed space if we
* held the cleanup lock and lazy_scan_prune() was called.
*/
- if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items &&
+ if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 &&
blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
{
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
@@ -1866,8 +1873,6 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
*/
if (!PageIsAllVisible(page))
{
- uint8 old_vmbits;
-
START_CRIT_SECTION();
/* mark buffer dirty before writing a WAL record */
@@ -1887,24 +1892,16 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
log_newpage_buffer(buf, true);
PageSetAllVisible(page);
- old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf,
- InvalidXLogRecPtr,
- vmbuffer, InvalidTransactionId,
- VISIBILITYMAP_ALL_VISIBLE |
- VISIBILITYMAP_ALL_FROZEN);
+ visibilitymap_set(vacrel->rel, blkno, buf,
+ InvalidXLogRecPtr,
+ vmbuffer, InvalidTransactionId,
+ VISIBILITYMAP_ALL_VISIBLE |
+ VISIBILITYMAP_ALL_FROZEN);
END_CRIT_SECTION();
- /*
- * If the page wasn't already set all-visible and/or all-frozen in
- * the VM, count it as newly set for logging.
- */
- if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
- {
- vacrel->vm_new_visible_pages++;
- vacrel->vm_new_visible_frozen_pages++;
- }
- else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0)
- vacrel->vm_new_frozen_pages++;
+ /* Count the newly all-frozen pages for logging */
+ vacrel->vm_new_visible_pages++;
+ vacrel->vm_new_visible_frozen_pages++;
}
freespace = PageGetHeapFreeSpace(page);
@@ -1940,8 +1937,10 @@ cmpOffsetNumbers(const void *a, const void *b)
* *vm_page_frozen is set to true if the page is newly set all-frozen in the
* VM. The caller currently only uses this for determining whether an eagerly
* scanned page was successfully set all-frozen.
+ *
+ * Returns the number of tuples deleted from the page during HOT pruning.
*/
-static void
+static int
lazy_scan_prune(LVRelState *vacrel,
Buffer buf,
BlockNumber blkno,
@@ -2212,6 +2211,8 @@ lazy_scan_prune(LVRelState *vacrel,
*vm_page_frozen = true;
}
}
+
+ return presult.ndeleted;
}
/*
@@ -2909,7 +2910,6 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid,
&all_frozen))
{
- uint8 old_vmbits;
uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
if (all_frozen)
@@ -2919,25 +2919,15 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
}
PageSetAllVisible(page);
- old_vmbits = visibilitymap_set(vacrel->rel, blkno, buffer,
- InvalidXLogRecPtr,
- vmbuffer, visibility_cutoff_xid,
- flags);
-
- /*
- * If the page wasn't already set all-visible and/or all-frozen in the
- * VM, count it as newly set for logging.
- */
- if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0)
- {
- vacrel->vm_new_visible_pages++;
- if (all_frozen)
- vacrel->vm_new_visible_frozen_pages++;
- }
+ visibilitymap_set(vacrel->rel, blkno, buffer,
+ InvalidXLogRecPtr,
+ vmbuffer, visibility_cutoff_xid,
+ flags);
- else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
- all_frozen)
- vacrel->vm_new_frozen_pages++;
+ /* Count the newly set VM page for logging */
+ vacrel->vm_new_visible_pages++;
+ if (all_frozen)
+ vacrel->vm_new_visible_frozen_pages++;
}
/* Revert to the previous phase information for error traceback */
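
As a worked example of the failure cap that heap_vacuum_eager_scan_setup computes above, assuming an EAGER_SCAN_REGION_SIZE of 4096 blocks and a max_eager_freeze_failure_rate of 0.03 (both values are assumptions of this sketch, not guarantees):

#include <stdio.h>

int
main(void)
{
	const int	region_size = 4096;	/* assumed EAGER_SCAN_REGION_SIZE */
	const double fail_rate = 0.03;	/* assumed max_eager_freeze_failure_rate */

	/* the assignment truncates, matching the integer field in LVRelState */
	unsigned int max_fails = fail_rate * region_size;

	printf("eager freeze failures allowed per region: %u\n", max_fails);
	return 0;
}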
diff --git a/src/backend/access/nbtree/nbtpreprocesskeys.c b/src/backend/access/nbtree/nbtpreprocesskeys.c
index a136e4bbfdf..21c519cd108 100644
--- a/src/backend/access/nbtree/nbtpreprocesskeys.c
+++ b/src/backend/access/nbtree/nbtpreprocesskeys.c
@@ -16,6 +16,7 @@
#include "postgres.h"
#include "access/nbtree.h"
+#include "common/int.h"
#include "lib/qunique.h"
#include "utils/array.h"
#include "utils/lsyscache.h"
@@ -56,6 +57,8 @@ static void _bt_skiparray_strat_decrement(IndexScanDesc scan, ScanKey arraysk,
BTArrayKeyInfo *array);
static void _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk,
BTArrayKeyInfo *array);
+static void _bt_unmark_keys(IndexScanDesc scan, int *keyDataMap);
+static int _bt_reorder_array_cmp(const void *a, const void *b);
static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys);
static void _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap);
static int _bt_num_array_keys(IndexScanDesc scan, Oid *skip_eq_ops_out,
@@ -96,7 +99,7 @@ static int _bt_compare_array_elements(const void *a, const void *b, void *arg);
* incomplete sets of cross-type operators, we may fail to detect redundant
* or contradictory keys, but we can survive that.)
*
- * The output keys must be sorted by index attribute. Presently we expect
+ * Required output keys are sorted by index attribute. Presently we expect
* (but verify) that the input keys are already so sorted --- this is done
* by match_clauses_to_index() in indxpath.c. Some reordering of the keys
* within each attribute may be done as a byproduct of the processing here.
@@ -127,29 +130,36 @@ static int _bt_compare_array_elements(const void *a, const void *b, void *arg);
* This has the potential to be much more efficient than a full index scan
* (though it behaves like a full scan when there's many distinct "x" values).
*
- * If possible, redundant keys are eliminated: we keep only the tightest
+ * Typically, redundant keys are eliminated: we keep only the tightest
* >/>= bound and the tightest </<= bound, and if there's an = key then
* that's the only one returned. (So, we return either a single = key,
* or one or two boundary-condition keys for each attr.) However, if we
* cannot compare two keys for lack of a suitable cross-type operator,
- * we cannot eliminate either. If there are two such keys of the same
- * operator strategy, the second one is just pushed into the output array
- * without further processing here. We may also emit both >/>= or both
- * </<= keys if we can't compare them. The logic about required keys still
- * works if we don't eliminate redundant keys.
- *
- * Note that one reason we need direction-sensitive required-key flags is
- * precisely that we may not be able to eliminate redundant keys. Suppose
- * we have "x > 4::int AND x > 10::bigint", and we are unable to determine
- * which key is more restrictive for lack of a suitable cross-type operator.
- * _bt_first will arbitrarily pick one of the keys to do the initial
- * positioning with. If it picks x > 4, then the x > 10 condition will fail
- * until we reach index entries > 10; but we can't stop the scan just because
- * x > 10 is failing. On the other hand, if we are scanning backwards, then
- * failure of either key is indeed enough to stop the scan. (In general, when
- * inequality keys are present, the initial-positioning code only promises to
- * position before the first possible match, not exactly at the first match,
- * for a forward scan; or after the last match for a backward scan.)
+ * we cannot eliminate either key.
+ *
+ * When all redundant keys could not be eliminated, we'll output a key array
+ * that can more or less be treated as if it had no redundant keys. Suppose
+ * we have "x > 4::int AND x > 10::bigint AND x < 70", and we are unable to
+ * determine which > key is more restrictive for lack of a suitable cross-type
+ * operator. We'll arbitrarily pick one of the > keys; the other > key won't
+ * be marked required. Obviously, the scan will be less efficient if we
+ * choose x > 4 over x > 10 -- but it can still largely proceed as if there
+ * was only a single > condition. "x > 10" will be placed at the end of the
+ * so->keyData[] output array. It'll always be evaluated last, after the keys
+ * that could be marked required in the usual way (after "x > 4 AND x < 70").
+ * This can sometimes result in so->keyData[] keys that aren't even in index
+ * attribute order (if the qual involves multiple attributes). The scan's
+ * required keys will still be in attribute order, though, so it can't matter.
+ *
+ * This scheme ensures that _bt_first always uses the same set of keys at the
+ * start of a forwards scan as those _bt_checkkeys uses to determine when to
+ * end a similar backwards scan (and vice-versa). _bt_advance_array_keys
+ * depends on this: it expects to be able to reliably predict what the next
+ * _bt_first call will do by testing whether _bt_checkkeys' routines report
+ * that the final tuple on the page is past the end of matches for the scan's
+ * keys with the scan direction flipped. If it is (if continuescan=false),
+ * then it follows that calling _bt_first will, at a minimum, relocate the
+ * scan to the very next leaf page (in the current scan direction).
*
* As a byproduct of this work, we can detect contradictory quals such
* as "x = 1 AND x > 2". If we see that, we return so->qual_ok = false,
@@ -188,7 +198,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
int numberOfEqualCols;
ScanKey inkeys;
BTScanKeyPreproc xform[BTMaxStrategyNumber];
- bool test_result;
+ bool test_result,
+ redundant_key_kept = false;
AttrNumber attno;
ScanKey arrayKeyData;
int *keyDataMap = NULL;
@@ -388,7 +399,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
xform[j].inkey = NULL;
xform[j].inkeyi = -1;
}
- /* else, cannot determine redundancy, keep both keys */
+ else
+ redundant_key_kept = true;
}
/* track number of attrs for which we have "=" keys */
numberOfEqualCols++;
@@ -409,6 +421,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
else
xform[BTLessStrategyNumber - 1].inkey = NULL;
}
+ else
+ redundant_key_kept = true;
}
/* try to keep only one of >, >= */
@@ -426,6 +440,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
else
xform[BTGreaterStrategyNumber - 1].inkey = NULL;
}
+ else
+ redundant_key_kept = true;
}
/*
@@ -466,25 +482,6 @@ _bt_preprocess_keys(IndexScanDesc scan)
/* check strategy this key's operator corresponds to */
j = inkey->sk_strategy - 1;
- /* if row comparison, push it directly to the output array */
- if (inkey->sk_flags & SK_ROW_HEADER)
- {
- ScanKey outkey = &so->keyData[new_numberOfKeys++];
-
- memcpy(outkey, inkey, sizeof(ScanKeyData));
- if (arrayKeyData)
- keyDataMap[new_numberOfKeys - 1] = i;
- if (numberOfEqualCols == attno - 1)
- _bt_mark_scankey_required(outkey);
-
- /*
- * We don't support RowCompare using equality; such a qual would
- * mess up the numberOfEqualCols tracking.
- */
- Assert(j != (BTEqualStrategyNumber - 1));
- continue;
- }
-
if (inkey->sk_strategy == BTEqualStrategyNumber &&
(inkey->sk_flags & SK_SEARCHARRAY))
{
@@ -593,9 +590,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
* the new scan key.
*
* Note: We do things this way around so that our arrays are
- * always in the same order as their corresponding scan keys,
- * even with incomplete opfamilies. _bt_advance_array_keys
- * depends on this.
+ * always in the same order as their corresponding scan keys.
+ * _bt_preprocess_array_keys_final expects this.
*/
ScanKey outkey = &so->keyData[new_numberOfKeys++];
@@ -607,6 +603,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
xform[j].inkey = inkey;
xform[j].inkeyi = i;
xform[j].arrayidx = arrayidx;
+ redundant_key_kept = true;
}
}
}
@@ -622,6 +619,15 @@ _bt_preprocess_keys(IndexScanDesc scan)
if (arrayKeyData)
_bt_preprocess_array_keys_final(scan, keyDataMap);
+ /*
+ * If there are remaining redundant inequality keys, we must make sure
+ * that each index attribute has no more than one required >/>= key, and
+ * no more than one required </<= key. Attributes that have one or more
+ * required = keys now must keep only one required key (the first = key).
+ */
+ if (unlikely(redundant_key_kept) && so->qual_ok)
+ _bt_unmark_keys(scan, keyDataMap);
+
/* Could pfree arrayKeyData/keyDataMap now, but not worth the cycles */
}
@@ -746,9 +752,12 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption)
*
* Depending on the operator type, the key may be required for both scan
* directions or just one. Also, if the key is a row comparison header,
- * we have to mark its first subsidiary ScanKey as required. (Subsequent
- * subsidiary ScanKeys are normally for lower-order columns, and thus
- * cannot be required, since they're after the first non-equality scankey.)
+ * we have to mark the appropriate subsidiary ScanKeys as required. In such
+ * cases, the first subsidiary key is required, but subsequent ones are
+ * required only as long as they correspond to successive index columns and
+ * match the leading column as to sort direction. Otherwise the row
+ * comparison ordering is different from the index ordering and so we can't
+ * stop the scan on the basis of those lower-order columns.
*
* Note: when we set required-key flag bits in a subsidiary scankey, we are
* scribbling on a data structure belonging to the index AM's caller, not on
@@ -786,12 +795,25 @@ _bt_mark_scankey_required(ScanKey skey)
if (skey->sk_flags & SK_ROW_HEADER)
{
ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
+ AttrNumber attno = skey->sk_attno;
/* First subkey should be same column/operator as the header */
- Assert(subkey->sk_flags & SK_ROW_MEMBER);
- Assert(subkey->sk_attno == skey->sk_attno);
+ Assert(subkey->sk_attno == attno);
Assert(subkey->sk_strategy == skey->sk_strategy);
- subkey->sk_flags |= addflags;
+
+ for (;;)
+ {
+ Assert(subkey->sk_flags & SK_ROW_MEMBER);
+ if (subkey->sk_attno != attno)
+ break; /* non-adjacent key, so not required */
+ if (subkey->sk_strategy != skey->sk_strategy)
+ break; /* wrong direction, so not required */
+ subkey->sk_flags |= addflags;
+ if (subkey->sk_flags & SK_ROW_END)
+ break;
+ subkey++;
+ attno++;
+ }
}
}
@@ -847,8 +869,7 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
cmp_op;
StrategyNumber strat;
- Assert(!((leftarg->sk_flags | rightarg->sk_flags) &
- (SK_ROW_HEADER | SK_ROW_MEMBER)));
+ Assert(!((leftarg->sk_flags | rightarg->sk_flags) & SK_ROW_MEMBER));
/*
* First, deal with cases where one or both args are NULL. This should
@@ -925,6 +946,16 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
}
/*
+ * We don't yet know how to determine redundancy when it involves a row
+ * compare key (barring simple cases involving IS NULL/IS NOT NULL)
+ */
+ if ((leftarg->sk_flags | rightarg->sk_flags) & SK_ROW_HEADER)
+ {
+ Assert(!((leftarg->sk_flags | rightarg->sk_flags) & SK_BT_SKIP));
+ return false;
+ }
+
+ /*
* If either leftarg or rightarg are equality-type array scankeys, we need
* specialized handling (since by now we know that IS NULL wasn't used)
*/
@@ -1468,6 +1499,283 @@ _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk,
}
/*
+ * _bt_unmark_keys() -- make superfluous required keys nonrequired after all
+ *
+ * When _bt_preprocess_keys fails to eliminate one or more redundant keys, it
+ * calls here to make sure that no index attribute has more than one > or >=
+ * key marked required, and no more than one required < or <= key. Attributes
+ * with = keys will always get one = key as their required key. All other
+ * keys that were initially marked required get "unmarked" here. That way,
+ * _bt_first and _bt_checkkeys will reliably agree on which keys to use to
+ * start and/or to end the scan.
+ *
+ * We also relocate keys that become/started out nonrequired to the end of
+ * so->keyData[]. That way, _bt_first and _bt_checkkeys cannot fail to reach
+ * a required key due to some earlier nonrequired key getting in the way.
+ *
+ * Only call here when _bt_compare_scankey_args returned false at least once
+ * (otherwise, calling here will just waste cycles).
+ */
+static void
+_bt_unmark_keys(IndexScanDesc scan, int *keyDataMap)
+{
+ BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ AttrNumber attno;
+ bool *unmarkikey;
+ int nunmark,
+ nunmarked,
+ nkept,
+ firsti;
+ ScanKey keepKeys,
+ unmarkKeys;
+ FmgrInfo *keepOrderProcs = NULL,
+ *unmarkOrderProcs = NULL;
+ bool haveReqEquals,
+ haveReqForward,
+ haveReqBackward;
+
+ /*
+ * Do an initial pass over so->keyData[] that determines which keys to
+ * keep as required. We expect so->keyData[] to still be in attribute
+ * order when we're called (though we don't expect any particular order
+ * among each attribute's keys).
+ *
+ * When both equality and inequality keys remain on a single attribute, we
+ * *must* make sure that exactly one of the equalities remains required.
+ * Any requiredness markings that we might leave on later keys/attributes
+ * are predicated on there being required = keys on all prior columns.
+ */
+ unmarkikey = palloc0(so->numberOfKeys * sizeof(bool));
+ nunmark = 0;
+
+ /* Set things up for first key's attribute */
+ attno = so->keyData[0].sk_attno;
+ firsti = 0;
+ haveReqEquals = false;
+ haveReqForward = false;
+ haveReqBackward = false;
+ for (int i = 0; i < so->numberOfKeys; i++)
+ {
+ ScanKey origkey = &so->keyData[i];
+
+ if (origkey->sk_attno != attno)
+ {
+ /* Reset for next attribute */
+ attno = origkey->sk_attno;
+ firsti = i;
+
+ haveReqEquals = false;
+ haveReqForward = false;
+ haveReqBackward = false;
+ }
+
+ /* Equalities get priority over inequalities */
+ if (haveReqEquals)
+ {
+ /*
+ * We already found the first "=" key for this attribute. We've
+ * already decided that all its other keys will be unmarked.
+ */
+ Assert(!(origkey->sk_flags & SK_SEARCHNULL));
+ unmarkikey[i] = true;
+ nunmark++;
+ continue;
+ }
+ else if ((origkey->sk_flags & SK_BT_REQFWD) &&
+ (origkey->sk_flags & SK_BT_REQBKWD))
+ {
+ /*
+ * Found the first "=" key for attno. All other attno keys will
+ * be unmarked.
+ */
+ Assert(origkey->sk_strategy == BTEqualStrategyNumber);
+
+ haveReqEquals = true;
+ for (int j = firsti; j < i; j++)
+ {
+ /* Unmark any prior inequality keys on attno after all */
+ if (!unmarkikey[j])
+ {
+ unmarkikey[j] = true;
+ nunmark++;
+ }
+ }
+ continue;
+ }
+
+ /* Deal with inequalities next */
+ if ((origkey->sk_flags & SK_BT_REQFWD) && !haveReqForward)
+ {
+ haveReqForward = true;
+ continue;
+ }
+ else if ((origkey->sk_flags & SK_BT_REQBKWD) && !haveReqBackward)
+ {
+ haveReqBackward = true;
+ continue;
+ }
+
+ /*
+ * We have either a redundant inequality key that will be unmarked, or
+ * we have a key that wasn't marked required in the first place
+ */
+ unmarkikey[i] = true;
+ nunmark++;
+ }
+
+ /* Should only be called when _bt_compare_scankey_args reported failure */
+ Assert(nunmark > 0);
+
+ /*
+ * Next, allocate temp arrays: one for required keys that'll remain
+ * required, the other for all remaining keys
+ */
+ unmarkKeys = palloc(nunmark * sizeof(ScanKeyData));
+ keepKeys = palloc((so->numberOfKeys - nunmark) * sizeof(ScanKeyData));
+ nunmarked = 0;
+ nkept = 0;
+ if (so->numArrayKeys)
+ {
+ unmarkOrderProcs = palloc(nunmark * sizeof(FmgrInfo));
+ keepOrderProcs = palloc((so->numberOfKeys - nunmark) * sizeof(FmgrInfo));
+ }
+
+ /*
+ * Next, copy the contents of so->keyData[] into the appropriate temp
+ * array.
+ *
+ * Scans with = array keys need us to maintain invariants around the order
+ * of so->orderProcs[] and so->arrayKeys[] relative to so->keyData[]. See
+ * _bt_preprocess_array_keys_final for a full explanation.
+ */
+ for (int i = 0; i < so->numberOfKeys; i++)
+ {
+ ScanKey origkey = &so->keyData[i];
+ ScanKey unmark;
+
+ if (!unmarkikey[i])
+ {
+ /*
+ * Key gets to keep its original requiredness markings.
+ *
+ * Key will stay in its original position, unless we're going to
+ * unmark an earlier key (in which case this key gets moved back).
+ */
+ memcpy(keepKeys + nkept, origkey, sizeof(ScanKeyData));
+
+ if (so->numArrayKeys)
+ {
+ keyDataMap[i] = nkept;
+ memcpy(keepOrderProcs + nkept, &so->orderProcs[i],
+ sizeof(FmgrInfo));
+ }
+
+ nkept++;
+ continue;
+ }
+
+ /*
+ * Key will be unmarked as needed, and moved to the end of the array,
+ * next to other keys that will become (or always were) nonrequired
+ */
+ unmark = unmarkKeys + nunmarked;
+ memcpy(unmark, origkey, sizeof(ScanKeyData));
+
+ if (so->numArrayKeys)
+ {
+ keyDataMap[i] = (so->numberOfKeys - nunmark) + nunmarked;
+ memcpy(&unmarkOrderProcs[nunmarked], &so->orderProcs[i],
+ sizeof(FmgrInfo));
+ }
+
+ /*
+ * Preprocessing only generates skip arrays when it knows that they'll
+ * be the only required = key on the attr. We'll never unmark them.
+ */
+ Assert(!(unmark->sk_flags & SK_BT_SKIP));
+
+ /*
+ * Also shouldn't have to unmark an IS NULL or an IS NOT NULL key.
+ * They aren't cross-type, so an incomplete opfamily can't matter.
+ */
+ Assert(!(unmark->sk_flags & SK_ISNULL) ||
+ !(unmark->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)));
+
+ /* Clear requiredness flags on redundant key (and on any subkeys) */
+ unmark->sk_flags &= ~(SK_BT_REQFWD | SK_BT_REQBKWD);
+ if (unmark->sk_flags & SK_ROW_HEADER)
+ {
+ ScanKey subkey = (ScanKey) DatumGetPointer(unmark->sk_argument);
+
+ Assert(subkey->sk_strategy == unmark->sk_strategy);
+ for (;;)
+ {
+ Assert(subkey->sk_flags & SK_ROW_MEMBER);
+ subkey->sk_flags &= ~(SK_BT_REQFWD | SK_BT_REQBKWD);
+ if (subkey->sk_flags & SK_ROW_END)
+ break;
+ subkey++;
+ }
+ }
+
+ nunmarked++;
+ }
+
+ /* Copy both temp arrays back into so->keyData[] to reorder */
+ Assert(nkept == so->numberOfKeys - nunmark);
+ Assert(nunmarked == nunmark);
+ memcpy(so->keyData, keepKeys, sizeof(ScanKeyData) * nkept);
+ memcpy(so->keyData + nkept, unmarkKeys, sizeof(ScanKeyData) * nunmarked);
+
+ /* Done with temp arrays */
+ pfree(unmarkikey);
+ pfree(keepKeys);
+ pfree(unmarkKeys);
+
+ /*
+ * Now copy so->orderProcs[] temp entries needed by scans with = array
+ * keys back (just like with the so->keyData[] temp arrays)
+ */
+ if (so->numArrayKeys)
+ {
+ memcpy(so->orderProcs, keepOrderProcs, sizeof(FmgrInfo) * nkept);
+ memcpy(so->orderProcs + nkept, unmarkOrderProcs,
+ sizeof(FmgrInfo) * nunmarked);
+
+ /* Also fix-up array->scan_key references */
+ for (int arridx = 0; arridx < so->numArrayKeys; arridx++)
+ {
+ BTArrayKeyInfo *array = &so->arrayKeys[arridx];
+
+ array->scan_key = keyDataMap[array->scan_key];
+ }
+
+ /*
+ * Sort so->arrayKeys[] based on its new BTArrayKeyInfo.scan_key
+ * offsets, so that its order matches so->keyData[] order as expected
+ */
+ qsort(so->arrayKeys, so->numArrayKeys, sizeof(BTArrayKeyInfo),
+ _bt_reorder_array_cmp);
+
+ /* Done with temp arrays */
+ pfree(unmarkOrderProcs);
+ pfree(keepOrderProcs);
+ }
+}
+
+/*
+ * qsort comparator for reordering so->arrayKeys[] BTArrayKeyInfo entries
+ */
+static int
+_bt_reorder_array_cmp(const void *a, const void *b)
+{
+ BTArrayKeyInfo *arraya = (BTArrayKeyInfo *) a;
+ BTArrayKeyInfo *arrayb = (BTArrayKeyInfo *) b;
+
+ return pg_cmp_s32(arraya->scan_key, arrayb->scan_key);
+}
+
+/*
* _bt_preprocess_array_keys() -- Preprocess SK_SEARCHARRAY scan keys
*
* If there are any SK_SEARCHARRAY scan keys, deconstruct the array(s) and
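
The new qsort comparator relies on pg_cmp_s32 from common/int.h. The scan_key offsets being sorted are small, but the project idiom avoids the classic "return a - b" comparator, which overflows for arbitrary int32 inputs. A sketch of the equivalent overflow-safe comparison:

#include <stdint.h>

/* Sketch equivalent to pg_cmp_s32(a, b): -1, 0, or +1, never overflows */
static int
cmp_s32_sketch(int32_t a, int32_t b)
{
	return (a > b) - (a < b);
}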
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 765659887af..fdff960c130 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -228,6 +228,8 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
BTScanOpaque so = (BTScanOpaque) scan->opaque;
bool res;
+ Assert(scan->heapRelation != NULL);
+
/* btree indexes are never lossy */
scan->xs_recheck = false;
@@ -289,6 +291,8 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
int64 ntids = 0;
ItemPointer heapTid;
+ Assert(scan->heapRelation == NULL);
+
/* Each loop iteration performs another primitive index scan */
do
{
@@ -393,6 +397,34 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
BTScanPosInvalidate(so->currPos);
}
+ /*
+ * We prefer to eagerly drop leaf page pins before btgettuple returns.
+ * This avoids making VACUUM wait to acquire a cleanup lock on the page.
+ *
+ * We cannot safely drop leaf page pins during index-only scans due to a
+ * race condition involving VACUUM setting pages all-visible in the VM.
+ * It's also unsafe for plain index scans that use a non-MVCC snapshot.
+ *
+ * When we drop pins eagerly, the mechanism that marks so->killedItems[]
+ * index tuples LP_DEAD has to deal with concurrent TID recycling races.
+ * The scheme used to detect unsafe TID recycling won't work when scanning
+ * unlogged relations (since it involves saving an affected page's LSN).
+ * Opt out of eager pin dropping during unlogged relation scans for now
+ * (this is preferable to opting out of kill_prior_tuple LP_DEAD setting).
+ *
+ * Also opt out of dropping leaf page pins eagerly during bitmap scans.
+ * Pins cannot be held for more than an instant during bitmap scans either
+ * way, so we might as well avoid wasting cycles on acquiring page LSNs.
+ *
+ * See nbtree/README section on making concurrent TID recycling safe.
+ *
+ * Note: so->dropPin should never change across rescans.
+ */
+ so->dropPin = (!scan->xs_want_itup &&
+ IsMVCCSnapshot(scan->xs_snapshot) &&
+ RelationNeedsWAL(scan->indexRelation) &&
+ scan->heapRelation != NULL);
+
so->markItemIndex = -1;
so->needPrimScan = false;
so->scanBehind = false;
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index fe9a3886913..4af1ff1e9e5 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -25,7 +25,7 @@
#include "utils/rel.h"
-static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp);
+static inline void _bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so);
static Buffer _bt_moveright(Relation rel, Relation heaprel, BTScanInsert key,
Buffer buf, bool forupdate, BTStack stack,
int access);
@@ -57,24 +57,29 @@ static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
/*
* _bt_drop_lock_and_maybe_pin()
*
- * Unlock the buffer; and if it is safe to release the pin, do that, too.
- * This will prevent vacuum from stalling in a blocked state trying to read a
- * page when a cursor is sitting on it.
- *
- * See nbtree/README section on making concurrent TID recycling safe.
+ * Unlock so->currPos.buf. If scan is so->dropPin, drop the pin, too.
+ * Dropping the pin prevents VACUUM from blocking on acquiring a cleanup lock.
*/
-static void
-_bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
+static inline void
+_bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so)
{
- _bt_unlockbuf(scan->indexRelation, sp->buf);
-
- if (IsMVCCSnapshot(scan->xs_snapshot) &&
- RelationNeedsWAL(scan->indexRelation) &&
- !scan->xs_want_itup)
+ if (!so->dropPin)
{
- ReleaseBuffer(sp->buf);
- sp->buf = InvalidBuffer;
+ /* Just drop the lock (not the pin) */
+ _bt_unlockbuf(rel, so->currPos.buf);
+ return;
}
+
+ /*
+ * Drop both the lock and the pin.
+ *
+ * Have to set so->currPos.lsn so that _bt_killitems has a way to detect
+ * when concurrent heap TID recycling by VACUUM might have taken place.
+ */
+ Assert(RelationNeedsWAL(rel));
+ so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf);
+ _bt_relbuf(rel, so->currPos.buf);
+ so->currPos.buf = InvalidBuffer;
}
/*
@@ -866,8 +871,8 @@ _bt_compare(Relation rel,
* if backwards scan, the last item) in the tree that satisfies the
* qualifications in the scan key. On success exit, data about the
* matching tuple(s) on the page has been loaded into so->currPos. We'll
- * drop all locks and hold onto a pin on page's buffer, except when
- * _bt_drop_lock_and_maybe_pin dropped the pin to avoid blocking VACUUM.
+ * drop all locks and hold onto a pin on page's buffer, except during
+ * so->dropPin scans, when we drop both the lock and the pin.
* _bt_returnitem sets the next item to return to scan on success exit.
*
* If there are no matching items in the index, we return false, with no
@@ -955,46 +960,51 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
/*----------
* Examine the scan keys to discover where we need to start the scan.
+ * The selected scan keys (at most one per index column) are remembered by
+ * storing their addresses into the local startKeys[] array. The final
+ * startKeys[] entry's strategy is set in strat_total. (Actually, there
+ * are a couple of cases where we force a less/more restrictive strategy.)
*
- * We want to identify the keys that can be used as starting boundaries;
- * these are =, >, or >= keys for a forward scan or =, <, <= keys for
- * a backwards scan. We can use keys for multiple attributes so long as
- * the prior attributes had only =, >= (resp. =, <=) keys. Once we accept
- * a > or < boundary or find an attribute with no boundary (which can be
- * thought of as the same as "> -infinity"), we can't use keys for any
- * attributes to its right, because it would break our simplistic notion
- * of what initial positioning strategy to use.
+ * We must use the key that was marked required (in the direction opposite
+ * our own scan's) during preprocessing. Each index attribute can only
+ * have one such required key. In general, the keys that we use to find
+ * an initial position when scanning forwards are the same keys that end
+ * the scan on the leaf level when scanning backwards (and vice-versa).
*
* When the scan keys include cross-type operators, _bt_preprocess_keys
- * may not be able to eliminate redundant keys; in such cases we will
- * arbitrarily pick a usable one for each attribute. This is correct
- * but possibly not optimal behavior. (For example, with keys like
- * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when
- * x=5 would be more efficient.) Since the situation only arises given
- * a poorly-worded query plus an incomplete opfamily, live with it.
+ * may not be able to eliminate redundant keys; in such cases it will
+ * arbitrarily pick a usable key for each attribute (and scan direction),
+ * ensuring that there is no more than one key required in each direction.
+ * We stop considering further keys once we reach the first nonrequired
+ * key (which must come after all required keys), so this can't affect us.
+ *
+ * The required keys that we use as starting boundaries have to be =, >,
+ * or >= keys for a forward scan or =, <, <= keys for a backwards scan.
+ * We can use keys for multiple attributes so long as the prior attributes
+ * had only =, >= (resp. =, <=) keys. These rules are very similar to the
+ * rules that preprocessing used to determine which keys to mark required.
+ * We cannot always use every required key as a positioning key, though.
+ * Skip arrays necessitate independently applying our own rules here.
+ * Skip arrays are generally considered = array keys, but we'll
+ * nevertheless treat them as inequalities at certain points of the scan.
+ * When that happens, it _might_ have implications for the number of
+ * required keys that we can safely use for initial positioning purposes.
*
- * When both equality and inequality keys appear for a single attribute
- * (again, only possible when cross-type operators appear), we *must*
- * select one of the equality keys for the starting point, because
- * _bt_checkkeys() will stop the scan as soon as an equality qual fails.
- * For example, if we have keys like "x >= 4 AND x = 10" and we elect to
- * start at x=4, we will fail and stop before reaching x=10. If multiple
- * equality quals survive preprocessing, however, it doesn't matter which
- * one we use --- by definition, they are either redundant or
- * contradictory.
+ * For example, a forward scan with a skip array on its leading attribute
+ * (with no low_compare/high_compare) will have at least two required scan
+ * keys, but we won't use any of them as boundary keys during the scan's
+ * initial call here. Our positioning key during the first call here can
+ * be thought of as representing "> -infinity". Similarly, if such a skip
+ * array's low_compare is "a > 'foo'", then we position using "a > 'foo'"
+ * during the scan's initial call here; a lower-order key such as "b = 42"
+ * can't be used until the "a" array advances beyond MINVAL/low_compare.
*
- * In practice we rarely see any "attribute boundary key gaps" here.
- * Preprocessing can usually backfill skip array keys for any attributes
- * that were omitted from the original scan->keyData[] input keys. All
- * array keys are always considered = keys, but we'll sometimes need to
- * treat the current key value as if we were using an inequality strategy.
- * This happens with range skip arrays, which store inequality keys in the
- * array's low_compare/high_compare fields (used to find the first/last
- * set of matches, when = key will lack a usable sk_argument value).
- * These are always preferred over any redundant "standard" inequality
- * keys on the same column (per the usual rule about preferring = keys).
- * Note also that any column with an = skip array key can never have an
- * additional, contradictory = key.
+ * On the other hand, if such a skip array's low_compare was "a >= 'foo'",
+ * then we _can_ use "a >= 'foo' AND b = 42" during the initial call here.
+ * A subsequent call here might have us use "a = 'fop' AND b = 42". Note
+ * that we treat = and >= as equivalent when scanning forwards (just as we
+ * treat = and <= as equivalent when scanning backwards). We effectively
+ * do the same thing (though with a distinct "a" element/value) each time.
*
* All keys (with the exception of SK_SEARCHNULL keys and SK_BT_SKIP
* array keys whose array is "null_elem=true") imply a NOT NULL qualifier.
@@ -1006,21 +1016,20 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* traversing a lot of null entries at the start of the scan.
*
* In this loop, row-comparison keys are treated the same as keys on their
- * first (leftmost) columns. We'll add on lower-order columns of the row
- * comparison below, if possible.
+ * first (leftmost) columns.  Below, we'll add all lower-order columns of
+ * the row comparison that were marked required during preprocessing.
*
- * The selected scan keys (at most one per index column) are remembered by
- * storing their addresses into the local startKeys[] array.
- *
- * _bt_checkkeys/_bt_advance_array_keys decide whether and when to start
- * the next primitive index scan (for scans with array keys) based in part
- * on an understanding of how it'll enable us to reposition the scan.
- * They're directly aware of how we'll sometimes cons up an explicit
- * SK_SEARCHNOTNULL key. They'll even end primitive scans by applying a
- * symmetric "deduce NOT NULL" rule of their own. This allows top-level
- * scans to skip large groups of NULLs through repeated deductions about
- * key strictness (for a required inequality key) and whether NULLs in the
- * key's index column are stored last or first (relative to non-NULLs).
+ * _bt_advance_array_keys needs to know exactly how we'll reposition the
+ * scan (should it opt to schedule another primitive index scan). It is
+ * critical that primscans only be scheduled when they'll definitely make
+ * some useful progress. _bt_advance_array_keys does this by calling
+ * _bt_checkkeys routines that report whether a tuple is past the end of
+ * matches for the scan's keys (given the scan's current array elements).
+ * If the page's final tuple is "after the end of matches" for a scan that
+ * uses the *opposite* scan direction, then it must follow that it's also
+ * "before the start of matches" for the actual current scan direction.
+ * It is therefore essential that all of our initial positioning rules are
+ * symmetric with _bt_checkkeys's corresponding continuescan=false rule.
* If you update anything here, _bt_checkkeys/_bt_advance_array_keys might
* need to be kept in sync.
*----------
@@ -1029,18 +1038,17 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (so->numberOfKeys > 0)
{
AttrNumber curattr;
- ScanKey chosen;
+ ScanKey bkey;
ScanKey impliesNN;
ScanKey cur;
/*
- * chosen is the so-far-chosen key for the current attribute, if any.
- * We don't cast the decision in stone until we reach keys for the
- * next attribute.
+ * bkey will be set to the key that preprocessing left behind as the
+ * boundary key for this attribute, in this scan direction (if any)
*/
cur = so->keyData;
curattr = 1;
- chosen = NULL;
+ bkey = NULL;
/* Also remember any scankey that implies a NOT NULL constraint */
impliesNN = NULL;
@@ -1053,23 +1061,29 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
{
if (i >= so->numberOfKeys || cur->sk_attno != curattr)
{
+ /* Done looking for the curattr boundary key */
+ Assert(bkey == NULL ||
+ (bkey->sk_attno == curattr &&
+ (bkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))));
+ Assert(impliesNN == NULL ||
+ (impliesNN->sk_attno == curattr &&
+ (impliesNN->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))));
+
/*
- * Done looking at keys for curattr.
- *
* If this is a scan key for a skip array whose current
* element is MINVAL, choose low_compare (when scanning
* backwards it'll be MAXVAL, and we'll choose high_compare).
*
- * Note: if the array's low_compare key makes 'chosen' NULL,
+ * Note: if the array's low_compare key makes 'bkey' NULL,
* then we behave as if the array's first element is -inf,
* except when !array->null_elem implies a usable NOT NULL
* constraint.
*/
- if (chosen != NULL &&
- (chosen->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)))
+ if (bkey != NULL &&
+ (bkey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL)))
{
- int ikey = chosen - so->keyData;
- ScanKey skipequalitykey = chosen;
+ int ikey = bkey - so->keyData;
+ ScanKey skipequalitykey = bkey;
BTArrayKeyInfo *array = NULL;
for (int arridx = 0; arridx < so->numArrayKeys; arridx++)
@@ -1082,35 +1096,35 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (ScanDirectionIsForward(dir))
{
Assert(!(skipequalitykey->sk_flags & SK_BT_MAXVAL));
- chosen = array->low_compare;
+ bkey = array->low_compare;
}
else
{
Assert(!(skipequalitykey->sk_flags & SK_BT_MINVAL));
- chosen = array->high_compare;
+ bkey = array->high_compare;
}
- Assert(chosen == NULL ||
- chosen->sk_attno == skipequalitykey->sk_attno);
+ Assert(bkey == NULL ||
+ bkey->sk_attno == skipequalitykey->sk_attno);
if (!array->null_elem)
impliesNN = skipequalitykey;
else
- Assert(chosen == NULL && impliesNN == NULL);
+ Assert(bkey == NULL && impliesNN == NULL);
}
/*
* If we didn't find a usable boundary key, see if we can
* deduce a NOT NULL key
*/
- if (chosen == NULL && impliesNN != NULL &&
+ if (bkey == NULL && impliesNN != NULL &&
((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ?
ScanDirectionIsForward(dir) :
ScanDirectionIsBackward(dir)))
{
/* Yes, so build the key in notnullkeys[keysz] */
- chosen = &notnullkeys[keysz];
- ScanKeyEntryInitialize(chosen,
+ bkey = &notnullkeys[keysz];
+ ScanKeyEntryInitialize(bkey,
(SK_SEARCHNOTNULL | SK_ISNULL |
(impliesNN->sk_flags &
(SK_BT_DESC | SK_BT_NULLS_FIRST))),
@@ -1125,12 +1139,12 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
}
/*
- * If we still didn't find a usable boundary key, quit; else
- * save the boundary key pointer in startKeys.
+ * If preprocessing didn't leave a usable boundary key, quit;
+ * else save the boundary key pointer in startKeys[]
*/
- if (chosen == NULL)
+ if (bkey == NULL)
break;
- startKeys[keysz++] = chosen;
+ startKeys[keysz++] = bkey;
/*
* We can only consider adding more boundary keys when the one
@@ -1138,7 +1152,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* (during backwards scans we can only do so when the key that
* we just added to startKeys[] uses the = or <= strategy)
*/
- strat_total = chosen->sk_strategy;
+ strat_total = bkey->sk_strategy;
if (strat_total == BTGreaterStrategyNumber ||
strat_total == BTLessStrategyNumber)
break;
@@ -1149,19 +1163,19 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* make strat_total > or < (and stop adding boundary keys).
* This can only happen with opclasses that lack skip support.
*/
- if (chosen->sk_flags & (SK_BT_NEXT | SK_BT_PRIOR))
+ if (bkey->sk_flags & (SK_BT_NEXT | SK_BT_PRIOR))
{
- Assert(chosen->sk_flags & SK_BT_SKIP);
+ Assert(bkey->sk_flags & SK_BT_SKIP);
Assert(strat_total == BTEqualStrategyNumber);
if (ScanDirectionIsForward(dir))
{
- Assert(!(chosen->sk_flags & SK_BT_PRIOR));
+ Assert(!(bkey->sk_flags & SK_BT_PRIOR));
strat_total = BTGreaterStrategyNumber;
}
else
{
- Assert(!(chosen->sk_flags & SK_BT_NEXT));
+ Assert(!(bkey->sk_flags & SK_BT_NEXT));
strat_total = BTLessStrategyNumber;
}
@@ -1175,24 +1189,30 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
/*
* Done if that was the last scan key output by preprocessing.
- * Also done if there is a gap index attribute that lacks a
- * usable key (only possible when preprocessing was unable to
- * generate a skip array key to "fill in the gap").
+ * Also done if we've now examined all keys marked required.
*/
if (i >= so->numberOfKeys ||
- cur->sk_attno != curattr + 1)
+ !(cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
break;
/*
* Reset for next attr.
*/
+ Assert(cur->sk_attno == curattr + 1);
curattr = cur->sk_attno;
- chosen = NULL;
+ bkey = NULL;
impliesNN = NULL;
}
/*
- * Can we use this key as a starting boundary for this attr?
+ * If we've located the starting boundary key for curattr, we have
+ * no interest in curattr's other required key
+ */
+ if (bkey != NULL)
+ continue;
+
+ /*
+ * Is this key the starting boundary key for curattr?
*
* If not, does it imply a NOT NULL constraint? (Because
* SK_SEARCHNULL keys are always assigned BTEqualStrategyNumber,
@@ -1202,27 +1222,20 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
{
case BTLessStrategyNumber:
case BTLessEqualStrategyNumber:
- if (chosen == NULL)
- {
- if (ScanDirectionIsBackward(dir))
- chosen = cur;
- else
- impliesNN = cur;
- }
+ if (ScanDirectionIsBackward(dir))
+ bkey = cur;
+ else if (impliesNN == NULL)
+ impliesNN = cur;
break;
case BTEqualStrategyNumber:
- /* override any non-equality choice */
- chosen = cur;
+ bkey = cur;
break;
case BTGreaterEqualStrategyNumber:
case BTGreaterStrategyNumber:
- if (chosen == NULL)
- {
- if (ScanDirectionIsForward(dir))
- chosen = cur;
- else
- impliesNN = cur;
- }
+ if (ScanDirectionIsForward(dir))
+ bkey = cur;
+ else if (impliesNN == NULL)
+ impliesNN = cur;
break;
}
}
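
To make the selection rules in the switch above concrete, here is an
illustrative predicate (not in the patch; the name is ours) for when a
required key can serve as a positioning key, given the scan direction:

static inline bool
key_can_position_scan(StrategyNumber strat, ScanDirection dir)
{
	switch (strat)
	{
		case BTLessStrategyNumber:
		case BTLessEqualStrategyNumber:
			/* < and <= keys only bound the start of backwards scans */
			return ScanDirectionIsBackward(dir);
		case BTEqualStrategyNumber:
			/* = keys position the scan in either direction */
			return true;
		case BTGreaterEqualStrategyNumber:
		case BTGreaterStrategyNumber:
			/* > and >= keys only bound the start of forward scans */
			return ScanDirectionIsForward(dir);
	}
	return false;				/* keep compiler quiet */
}

A key that fails this test isn't wasted: as the switch shows, it can still
supply the NOT NULL deduction tracked in impliesNN.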
@@ -1248,16 +1261,18 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
Assert(keysz <= INDEX_MAX_KEYS);
for (int i = 0; i < keysz; i++)
{
- ScanKey cur = startKeys[i];
+ ScanKey bkey = startKeys[i];
- Assert(cur->sk_attno == i + 1);
+ Assert(bkey->sk_attno == i + 1);
- if (cur->sk_flags & SK_ROW_HEADER)
+ if (bkey->sk_flags & SK_ROW_HEADER)
{
/*
* Row comparison header: look to the first row member instead
*/
- ScanKey subkey = (ScanKey) DatumGetPointer(cur->sk_argument);
+ ScanKey subkey = (ScanKey) DatumGetPointer(bkey->sk_argument);
+ bool loosen_strat = false,
+ tighten_strat = false;
/*
* Cannot be a NULL in the first row member: _bt_preprocess_keys
@@ -1265,122 +1280,160 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* ever getting this far
*/
Assert(subkey->sk_flags & SK_ROW_MEMBER);
- Assert(subkey->sk_attno == cur->sk_attno);
+ Assert(subkey->sk_attno == bkey->sk_attno);
Assert(!(subkey->sk_flags & SK_ISNULL));
/*
+ * This is either a > or >= key (during backwards scans it is
+ * either < or <=) that was marked required during preprocessing.
+ * Later so->keyData[] keys can't have been marked required, so
+ * our row compare header key must be the final startKeys[] entry.
+ */
+ Assert(subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD));
+ Assert(i == keysz - 1);
+
+ /*
* The member scankeys are already in insertion format (ie, they
* have sk_func = 3-way-comparison function)
*/
memcpy(inskey.scankeys + i, subkey, sizeof(ScanKeyData));
/*
- * If the row comparison is the last positioning key we accepted,
- * try to add additional keys from the lower-order row members.
- * (If we accepted independent conditions on additional index
- * columns, we use those instead --- doesn't seem worth trying to
- * determine which is more restrictive.) Note that this is OK
- * even if the row comparison is of ">" or "<" type, because the
- * condition applied to all but the last row member is effectively
- * ">=" or "<=", and so the extra keys don't break the positioning
- * scheme. But, by the same token, if we aren't able to use all
- * the row members, then the part of the row comparison that we
- * did use has to be treated as just a ">=" or "<=" condition, and
- * so we'd better adjust strat_total accordingly.
+ * Now look to later row compare members.
+ *
+ * If there's an "index attribute gap" between two row compare
+ * members, the second member won't have been marked required, and
+ * so can't be used as a starting boundary key here. The part of
+ * the row comparison that we do still use has to be treated as a
+ * ">=" or "<=" condition. For example, a qual "(a, c) > (1, 42)"
+ * with an omitted intervening index attribute "b" will use an
+ * insertion scan key "a >= 1". Even the first "a = 1" tuple on
+ * the leaf level might satisfy the row compare qual.
+ *
+ * We're able to use a _more_ restrictive strategy when we reach a
+ * NULL row compare member, since they're always unsatisfiable.
+ * For example, a qual "(a, b, c) >= (1, NULL, 77)" will use an
+ * insertion scan key "a > 1". All tuples where "a = 1" cannot
+ * possibly satisfy the row compare qual, so this is safe.
*/
- if (i == keysz - 1)
+ Assert(!(subkey->sk_flags & SK_ROW_END));
+ for (;;)
{
- bool used_all_subkeys = false;
+ subkey++;
+ Assert(subkey->sk_flags & SK_ROW_MEMBER);
- Assert(!(subkey->sk_flags & SK_ROW_END));
- for (;;)
+ if (subkey->sk_flags & SK_ISNULL)
{
- subkey++;
- Assert(subkey->sk_flags & SK_ROW_MEMBER);
- if (subkey->sk_attno != keysz + 1)
- break; /* out-of-sequence, can't use it */
- if (subkey->sk_strategy != cur->sk_strategy)
- break; /* wrong direction, can't use it */
- if (subkey->sk_flags & SK_ISNULL)
- break; /* can't use null keys */
- Assert(keysz < INDEX_MAX_KEYS);
- memcpy(inskey.scankeys + keysz, subkey,
- sizeof(ScanKeyData));
- keysz++;
- if (subkey->sk_flags & SK_ROW_END)
- {
- used_all_subkeys = true;
- break;
- }
+ /*
+ * NULL member key, can only use earlier keys.
+ *
+ * We deliberately avoid checking if this key is marked
+ * required. All earlier keys are required, and this key
+ * is unsatisfiable either way, so we can't miss anything.
+ */
+ tighten_strat = true;
+ break;
}
- if (!used_all_subkeys)
+
+ if (!(subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
{
- switch (strat_total)
- {
- case BTLessStrategyNumber:
- strat_total = BTLessEqualStrategyNumber;
- break;
- case BTGreaterStrategyNumber:
- strat_total = BTGreaterEqualStrategyNumber;
- break;
- }
+ /* nonrequired member key, can only use earlier keys */
+ loosen_strat = true;
+ break;
}
- break; /* done with outer loop */
+
+ Assert(subkey->sk_attno == keysz + 1);
+ Assert(subkey->sk_strategy == bkey->sk_strategy);
+ Assert(keysz < INDEX_MAX_KEYS);
+
+ memcpy(inskey.scankeys + keysz, subkey,
+ sizeof(ScanKeyData));
+ keysz++;
+ if (subkey->sk_flags & SK_ROW_END)
+ break;
}
- }
- else
- {
- /*
- * Ordinary comparison key. Transform the search-style scan key
- * to an insertion scan key by replacing the sk_func with the
- * appropriate btree comparison function.
- *
- * If scankey operator is not a cross-type comparison, we can use
- * the cached comparison function; otherwise gotta look it up in
- * the catalogs. (That can't lead to infinite recursion, since no
- * indexscan initiated by syscache lookup will use cross-data-type
- * operators.)
- *
- * We support the convention that sk_subtype == InvalidOid means
- * the opclass input type; this is a hack to simplify life for
- * ScanKeyInit().
- */
- if (cur->sk_subtype == rel->rd_opcintype[i] ||
- cur->sk_subtype == InvalidOid)
+ Assert(!(loosen_strat && tighten_strat));
+ if (loosen_strat)
{
- FmgrInfo *procinfo;
-
- procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC);
- ScanKeyEntryInitializeWithInfo(inskey.scankeys + i,
- cur->sk_flags,
- cur->sk_attno,
- InvalidStrategy,
- cur->sk_subtype,
- cur->sk_collation,
- procinfo,
- cur->sk_argument);
+ /* Use less restrictive strategy (and fewer member keys) */
+ switch (strat_total)
+ {
+ case BTLessStrategyNumber:
+ strat_total = BTLessEqualStrategyNumber;
+ break;
+ case BTGreaterStrategyNumber:
+ strat_total = BTGreaterEqualStrategyNumber;
+ break;
+ }
}
- else
+ if (tighten_strat)
{
- RegProcedure cmp_proc;
-
- cmp_proc = get_opfamily_proc(rel->rd_opfamily[i],
- rel->rd_opcintype[i],
- cur->sk_subtype,
- BTORDER_PROC);
- if (!RegProcedureIsValid(cmp_proc))
- elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
- BTORDER_PROC, rel->rd_opcintype[i], cur->sk_subtype,
- cur->sk_attno, RelationGetRelationName(rel));
- ScanKeyEntryInitialize(inskey.scankeys + i,
- cur->sk_flags,
- cur->sk_attno,
- InvalidStrategy,
- cur->sk_subtype,
- cur->sk_collation,
- cmp_proc,
- cur->sk_argument);
+ /* Use more restrictive strategy (and fewer member keys) */
+ switch (strat_total)
+ {
+ case BTLessEqualStrategyNumber:
+ strat_total = BTLessStrategyNumber;
+ break;
+ case BTGreaterEqualStrategyNumber:
+ strat_total = BTGreaterStrategyNumber;
+ break;
+ }
}
+
+ /* done adding to inskey (row comparison keys always come last) */
+ break;
+ }
+
+ /*
+ * Ordinary comparison key/search-style key.
+ *
+ * Transform the search-style scan key to an insertion scan key by
+ * replacing the sk_func with the appropriate btree 3-way-comparison
+ * function.
+ *
+ * If scankey operator is not a cross-type comparison, we can use the
+ * cached comparison function; otherwise gotta look it up in the
+ * catalogs. (That can't lead to infinite recursion, since no
+ * indexscan initiated by syscache lookup will use cross-data-type
+ * operators.)
+ *
+ * We support the convention that sk_subtype == InvalidOid means the
+ * opclass input type; this hack simplifies life for ScanKeyInit().
+ */
+ if (bkey->sk_subtype == rel->rd_opcintype[i] ||
+ bkey->sk_subtype == InvalidOid)
+ {
+ FmgrInfo *procinfo;
+
+ procinfo = index_getprocinfo(rel, bkey->sk_attno, BTORDER_PROC);
+ ScanKeyEntryInitializeWithInfo(inskey.scankeys + i,
+ bkey->sk_flags,
+ bkey->sk_attno,
+ InvalidStrategy,
+ bkey->sk_subtype,
+ bkey->sk_collation,
+ procinfo,
+ bkey->sk_argument);
+ }
+ else
+ {
+ RegProcedure cmp_proc;
+
+ cmp_proc = get_opfamily_proc(rel->rd_opfamily[i],
+ rel->rd_opcintype[i],
+ bkey->sk_subtype, BTORDER_PROC);
+ if (!RegProcedureIsValid(cmp_proc))
+ elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"",
+ BTORDER_PROC, rel->rd_opcintype[i], bkey->sk_subtype,
+ bkey->sk_attno, RelationGetRelationName(rel));
+ ScanKeyEntryInitialize(inskey.scankeys + i,
+ bkey->sk_flags,
+ bkey->sk_attno,
+ InvalidStrategy,
+ bkey->sk_subtype,
+ bkey->sk_collation,
+ cmp_proc,
+ bkey->sk_argument);
}
}
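
The two strategy adjustments above condense to a pair of worked examples
(an illustrative comment only, restating the cases the patch describes):

/*
 * loosen_strat:   qual "(a, c) > (1, 42)" on an index over (a, b, c).
 *                 The "c" member is nonrequired (gap at "b"), so we
 *                 position with "a >= 1":  BTGreater -> BTGreaterEqual.
 *
 * tighten_strat:  qual "(a, b, c) >= (1, NULL, 77)".
 *                 The NULL member can never match, so we position with
 *                 "a > 1":  BTGreaterEqual -> BTGreater.
 */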
@@ -1469,6 +1522,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (!BufferIsValid(so->currPos.buf))
{
+ Assert(!so->needPrimScan);
+
/*
* We only get here if the index is completely empty. Lock relation
* because nothing finer to lock exists. Without a buffer lock, it's
@@ -1487,7 +1542,6 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
if (!BufferIsValid(so->currPos.buf))
{
- Assert(!so->needPrimScan);
_bt_parallel_done(scan);
return false;
}
@@ -1610,7 +1664,13 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
so->currPos.currPage = BufferGetBlockNumber(so->currPos.buf);
so->currPos.prevPage = opaque->btpo_prev;
so->currPos.nextPage = opaque->btpo_next;
+ /* delay setting so->currPos.lsn until _bt_drop_lock_and_maybe_pin */
+ so->currPos.dir = dir;
+ so->currPos.nextTupleOffset = 0;
+ /* either moreRight or moreLeft should be set now (may be unset later) */
+ Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight :
+ so->currPos.moreLeft);
Assert(!P_IGNORE(opaque));
Assert(BTScanPosIsPinned(so->currPos));
Assert(!so->needPrimScan);
@@ -1626,14 +1686,6 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
so->currPos.currPage);
}
- /* initialize remaining currPos fields related to current page */
- so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf);
- so->currPos.dir = dir;
- so->currPos.nextTupleOffset = 0;
- /* either moreLeft or moreRight should be set now (may be unset later) */
- Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight :
- so->currPos.moreLeft);
-
PredicateLockPage(rel, so->currPos.currPage, scan->xs_snapshot);
/* initialize local variables */
@@ -2107,10 +2159,9 @@ _bt_returnitem(IndexScanDesc scan, BTScanOpaque so)
*
* Wrapper on _bt_readnextpage that performs final steps for the current page.
*
- * On entry, if so->currPos.buf is valid the buffer is pinned but not locked.
- * If there's no pin held, it's because _bt_drop_lock_and_maybe_pin dropped
- * the pin eagerly earlier on. The scan must have so->currPos.currPage set to
- * a valid block, in any case.
+ * On entry, so->currPos must be valid. Its buffer will be pinned, though
+ * never locked. (Actually, when so->dropPin there won't even be a pin held,
+ * though so->currPos.currPage must still be set to a valid block number.)
*/
static bool
_bt_steppage(IndexScanDesc scan, ScanDirection dir)
@@ -2251,12 +2302,14 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
*/
if (_bt_readpage(scan, dir, offnum, true))
{
+ Relation rel = scan->indexRelation;
+
/*
* _bt_readpage succeeded. Drop the lock (and maybe the pin) on
* so->currPos.buf in preparation for btgettuple returning tuples.
*/
Assert(BTScanPosIsPinned(so->currPos));
- _bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+ _bt_drop_lock_and_maybe_pin(rel, so);
return true;
}
@@ -2278,9 +2331,12 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
* previously-saved right link or left link. lastcurrblkno is the page that
* was current at the point where the blkno link was saved, which we use to
* reason about concurrent page splits/page deletions during backwards scans.
+ * In the common case where seized=false, blkno is either so->currPos.nextPage
+ * or so->currPos.prevPage, and lastcurrblkno is so->currPos.currPage.
*
- * On entry, caller shouldn't hold any locks or pins on any page (we work
- * directly off of blkno and lastcurrblkno instead). Parallel scan callers
+ * On entry, so->currPos shouldn't be locked by caller. so->currPos.buf must
+ * be InvalidBuffer/unpinned as needed by caller (note that lastcurrblkno
+ * won't need to be read again in almost all cases). Parallel scan callers
* that seized the scan before calling here should pass seized=true; such a
* caller's blkno and lastcurrblkno arguments come from the seized scan.
* seized=false callers just pass us the blkno/lastcurrblkno taken from their
@@ -2294,11 +2350,11 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
*
* On success exit, so->currPos is updated to contain data from the next
* interesting page, and we return true. We hold a pin on the buffer on
- * success exit, except when _bt_drop_lock_and_maybe_pin decided it was safe
- * to eagerly drop the pin (to avoid blocking VACUUM).
+ * success exit (except during so->dropPin index scans, when we drop the pin
+ * eagerly to avoid blocking VACUUM).
*
- * If there are no more matching records in the given direction, we drop all
- * locks and pins, invalidate so->currPos, and return false.
+ * If there are no more matching records in the given direction, we invalidate
+ * so->currPos (while ensuring it retains no locks or pins), and return false.
*
* We always release the scan for a parallel scan caller, regardless of
* success or failure; we'll call _bt_parallel_release as soon as possible.
@@ -2413,7 +2469,7 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno,
*/
Assert(so->currPos.currPage == blkno);
Assert(BTScanPosIsPinned(so->currPos));
- _bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+ _bt_drop_lock_and_maybe_pin(rel, so);
return true;
}
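
Taken together, the nbtsearch.c changes give leaf pages the following
pin/lock life cycle (an editor's summary in comment form; it paraphrases
the patch rather than quoting any single function):

/*
 * _bt_readpage()                  page locked + pinned, matches saved
 * _bt_drop_lock_and_maybe_pin()
 *     !so->dropPin:  drop the lock, keep the pin (the pin blocks
 *                    VACUUM's TID recycling on this page)
 *      so->dropPin:  save the page LSN, drop lock and pin (VACUUM is
 *                    never blocked)
 * btgettuple(), etc.              tuples returned from so->currPos
 * _bt_killitems()
 *     !so->dropPin:  re-lock the still-pinned buffer, set LP_DEAD bits
 *      so->dropPin:  re-read the page; set LP_DEAD bits only if the
 *                    page LSN still matches the one saved earlier
 */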
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 3794cc924ad..9d70e89c1f3 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -105,7 +105,7 @@ typedef struct BTShared
int scantuplesortstates;
/* Query ID, for report in worker processes */
- uint64 queryid;
+ int64 queryid;
/*
* workersdonecv is used to monitor the progress of workers. All parallel
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 1a15dfcb7d3..9aed207995f 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -44,7 +44,6 @@ static bool _bt_array_decrement(Relation rel, ScanKey skey, BTArrayKeyInfo *arra
static bool _bt_array_increment(Relation rel, ScanKey skey, BTArrayKeyInfo *array);
static bool _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir,
bool *skip_array_set);
-static void _bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir);
static bool _bt_tuple_before_array_skeys(IndexScanDesc scan, ScanDirection dir,
IndexTuple tuple, TupleDesc tupdesc, int tupnatts,
bool readpagetup, int sktrig, bool *scanBehind);
@@ -52,7 +51,6 @@ static bool _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
IndexTuple tuple, int tupnatts, TupleDesc tupdesc,
int sktrig, bool sktrig_required);
#ifdef USE_ASSERT_CHECKING
-static bool _bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir);
static bool _bt_verify_keys_with_arraykeys(IndexScanDesc scan);
#endif
static bool _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir,
@@ -1035,73 +1033,6 @@ _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir,
}
/*
- * _bt_rewind_nonrequired_arrays() -- Rewind SAOP arrays not marked required
- *
- * Called when _bt_advance_array_keys decides to start a new primitive index
- * scan on the basis of the current scan position being before the position
- * that _bt_first is capable of repositioning the scan to by applying an
- * inequality operator required in the opposite-to-scan direction only.
- *
- * Although equality strategy scan keys (for both arrays and non-arrays alike)
- * are either marked required in both directions or in neither direction,
- * there is a sense in which non-required arrays behave like required arrays.
- * With a qual such as "WHERE a IN (100, 200) AND b >= 3 AND c IN (5, 6, 7)",
- * the scan key on "c" is non-required, but nevertheless enables positioning
- * the scan at the first tuple >= "(100, 3, 5)" on the leaf level during the
- * first descent of the tree by _bt_first. Later on, there could also be a
- * second descent, that places the scan right before tuples >= "(200, 3, 5)".
- * _bt_first must never be allowed to build an insertion scan key whose "c"
- * entry is set to a value other than 5, the "c" array's first element/value.
- * (Actually, it's the first in the current scan direction. This example uses
- * a forward scan.)
- *
- * Calling here resets the array scan key elements for the scan's non-required
- * arrays. This is strictly necessary for correctness in a subset of cases
- * involving "required in opposite direction"-triggered primitive index scans.
- * Not all callers are at risk of _bt_first using a non-required array like
- * this, but advancement always resets the arrays when another primitive scan
- * is scheduled, just to keep things simple. Array advancement even makes
- * sure to reset non-required arrays during scans that have no inequalities.
- * (Advancement still won't call here when there are no inequalities, though
- * that's just because it's all handled indirectly instead.)
- *
- * Note: _bt_verify_arrays_bt_first is called by an assertion to enforce that
- * everybody got this right.
- *
- * Note: In practice almost all SAOP arrays are marked required during
- * preprocessing (if necessary by generating skip arrays). It is hardly ever
- * truly necessary to call here, but consistently doing so is simpler.
- */
-static void
-_bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir)
-{
- Relation rel = scan->indexRelation;
- BTScanOpaque so = (BTScanOpaque) scan->opaque;
- int arrayidx = 0;
-
- for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
- {
- ScanKey cur = so->keyData + ikey;
- BTArrayKeyInfo *array = NULL;
-
- if (!(cur->sk_flags & SK_SEARCHARRAY) ||
- cur->sk_strategy != BTEqualStrategyNumber)
- continue;
-
- array = &so->arrayKeys[arrayidx++];
- Assert(array->scan_key == ikey);
-
- if ((cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
- continue;
-
- Assert(array->num_elems != -1); /* No non-required skip arrays */
-
- _bt_array_set_low_or_high(rel, cur, array,
- ScanDirectionIsForward(dir));
- }
-}
-
-/*
* _bt_tuple_before_array_skeys() -- too early to advance required arrays?
*
* We always compare the tuple using the current array keys (which we assume
@@ -1380,8 +1311,6 @@ _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir)
*/
if (so->needPrimScan)
{
- Assert(_bt_verify_arrays_bt_first(scan, dir));
-
/*
* Flag was set -- must call _bt_first again, which will reset the
* scan's needPrimScan flag
@@ -2007,14 +1936,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate,
*/
else if (has_required_opposite_direction_only && pstate->finaltup &&
unlikely(!_bt_oppodir_checkkeys(scan, dir, pstate->finaltup)))
- {
- /*
- * Make sure that any SAOP arrays that were not marked required by
- * preprocessing are reset to their first element for this direction
- */
- _bt_rewind_nonrequired_arrays(scan, dir);
goto new_prim_scan;
- }
continue_scan:
@@ -2045,8 +1967,6 @@ continue_scan:
*/
so->oppositeDirCheck = has_required_opposite_direction_only;
- _bt_rewind_nonrequired_arrays(scan, dir);
-
/*
* skip by setting "look ahead" mechanism's offnum for forwards scans
* (backwards scans check scanBehind flag directly instead)
@@ -2143,48 +2063,6 @@ end_toplevel_scan:
#ifdef USE_ASSERT_CHECKING
/*
- * Verify that the scan's qual state matches what we expect at the point that
- * _bt_start_prim_scan is about to start a just-scheduled new primitive scan.
- *
- * We enforce a rule against non-required array scan keys: they must start out
- * with whatever element is the first for the scan's current scan direction.
- * See _bt_rewind_nonrequired_arrays comments for an explanation.
- */
-static bool
-_bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir)
-{
- BTScanOpaque so = (BTScanOpaque) scan->opaque;
- int arrayidx = 0;
-
- for (int ikey = 0; ikey < so->numberOfKeys; ikey++)
- {
- ScanKey cur = so->keyData + ikey;
- BTArrayKeyInfo *array = NULL;
- int first_elem_dir;
-
- if (!(cur->sk_flags & SK_SEARCHARRAY) ||
- cur->sk_strategy != BTEqualStrategyNumber)
- continue;
-
- array = &so->arrayKeys[arrayidx++];
-
- if (((cur->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) ||
- ((cur->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir)))
- continue;
-
- if (ScanDirectionIsForward(dir))
- first_elem_dir = 0;
- else
- first_elem_dir = array->num_elems - 1;
-
- if (array->cur_elem != first_elem_dir)
- return false;
- }
-
- return _bt_verify_keys_with_arraykeys(scan);
-}
-
-/*
* Verify that the scan's "so->keyData[]" scan keys are in agreement with
* its array key state
*/
@@ -2194,6 +2072,7 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan)
BTScanOpaque so = (BTScanOpaque) scan->opaque;
int last_sk_attno = InvalidAttrNumber,
arrayidx = 0;
+ bool nonrequiredseen = false;
if (!so->qual_ok)
return false;
@@ -2217,8 +2096,16 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan)
if (array->num_elems != -1 &&
cur->sk_argument != array->elem_values[array->cur_elem])
return false;
- if (last_sk_attno > cur->sk_attno)
- return false;
+ if (cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))
+ {
+ if (last_sk_attno > cur->sk_attno)
+ return false;
+ if (nonrequiredseen)
+ return false;
+ }
+ else
+ nonrequiredseen = true;
+
last_sk_attno = cur->sk_attno;
}
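
The invariant these revised assertions enforce can be stated as example
so->keyData[] orderings (illustrative only):

/*
 * a = 1 (req), b > 2 (req), c = ANY(...) (nonreq)   => accepted
 * a = 1 (req), c = ANY(...) (nonreq), b > 2 (req)   => rejected
 *                    (required key after a nonrequired one)
 * b > 2 (req), a = 1 (req)                          => rejected
 *                    (required keys out of attribute order)
 */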
@@ -2551,37 +2438,12 @@ _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate)
if (!(key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))
{
/* Scan key isn't marked required (corner case) */
- Assert(!(key->sk_flags & SK_ROW_HEADER));
break; /* unsafe */
}
if (key->sk_flags & SK_ROW_HEADER)
{
- /*
- * RowCompare inequality.
- *
- * Only the first subkey from a RowCompare can ever be marked
- * required (that happens when the row header is marked required).
- * There is no simple, general way for us to transitively deduce
- * whether or not every tuple on the page satisfies a RowCompare
- * key based only on firsttup and lasttup -- so we just give up.
- */
- if (!start_past_saop_eq && !so->skipScan)
- break; /* unsafe to go further */
-
- /*
- * We have to be even more careful with RowCompares that come
- * after an array: we assume it's unsafe to even bypass the array.
- * Calling _bt_start_array_keys to recover the scan's arrays
- * following use of forcenonrequired mode isn't compatible with
- * _bt_check_rowcompare's continuescan=false behavior with NULL
- * row compare members. _bt_advance_array_keys must not make a
- * decision on the basis of a key not being satisfied in the
- * opposite-to-scan direction until the scan reaches a leaf page
- * where the same key begins to be satisfied in scan direction.
- * The _bt_first !used_all_subkeys behavior makes this limitation
- * hard to work around some other way.
- */
- return; /* completely unsafe to set pstate.startikey */
+ /* RowCompare inequalities currently aren't supported */
+ break; /* "unsafe" */
}
if (key->sk_strategy != BTEqualStrategyNumber)
{
@@ -3078,6 +2940,31 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
Assert(subkey->sk_flags & SK_ROW_MEMBER);
+ /* When a NULL row member is compared, the row never matches */
+ if (subkey->sk_flags & SK_ISNULL)
+ {
+ /*
+ * Unlike the simple-scankey case, this isn't a disallowed case
+ * (except when it's the first row element that has the NULL arg).
+ * But it can never match. If all the earlier row comparison
+ * columns are required for the scan direction, we can stop the
+ * scan, because there can't be another tuple that will succeed.
+ */
+ Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument));
+ subkey--;
+ if (forcenonrequired)
+ {
+ /* treating scan's keys as non-required */
+ }
+ else if ((subkey->sk_flags & SK_BT_REQFWD) &&
+ ScanDirectionIsForward(dir))
+ *continuescan = false;
+ else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
+ ScanDirectionIsBackward(dir))
+ *continuescan = false;
+ return false;
+ }
+
if (subkey->sk_attno > tupnatts)
{
/*
@@ -3087,11 +2974,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
* attribute passes the qual.
*/
Assert(BTreeTupleIsPivot(tuple));
- cmpresult = 0;
- if (subkey->sk_flags & SK_ROW_END)
- break;
- subkey++;
- continue;
+ return true;
}
datum = index_getattr(tuple,
@@ -3101,6 +2984,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
if (isNull)
{
+ int reqflags;
+
if (forcenonrequired)
{
/* treating scan's keys as non-required */
@@ -3111,15 +2996,35 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
* Since NULLs are sorted before non-NULLs, we know we have
* reached the lower limit of the range of values for this
* index attr. On a backward scan, we can stop if this qual
- * is one of the "must match" subset. We can stop regardless
- * of whether the qual is > or <, so long as it's required,
- * because it's not possible for any future tuples to pass. On
- * a forward scan, however, we must keep going, because we may
- * have initially positioned to the start of the index.
- * (_bt_advance_array_keys also relies on this behavior during
- * forward scans.)
+ * is one of the "must match" subset. However, on a forwards
+ * scan, we must keep going, because we may have initially
+ * positioned to the start of the index.
+ *
+ * All required NULLS FIRST > row members can use NULL tuple
+ * values to end backwards scans, just like with other values.
+ * A qual "WHERE (a, b, c) > (9, 42, 'foo')" can terminate a
+ * backwards scan upon reaching the index's rightmost "a = 9"
+ * tuple whose "b" column contains a NULL (if not sooner).
+ * Since "b" is NULLS FIRST, we can treat its NULLs as "<" 42.
*/
- if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
+ reqflags = SK_BT_REQBKWD;
+
+ /*
+ * When a most significant required NULLS FIRST < row compare
+ * member sees NULL tuple values during a backwards scan, it
+ * signals the end of matches for the whole row compare/scan.
+ * A qual "WHERE (a, b, c) < (9, 42, 'foo')" will terminate a
+ * backwards scan upon reaching the rightmost tuple whose "a"
+ * column has a NULL. The "a" NULL value is "<" 9, and yet
+ * our < row compare will still end the scan. (This isn't
+ * safe with later/lower-order row members. Notice that it
+ * can only happen with an "a" NULL some time after the scan
+ * completely stops needing to use its "b" and "c" members.)
+ */
+ if (subkey == (ScanKey) DatumGetPointer(skey->sk_argument))
+ reqflags |= SK_BT_REQFWD; /* safe, first row member */
+
+ if ((subkey->sk_flags & reqflags) &&
ScanDirectionIsBackward(dir))
*continuescan = false;
}
@@ -3129,15 +3034,35 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
* Since NULLs are sorted after non-NULLs, we know we have
* reached the upper limit of the range of values for this
* index attr. On a forward scan, we can stop if this qual is
- * one of the "must match" subset. We can stop regardless of
- * whether the qual is > or <, so long as it's required,
- * because it's not possible for any future tuples to pass. On
- * a backward scan, however, we must keep going, because we
- * may have initially positioned to the end of the index.
- * (_bt_advance_array_keys also relies on this behavior during
- * backward scans.)
+ * one of the "must match" subset. However, on a backward
+ * scan, we must keep going, because we may have initially
+ * positioned to the end of the index.
+ *
+ * All required NULLS LAST < row members can use NULL tuple
+ * values to end forwards scans, just like with other values.
+ * A qual "WHERE (a, b, c) < (9, 42, 'foo')" can terminate a
+ * forwards scan upon reaching the index's leftmost "a = 9"
+ * tuple whose "b" column contains a NULL (if not sooner).
+ * Since "b" is NULLS LAST, we can treat its NULLs as ">" 42.
+ */
+ reqflags = SK_BT_REQFWD;
+
+ /*
+ * When a most significant required NULLS LAST > row compare
+ * member sees NULL tuple values during a forwards scan, it
+ * signals the end of matches for the whole row compare/scan.
+ * A qual "WHERE (a, b, c) > (9, 42, 'foo')" will terminate a
+ * forwards scan upon reaching the leftmost tuple whose "a"
+ * column has a NULL. The "a" NULL value is ">" 9, and yet
+ * our > row compare will end the scan. (This isn't safe with
+ * later/lower-order row members. Notice that it can only
+ * happen with an "a" NULL some time after the scan completely
+ * stops needing to use its "b" and "c" members.)
*/
- if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) &&
+ if (subkey == (ScanKey) DatumGetPointer(skey->sk_argument))
+ reqflags |= SK_BT_REQBKWD; /* safe, first row member */
+
+ if ((subkey->sk_flags & reqflags) &&
ScanDirectionIsForward(dir))
*continuescan = false;
}
@@ -3148,30 +3073,6 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts,
return false;
}
- if (subkey->sk_flags & SK_ISNULL)
- {
- /*
- * Unlike the simple-scankey case, this isn't a disallowed case
- * (except when it's the first row element that has the NULL arg).
- * But it can never match. If all the earlier row comparison
- * columns are required for the scan direction, we can stop the
- * scan, because there can't be another tuple that will succeed.
- */
- Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument));
- subkey--;
- if (forcenonrequired)
- {
- /* treating scan's keys as non-required */
- }
- else if ((subkey->sk_flags & SK_BT_REQFWD) &&
- ScanDirectionIsForward(dir))
- *continuescan = false;
- else if ((subkey->sk_flags & SK_BT_REQBKWD) &&
- ScanDirectionIsBackward(dir))
- *continuescan = false;
- return false;
- }
-
/* Perform the test --- three-way comparison not bool operator */
cmpresult = DatumGetInt32(FunctionCall2Coll(&subkey->sk_func,
subkey->sk_collation,
@@ -3330,87 +3231,85 @@ _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
* current page and killed tuples thereon (generally, this should only be
* called if so->numKilled > 0).
*
- * The caller does not have a lock on the page and may or may not have the
- * page pinned in a buffer. Note that read-lock is sufficient for setting
- * LP_DEAD status (which is only a hint).
- *
- * We match items by heap TID before assuming they are the right ones to
- * delete. We cope with cases where items have moved right due to insertions.
- * If an item has moved off the current page due to a split, we'll fail to
- * find it and do nothing (this is not an error case --- we assume the item
- * will eventually get marked in a future indexscan).
+ * Caller should not have a lock on the so->currPos page, but must hold a
+ * buffer pin when !so->dropPin. When we return, it still won't be locked.
+ * It'll continue to hold whatever pins were held before calling here.
*
- * Note that if we hold a pin on the target page continuously from initially
- * reading the items until applying this function, VACUUM cannot have deleted
- * any items from the page, and so there is no need to search left from the
- * recorded offset. (This observation also guarantees that the item is still
- * the right one to delete, which might otherwise be questionable since heap
- * TIDs can get recycled.) This holds true even if the page has been modified
- * by inserts and page splits, so there is no need to consult the LSN.
+ * We match items by heap TID before assuming they are the right ones to set
+ * LP_DEAD. If the scan is one that holds a buffer pin on the target page
+ * continuously from initially reading the items until applying this function
+ * (if it is a !so->dropPin scan), VACUUM cannot have deleted any items on the
+ * page, so the page's TIDs can't have been recycled by now. There's no risk
+ * that we'll confuse a new index tuple that happens to use a recycled TID
+ * with a now-removed tuple with the same TID (that used to be on this same
+ * page). We can't rely on that during scans that drop buffer pins eagerly
+ * (so->dropPin scans), though, so we must condition setting LP_DEAD bits on
+ * the page LSN having not changed since back when _bt_readpage saw the page.
+ * We totally give up on setting LP_DEAD bits when the page LSN changed.
*
- * If the pin was released after reading the page, then we re-read it. If it
- * has been modified since we read it (as determined by the LSN), we dare not
- * flag any entries because it is possible that the old entry was vacuumed
- * away and the TID was re-used by a completely different heap tuple.
+ * We give up much less often during !so->dropPin scans, but it still happens.
+ * We cope with cases where items have moved right due to insertions. If an
+ * item has moved off the current page due to a split, we'll fail to find it
+ * and just give up on it.
*/
void
_bt_killitems(IndexScanDesc scan)
{
+ Relation rel = scan->indexRelation;
BTScanOpaque so = (BTScanOpaque) scan->opaque;
Page page;
BTPageOpaque opaque;
OffsetNumber minoff;
OffsetNumber maxoff;
- int i;
int numKilled = so->numKilled;
bool killedsomething = false;
- bool droppedpin PG_USED_FOR_ASSERTS_ONLY;
+ Buffer buf;
+ Assert(numKilled > 0);
Assert(BTScanPosIsValid(so->currPos));
+ Assert(scan->heapRelation != NULL); /* can't be a bitmap index scan */
- /*
- * Always reset the scan state, so we don't look for same items on other
- * pages.
- */
+ /* Always invalidate so->killedItems[] before leaving so->currPos */
so->numKilled = 0;
- if (BTScanPosIsPinned(so->currPos))
+ if (!so->dropPin)
{
/*
* We have held the pin on this page since we read the index tuples,
* so all we need to do is lock it. The pin will have prevented
- * re-use of any TID on the page, so there is no need to check the
- * LSN.
+ * concurrent VACUUMs from recycling any of the TIDs on the page.
*/
- droppedpin = false;
- _bt_lockbuf(scan->indexRelation, so->currPos.buf, BT_READ);
-
- page = BufferGetPage(so->currPos.buf);
+ Assert(BTScanPosIsPinned(so->currPos));
+ buf = so->currPos.buf;
+ _bt_lockbuf(rel, buf, BT_READ);
}
else
{
- Buffer buf;
+ XLogRecPtr latestlsn;
- droppedpin = true;
- /* Attempt to re-read the buffer, getting pin and lock. */
- buf = _bt_getbuf(scan->indexRelation, so->currPos.currPage, BT_READ);
+ Assert(!BTScanPosIsPinned(so->currPos));
+ Assert(RelationNeedsWAL(rel));
+ buf = _bt_getbuf(rel, so->currPos.currPage, BT_READ);
- page = BufferGetPage(buf);
- if (BufferGetLSNAtomic(buf) == so->currPos.lsn)
- so->currPos.buf = buf;
- else
+ latestlsn = BufferGetLSNAtomic(buf);
+ Assert(!XLogRecPtrIsInvalid(so->currPos.lsn));
+ Assert(so->currPos.lsn <= latestlsn);
+ if (so->currPos.lsn != latestlsn)
{
- /* Modified while not pinned means hinting is not safe. */
- _bt_relbuf(scan->indexRelation, buf);
+ /* Modified, give up on hinting */
+ _bt_relbuf(rel, buf);
return;
}
+
+ /* Unmodified, hinting is safe */
}
+ page = BufferGetPage(buf);
opaque = BTPageGetOpaque(page);
minoff = P_FIRSTDATAKEY(opaque);
maxoff = PageGetMaxOffsetNumber(page);
- for (i = 0; i < numKilled; i++)
+ for (int i = 0; i < numKilled; i++)
{
int itemIndex = so->killedItems[i];
BTScanPosItem *kitem = &so->currPos.items[itemIndex];
@@ -3442,7 +3341,7 @@ _bt_killitems(IndexScanDesc scan)
* correctness.
*
* Note that the page may have been modified in almost any way
- * since we first read it (in the !droppedpin case), so it's
+ * since we first read it (in the !so->dropPin case), so it's
* possible that this posting list tuple wasn't a posting list
* tuple when we first encountered its heap TIDs.
*/
@@ -3458,7 +3357,7 @@ _bt_killitems(IndexScanDesc scan)
* though only in the common case where the page can't
* have been concurrently modified
*/
- Assert(kitem->indexOffset == offnum || !droppedpin);
+ Assert(kitem->indexOffset == offnum || !so->dropPin);
/*
* Read-ahead to later kitems here.
@@ -3522,10 +3421,13 @@ _bt_killitems(IndexScanDesc scan)
if (killedsomething)
{
opaque->btpo_flags |= BTP_HAS_GARBAGE;
- MarkBufferDirtyHint(so->currPos.buf, true);
+ MarkBufferDirtyHint(buf, true);
}
- _bt_unlockbuf(scan->indexRelation, so->currPos.buf);
+ if (!so->dropPin)
+ _bt_unlockbuf(rel, buf);
+ else
+ _bt_relbuf(rel, buf);
}
diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c
index 715cc1f7bad..305598e2865 100644
--- a/src/backend/access/rmgrdesc/xactdesc.c
+++ b/src/backend/access/rmgrdesc/xactdesc.c
@@ -252,6 +252,8 @@ ParsePrepareRecord(uint8 info, xl_xact_prepare *xlrec, xl_xact_parsed_prepare *p
parsed->nsubxacts = xlrec->nsubxacts;
parsed->nrels = xlrec->ncommitrels;
parsed->nabortrels = xlrec->nabortrels;
+ parsed->nstats = xlrec->ncommitstats;
+ parsed->nabortstats = xlrec->nabortstats;
parsed->nmsgs = xlrec->ninvalmsgs;
strncpy(parsed->twophase_gid, bufptr, xlrec->gidlen);
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 113fae1437a..225ff7ca9f2 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -707,6 +707,13 @@ ActivateCommitTs(void)
TransactionId xid;
int64 pageno;
+ /*
+ * During bootstrap, we should not register commit timestamps, so skip the
+ * activation in this case.
+ */
+ if (IsBootstrapProcessingMode())
+ return;
+
/* If we've done this already, there's nothing to do */
LWLockAcquire(CommitTsLock, LW_EXCLUSIVE);
if (commitTsShared->commitTsActive)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 1914859b2ee..47ffc0a2307 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7498,6 +7498,10 @@ CreateCheckPoint(int flags)
if (PriorRedoPtr != InvalidXLogRecPtr)
UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr);
+#ifdef USE_INJECTION_POINTS
+ INJECTION_POINT("checkpoint-before-old-wal-removal", NULL);
+#endif
+
/*
* Delete old log files, those no longer needed for last checkpoint to
* prevent the disk holding the xlog from growing full.
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 6ce979f2d8b..93d38914854 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -4994,13 +4994,25 @@ check_recovery_target_timeline(char **newval, void **extra, GucSource source)
rttg = RECOVERY_TARGET_TIMELINE_LATEST;
else
{
+ char *endp;
+ uint64 timeline;
+
rttg = RECOVERY_TARGET_TIMELINE_NUMERIC;
errno = 0;
- strtoul(*newval, NULL, 0);
- if (errno == EINVAL || errno == ERANGE)
+ timeline = strtou64(*newval, &endp, 0);
+
+ if (*endp != '\0' || errno == EINVAL || errno == ERANGE)
+ {
+ GUC_check_errdetail("\"%s\" is not a valid number.",
+ "recovery_target_timeline");
+ return false;
+ }
+
+ if (timeline < 1 || timeline > PG_UINT32_MAX)
{
- GUC_check_errdetail("\"recovery_target_timeline\" is not a valid number.");
+ GUC_check_errdetail("\"%s\" must be between %u and %u.",
+ "recovery_target_timeline", 1, UINT_MAX);
return false;
}
}
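
For illustration, values that the tightened check now accepts and rejects
(derived from the code above; not exhaustive):

/*
 * recovery_target_timeline = 'latest'      -> RECOVERY_TARGET_TIMELINE_LATEST
 * recovery_target_timeline = '3'           -> numeric timeline, accepted
 * recovery_target_timeline = '0'           -> rejected (must be >= 1)
 * recovery_target_timeline = '4294967296'  -> rejected (> PG_UINT32_MAX)
 * recovery_target_timeline = '3abc'        -> rejected (trailing garbage)
 */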
diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index 6db864892d0..fc8638c1b61 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -109,6 +109,8 @@ static const struct typinfo TypInfo[] = {
F_REGROLEIN, F_REGROLEOUT},
{"regnamespace", REGNAMESPACEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
F_REGNAMESPACEIN, F_REGNAMESPACEOUT},
+ {"regdatabase", REGDATABASEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
+ F_REGDATABASEIN, F_REGDATABASEOUT},
{"text", TEXTOID, 0, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, DEFAULT_COLLATION_OID,
F_TEXTIN, F_TEXTOUT},
{"oid", OIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid,
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c
index 18316a3968b..7dded634eb8 100644
--- a/src/backend/catalog/dependency.c
+++ b/src/backend/catalog/dependency.c
@@ -1850,6 +1850,17 @@ find_expr_references_walker(Node *node,
errmsg("constant of the type %s cannot be used here",
"regrole")));
break;
+
+ /*
+ * Dependencies for regdatabase would have to be shared among all
+ * databases, so explicitly disallow such dependencies.
+ */
+ case REGDATABASEOID:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("constant of the type %s cannot be used here",
+ "regdatabase")));
+ break;
}
}
return false;
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index fbaed5359ad..fd6537567ea 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -665,6 +665,15 @@ CheckAttributeType(const char *attname,
}
/*
+ * For consistency with check_virtual_generated_security().
+ */
+ if ((flags & CHKATYPE_IS_VIRTUAL) && atttypid >= FirstUnpinnedObjectId)
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("virtual generated column \"%s\" cannot have a user-defined type", attname),
+ errdetail("Virtual generated columns that make use of user-defined types are not yet supported."));
+
+ /*
* This might not be strictly invalid per SQL standard, but it is pretty
* useless, and it cannot be dumped, so we must disallow it.
*/
@@ -1100,6 +1109,7 @@ AddNewRelationType(const char *typeName,
* if false, relacl is always set NULL
* allow_system_table_mods: true to allow creation in system namespaces
* is_internal: is this a system-generated catalog?
+ * relrewrite: link to original relation during a table rewrite
*
* Output parameters:
* typaddress: if not null, gets the object address of the new pg_type entry
@@ -2996,7 +3006,7 @@ AddRelationNotNullConstraints(Relation rel, List *constraints,
if (constr->is_no_inherit)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
- errmsg("cannot define not-null constraint on column \"%s\" with NO INHERIT",
+ errmsg("cannot define not-null constraint with NO INHERIT on column \"%s\"",
strVal(linitial(constr->keys))),
errdetail("The column has an inherited not-null constraint.")));
@@ -3215,6 +3225,86 @@ check_nested_generated(ParseState *pstate, Node *node)
}
/*
+ * Check security of virtual generated column expression.
+ *
+ * Just like selecting from a view is exploitable (CVE-2024-7348), selecting
+ * from a table with virtual generated columns is exploitable. Users who are
+ * concerned about this can avoid selecting from views, but telling them to
+ * avoid selecting from tables is less practical.
+ *
+ * To address this, generation expressions for virtual generated columns are
+ * restricted to using built-in functions and types.  We assume that built-in
+ * functions and types cannot be exploited for this purpose.  Note that
+ * overall security also requires that all functions in use are immutable.
+ * (For example, there are some built-in non-immutable functions that can run
+ * arbitrary SQL.)  The immutability is checked elsewhere, since that is a
+ * property that needs to hold independent of security considerations.
+ *
+ * In the future, this could be expanded by some new mechanism to declare
+ * other functions and types as safe or trusted for this purpose, but that
+ * remains to be designed.
+ */
+
+/*
+ * Callback for check_functions_in_node() that determines whether a function
+ * is user-defined.
+ */
+static bool
+contains_user_functions_checker(Oid func_id, void *context)
+{
+ return (func_id >= FirstUnpinnedObjectId);
+}
+
+/*
+ * Checks for all the things we don't want in the generation expressions of
+ * virtual generated columns for security reasons. Errors out if it finds
+ * one.
+ */
+static bool
+check_virtual_generated_security_walker(Node *node, void *context)
+{
+ ParseState *pstate = context;
+
+ if (node == NULL)
+ return false;
+
+ if (!IsA(node, List))
+ {
+ if (check_functions_in_node(node, contains_user_functions_checker, NULL))
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("generation expression uses user-defined function"),
+ errdetail("Virtual generated columns that make use of user-defined functions are not yet supported."),
+ parser_errposition(pstate, exprLocation(node)));
+
+ /*
+ * check_functions_in_node() doesn't check some node types (see
+ * comment there). We handle CoerceToDomain and MinMaxExpr by
+ * checking for built-in types. The other listed node types cannot
+ * call user-definable SQL-visible functions.
+ *
+ * We furthermore need this type check to handle built-in, immutable
+ * polymorphic functions such as array_eq().
+ */
+ if (exprType(node) >= FirstUnpinnedObjectId)
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("generation expression uses user-defined type"),
+ errdetail("Virtual generated columns that make use of user-defined types are not yet supported."),
+ parser_errposition(pstate, exprLocation(node)));
+ }
+
+ return expression_tree_walker(node, check_virtual_generated_security_walker, context);
+}
+
+static void
+check_virtual_generated_security(ParseState *pstate, Node *node)
+{
+ check_virtual_generated_security_walker(node, pstate);
+}
+
+/*
* Take a raw default and convert it to a cooked format ready for
* storage.
*
@@ -3253,6 +3343,10 @@ cookDefault(ParseState *pstate,
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("generation expression is not immutable")));
+
+ /* Check security of expressions for virtual generated column */
+ if (attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
+ check_virtual_generated_security(pstate, expr);
}
else
{
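A companion sketch for the cookDefault() check above (function and table names hypothetical); even an immutable user-defined function is rejected for now:

    CREATE FUNCTION myfunc(int) RETURNS int LANGUAGE sql IMMUTABLE
        RETURN $1 + 1;
    CREATE TABLE t2 (
        x int,
        y int GENERATED ALWAYS AS (myfunc(x)) VIRTUAL
    );
    -- ERROR:  generation expression uses user-defined function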
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 739a92bdcc1..aa216683b74 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -3020,7 +3020,7 @@ index_build(Relation heapRelation,
/*
* Determine worker process details for parallel CREATE INDEX. Currently,
- * only btree and BRIN have support for parallel builds.
+ * only btree, GIN, and BRIN have support for parallel builds.
*
* Note that planner considers parallel safety for us.
*/
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 15efb02badb..e5dbbe61b81 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -674,11 +674,6 @@ GRANT SELECT ON pg_backend_memory_contexts TO pg_read_all_stats;
REVOKE EXECUTE ON FUNCTION pg_get_backend_memory_contexts() FROM PUBLIC;
GRANT EXECUTE ON FUNCTION pg_get_backend_memory_contexts() TO pg_read_all_stats;
-REVOKE EXECUTE ON FUNCTION
- pg_get_process_memory_contexts(integer, boolean, float) FROM PUBLIC;
-GRANT EXECUTE ON FUNCTION
- pg_get_process_memory_contexts(integer, boolean, float) TO pg_read_all_stats;
-
-- Statistics views
CREATE VIEW pg_stat_all_tables AS
@@ -900,7 +895,7 @@ CREATE VIEW pg_stat_activity AS
S.wait_event,
S.state,
S.backend_xid,
- s.backend_xmin,
+ S.backend_xmin,
S.query_id,
S.query,
S.backend_type
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 4fffb76e557..7111d5d5334 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -76,7 +76,7 @@ static BufferAccessStrategy vac_strategy;
static void do_analyze_rel(Relation onerel,
- VacuumParams *params, List *va_cols,
+ const VacuumParams params, List *va_cols,
AcquireSampleRowsFunc acquirefunc, BlockNumber relpages,
bool inh, bool in_outer_xact, int elevel);
static void compute_index_stats(Relation onerel, double totalrows,
@@ -107,7 +107,7 @@ static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
*/
void
analyze_rel(Oid relid, RangeVar *relation,
- VacuumParams *params, List *va_cols, bool in_outer_xact,
+ const VacuumParams params, List *va_cols, bool in_outer_xact,
BufferAccessStrategy bstrategy)
{
Relation onerel;
@@ -116,7 +116,7 @@ analyze_rel(Oid relid, RangeVar *relation,
BlockNumber relpages = 0;
/* Select logging level */
- if (params->options & VACOPT_VERBOSE)
+ if (params.options & VACOPT_VERBOSE)
elevel = INFO;
else
elevel = DEBUG2;
@@ -138,8 +138,8 @@ analyze_rel(Oid relid, RangeVar *relation,
*
* Make sure to generate only logs for ANALYZE in this case.
*/
- onerel = vacuum_open_relation(relid, relation, params->options & ~(VACOPT_VACUUM),
- params->log_min_duration >= 0,
+ onerel = vacuum_open_relation(relid, relation, params.options & ~(VACOPT_VACUUM),
+ params.log_min_duration >= 0,
ShareUpdateExclusiveLock);
/* leave if relation could not be opened or locked */
@@ -155,7 +155,7 @@ analyze_rel(Oid relid, RangeVar *relation,
*/
if (!vacuum_is_permitted_for_relation(RelationGetRelid(onerel),
onerel->rd_rel,
- params->options & ~VACOPT_VACUUM))
+ params.options & ~VACOPT_VACUUM))
{
relation_close(onerel, ShareUpdateExclusiveLock);
return;
@@ -227,7 +227,7 @@ analyze_rel(Oid relid, RangeVar *relation,
else
{
/* No need for a WARNING if we already complained during VACUUM */
- if (!(params->options & VACOPT_VACUUM))
+ if (!(params.options & VACOPT_VACUUM))
ereport(WARNING,
(errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables",
RelationGetRelationName(onerel))));
@@ -275,7 +275,7 @@ analyze_rel(Oid relid, RangeVar *relation,
* appropriate acquirefunc for each child table.
*/
static void
-do_analyze_rel(Relation onerel, VacuumParams *params,
+do_analyze_rel(Relation onerel, const VacuumParams params,
List *va_cols, AcquireSampleRowsFunc acquirefunc,
BlockNumber relpages, bool inh, bool in_outer_xact,
int elevel)
@@ -309,9 +309,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
PgStat_Counter startreadtime = 0;
PgStat_Counter startwritetime = 0;
- verbose = (params->options & VACOPT_VERBOSE) != 0;
+ verbose = (params.options & VACOPT_VERBOSE) != 0;
instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
- params->log_min_duration >= 0));
+ params.log_min_duration >= 0));
if (inh)
ereport(elevel,
(errmsg("analyzing \"%s.%s\" inheritance tree",
@@ -706,7 +706,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
* amvacuumcleanup() when called in ANALYZE-only mode. The only exception
* among core index AMs is GIN/ginvacuumcleanup().
*/
- if (!(params->options & VACOPT_VACUUM))
+ if (!(params.options & VACOPT_VACUUM))
{
for (ind = 0; ind < nindexes; ind++)
{
@@ -736,9 +736,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
{
TimestampTz endtime = GetCurrentTimestamp();
- if (verbose || params->log_min_duration == 0 ||
+ if (verbose || params.log_min_duration == 0 ||
TimestampDifferenceExceeds(starttime, endtime,
- params->log_min_duration))
+ params.log_min_duration))
{
long delay_in_ms;
WalUsage walusage;
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 54a08e4102e..b55221d44cd 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -917,7 +917,7 @@ copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, bool verb
* not to be aggressive about this.
*/
memset(&params, 0, sizeof(VacuumParams));
- vacuum_get_cutoffs(OldHeap, &params, &cutoffs);
+ vacuum_get_cutoffs(OldHeap, params, &cutoffs);
/*
* FreezeXid will become the table's new relfrozenxid, and that mustn't go
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 74ae42b19a7..fae9c41db65 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
}
/*
- * Extract a CopyHeaderChoice value from a DefElem. This is like
- * defGetBoolean() but also accepts the special value "match".
+ * Extract the CopyFormatOptions.header_line value from a DefElem.
+ *
+ * Parses the HEADER option for COPY, which can be a boolean, a non-negative
+ * integer (number of lines to skip), or the special value "match".
*/
-static CopyHeaderChoice
-defGetCopyHeaderChoice(DefElem *def, bool is_from)
+static int
+defGetCopyHeaderOption(DefElem *def, bool is_from)
{
/*
* If no parameter value given, assume "true" is meant.
@@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
return COPY_HEADER_TRUE;
/*
- * Allow 0, 1, "true", "false", "on", "off", or "match".
+ * Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or
+ * "match".
*/
switch (nodeTag(def->arg))
{
case T_Integer:
- switch (intVal(def->arg))
{
- case 0:
- return COPY_HEADER_FALSE;
- case 1:
- return COPY_HEADER_TRUE;
- default:
- /* otherwise, error out below */
- break;
+ int ival = intVal(def->arg);
+
+ if (ival < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("a negative integer value cannot be "
+ "specified for %s", def->defname)));
+
+ if (!is_from && ival > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot use multi-line header in COPY TO")));
+
+ return ival;
}
break;
default:
@@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
}
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("%s requires a Boolean value or \"match\"",
+ errmsg("%s requires a Boolean value, a non-negative integer, "
+ "or the string \"match\"",
def->defname)));
return COPY_HEADER_FALSE; /* keep compiler quiet */
}
@@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate,
if (header_specified)
errorConflictingDefElem(defel, pstate);
header_specified = true;
- opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
+ opts_out->header_line = defGetCopyHeaderOption(defel, is_from);
}
else if (strcmp(defel->defname, "quote") == 0)
{
@@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate,
errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
/* Check header */
- if (opts_out->binary && opts_out->header_line)
+ if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
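A sketch of the resulting user-visible behavior (table and file paths hypothetical):

    -- skip two header lines when loading
    COPY tab FROM '/tmp/data.csv' WITH (FORMAT csv, HEADER 2);

    -- COPY TO still writes at most one header line
    COPY tab TO '/tmp/out.csv' WITH (FORMAT csv, HEADER 2);
    -- ERROR:  cannot use multi-line header in COPY TO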
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index f5fc346e201..b1ae97b833d 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -771,21 +771,30 @@ static pg_attribute_always_inline bool
NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
{
int fldct;
- bool done;
+ bool done = false;
/* only available for text or csv input */
Assert(!cstate->opts.binary);
/* on input check that the header line is correct if needed */
- if (cstate->cur_lineno == 0 && cstate->opts.header_line)
+ if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
{
ListCell *cur;
TupleDesc tupDesc;
+ int lines_to_skip = cstate->opts.header_line;
+
+ /* If set to "match", one header line is skipped */
+ if (cstate->opts.header_line == COPY_HEADER_MATCH)
+ lines_to_skip = 1;
tupDesc = RelationGetDescr(cstate->rel);
- cstate->cur_lineno++;
- done = CopyReadLine(cstate, is_csv);
+ for (int i = 0; i < lines_to_skip; i++)
+ {
+ cstate->cur_lineno++;
+ if ((done = CopyReadLine(cstate, is_csv)))
+ break;
+ }
if (cstate->opts.header_line == COPY_HEADER_MATCH)
{
@@ -1538,7 +1547,7 @@ GetDecimalFromHex(char hex)
if (isdigit((unsigned char) hex))
return hex - '0';
else
- return tolower((unsigned char) hex) - 'a' + 10;
+ return pg_ascii_tolower((unsigned char) hex) - 'a' + 10;
}
/*
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index f87e405351d..67b94b91cae 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
cstate->file_encoding);
/* if a header has been requested send the line */
- if (cstate->opts.header_line)
+ if (cstate->opts.header_line == COPY_HEADER_TRUE)
{
ListCell *cur;
bool hdr_delim = false;
@@ -835,7 +835,7 @@ BeginCopyTo(ParseState *pstate,
((DR_copy *) dest)->cstate = cstate;
/* Create a QueryDesc requesting no output */
- cstate->queryDesc = CreateQueryDesc(plan, NULL, pstate->p_sourcetext,
+ cstate->queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext,
GetActiveSnapshot(),
InvalidSnapshot,
dest, NULL, NULL, 0);
@@ -845,8 +845,7 @@ BeginCopyTo(ParseState *pstate,
*
* ExecutorStart computes a result tupdesc for us
*/
- if (!ExecutorStart(cstate->queryDesc, 0))
- elog(ERROR, "ExecutorStart() failed unexpectedly");
+ ExecutorStart(cstate->queryDesc, 0);
tupDesc = cstate->queryDesc->tupDesc;
}
diff --git a/src/backend/commands/createas.c b/src/backend/commands/createas.c
index 0a4155773eb..dfd2ab8e862 100644
--- a/src/backend/commands/createas.c
+++ b/src/backend/commands/createas.c
@@ -334,13 +334,12 @@ ExecCreateTableAs(ParseState *pstate, CreateTableAsStmt *stmt,
UpdateActiveSnapshotCommandId();
/* Create a QueryDesc, redirecting output to our tuple receiver */
- queryDesc = CreateQueryDesc(plan, NULL, pstate->p_sourcetext,
+ queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext,
GetActiveSnapshot(), InvalidSnapshot,
dest, params, queryEnv, 0);
/* call ExecutorStart to prepare the plan for execution */
- if (!ExecutorStart(queryDesc, GetIntoRelEFlags(into)))
- elog(ERROR, "ExecutorStart() failed unexpectedly");
+ ExecutorStart(queryDesc, GetIntoRelEFlags(into));
/* run the plan to completion */
ExecutorRun(queryDesc, ForwardScanDirection, 0);
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 5fbbcdaabb1..c95eb945016 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -1065,16 +1065,41 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
/* Check that the chosen locales are valid, and get canonical spellings */
if (!check_locale(LC_COLLATE, dbcollate, &canonname))
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
- errhint("If the locale name is specific to ICU, use ICU_LOCALE.")));
+ {
+ if (dblocprovider == COLLPROVIDER_BUILTIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
+ errhint("If the locale name is specific to the builtin provider, use BUILTIN_LOCALE.")));
+ else if (dblocprovider == COLLPROVIDER_ICU)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate),
+ errhint("If the locale name is specific to the ICU provider, use ICU_LOCALE.")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate)));
+ }
dbcollate = canonname;
if (!check_locale(LC_CTYPE, dbctype, &canonname))
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
- errhint("If the locale name is specific to ICU, use ICU_LOCALE.")));
+ {
+ if (dblocprovider == COLLPROVIDER_BUILTIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
+ errhint("If the locale name is specific to the builtin provider, use BUILTIN_LOCALE.")));
+ else if (dblocprovider == COLLPROVIDER_ICU)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype),
+ errhint("If the locale name is specific to the ICU provider, use ICU_LOCALE.")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype)));
+ }
+
dbctype = canonname;
check_encoding_locale_matches(encoding, dbcollate, dbctype);
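A sketch of the sharpened hint, assuming "und" is an ICU-style name that is not a valid libc locale on the platform:

    CREATE DATABASE demo TEMPLATE template0
        LOCALE_PROVIDER icu ICU_LOCALE 'und'
        LC_COLLATE 'und' LC_CTYPE 'und';
    -- ERROR:  invalid LC_COLLATE locale name: "und"
    -- HINT:  If the locale name is specific to the ICU provider, use ICU_LOCALE.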
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 786ee865f14..7e2792ead71 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -369,8 +369,7 @@ standard_ExplainOneQuery(Query *query, int cursorOptions,
}
/* run it (if needed) and produce output */
- ExplainOnePlan(plan, NULL, NULL, -1, into, es, queryString, params,
- queryEnv,
+ ExplainOnePlan(plan, into, es, queryString, params, queryEnv,
&planduration, (es->buffers ? &bufusage : NULL),
es->memory ? &mem_counters : NULL);
}
@@ -492,9 +491,7 @@ ExplainOneUtility(Node *utilityStmt, IntoClause *into, ExplainState *es,
* to call it.
*/
void
-ExplainOnePlan(PlannedStmt *plannedstmt, CachedPlan *cplan,
- CachedPlanSource *plansource, int query_index,
- IntoClause *into, ExplainState *es,
+ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
const char *queryString, ParamListInfo params,
QueryEnvironment *queryEnv, const instr_time *planduration,
const BufferUsage *bufusage,
@@ -550,7 +547,7 @@ ExplainOnePlan(PlannedStmt *plannedstmt, CachedPlan *cplan,
dest = None_Receiver;
/* Create a QueryDesc for the query */
- queryDesc = CreateQueryDesc(plannedstmt, cplan, queryString,
+ queryDesc = CreateQueryDesc(plannedstmt, queryString,
GetActiveSnapshot(), InvalidSnapshot,
dest, params, queryEnv, instrument_option);
@@ -564,17 +561,8 @@ ExplainOnePlan(PlannedStmt *plannedstmt, CachedPlan *cplan,
if (into)
eflags |= GetIntoRelEFlags(into);
- /* Prepare the plan for execution. */
- if (queryDesc->cplan)
- {
- ExecutorStartCachedPlan(queryDesc, eflags, plansource, query_index);
- Assert(queryDesc->planstate);
- }
- else
- {
- if (!ExecutorStart(queryDesc, eflags))
- elog(ERROR, "ExecutorStart() failed unexpectedly");
- }
+ /* call ExecutorStart to prepare the plan for execution */
+ ExecutorStart(queryDesc, eflags);
/* Execute the plan for statistics if asked for */
if (es->analyze)
@@ -823,14 +811,10 @@ ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc)
* the queryid in any of the EXPLAIN plans to keep stable the results
* generated by regression test suites.
*/
- if (es->verbose && queryDesc->plannedstmt->queryId != UINT64CONST(0) &&
+ if (es->verbose && queryDesc->plannedstmt->queryId != INT64CONST(0) &&
compute_query_id != COMPUTE_QUERY_ID_REGRESS)
{
- /*
- * Output the queryid as an int64 rather than a uint64 so we match
- * what would be seen in the BIGINT pg_stat_statements.queryid column.
- */
- ExplainPropertyInteger("Query Identifier", NULL, (int64)
+ ExplainPropertyInteger("Query Identifier", NULL,
queryDesc->plannedstmt->queryId, es);
}
}
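A sketch of where the identifier surfaces (requires a query-ID provider, e.g. compute_query_id = on):

    SET compute_query_id = on;
    EXPLAIN (VERBOSE) SELECT 1;
    --   Query Identifier: <signed 64-bit value, matching the bigint
    --   pg_stat_statements.queryid column>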
@@ -1232,6 +1216,10 @@ ExplainPreScanNode(PlanState *planstate, Bitmapset **rels_used)
if (((ModifyTable *) plan)->exclRelRTI)
*rels_used = bms_add_member(*rels_used,
((ModifyTable *) plan)->exclRelRTI);
+ /* Ensure Vars used in RETURNING will have refnames */
+ if (plan->targetlist)
+ *rels_used = bms_add_member(*rels_used,
+ linitial_int(((ModifyTable *) plan)->resultRelations));
break;
case T_Append:
*rels_used = bms_add_members(*rels_used,
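A sketch of the intended effect (table and column names hypothetical): with the result relation registered, EXPLAIN (VERBOSE) can print qualified names for Vars in the RETURNING list:

    EXPLAIN (VERBOSE) UPDATE t1 SET x = x + 1 RETURNING t1.x;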
diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c
index 73c52e970f6..e6f9ab6dfd6 100644
--- a/src/backend/commands/extension.c
+++ b/src/backend/commands/extension.c
@@ -993,13 +993,11 @@ execute_sql_string(const char *sql, const char *filename)
QueryDesc *qdesc;
qdesc = CreateQueryDesc(stmt,
- NULL,
sql,
GetActiveSnapshot(), NULL,
dest, NULL, NULL, 0);
- if (!ExecutorStart(qdesc, 0))
- elog(ERROR, "ExecutorStart() failed unexpectedly");
+ ExecutorStart(qdesc, 0);
ExecutorRun(qdesc, ForwardScanDirection, 0);
ExecutorFinish(qdesc);
ExecutorEnd(qdesc);
diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c
index c14e038d54f..8d2d7431544 100644
--- a/src/backend/commands/foreigncmds.c
+++ b/src/backend/commands/foreigncmds.c
@@ -71,15 +71,26 @@ optionListToArray(List *options)
foreach(cell, options)
{
DefElem *def = lfirst(cell);
+ const char *name;
const char *value;
Size len;
text *t;
+ name = def->defname;
value = defGetString(def);
- len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value);
+
+ /* Insist that name not contain "=", else "a=b=c" is ambiguous */
+ if (strchr(name, '=') != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid option name \"%s\": must not contain \"=\"",
+ name)));
+
+ len = VARHDRSZ + strlen(name) + 1 + strlen(value);
+ /* +1 leaves room for sprintf's trailing null */
t = palloc(len + 1);
SET_VARSIZE(t, len);
- sprintf(VARDATA(t), "%s=%s", def->defname, value);
+ sprintf(VARDATA(t), "%s=%s", name, value);
astate = accumArrayResult(astate, PointerGetDatum(t),
false, TEXTOID,
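A sketch of the new up-front rejection (wrapper and server names hypothetical; a wrapper without a validator is used so this generic check is what fires):

    CREATE FOREIGN DATA WRAPPER dummy;
    CREATE SERVER s1 FOREIGN DATA WRAPPER dummy OPTIONS ("a=b" 'c');
    -- ERROR:  invalid option name "a=b": must not contain "="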
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index d962fe392cd..6f753ab6d7a 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -2469,8 +2469,8 @@ GetOperatorFromCompareType(Oid opclass, Oid rhstype, CompareType cmptype,
cmptype == COMPARE_EQ ? errmsg("could not identify an equality operator for type %s", format_type_be(opcintype)) :
cmptype == COMPARE_OVERLAP ? errmsg("could not identify an overlaps operator for type %s", format_type_be(opcintype)) :
cmptype == COMPARE_CONTAINED_BY ? errmsg("could not identify a contained-by operator for type %s", format_type_be(opcintype)) : 0,
- errdetail("Could not translate compare type %d for operator family \"%s\", input type %s, access method \"%s\".",
- cmptype, get_opfamily_name(opfamily, false), format_type_be(opcintype), get_am_name(amid)));
+ errdetail("Could not translate compare type %d for operator family \"%s\" of access method \"%s\".",
+ cmptype, get_opfamily_name(opfamily, false), get_am_name(amid)));
/*
* We parameterize rhstype so foreign keys can ask for a <@ operator
@@ -2592,7 +2592,9 @@ makeObjectName(const char *name1, const char *name2, const char *label)
* constraint names.)
*
* Note: it is theoretically possible to get a collision anyway, if someone
- * else chooses the same name concurrently. This is fairly unlikely to be
+ * else chooses the same name concurrently. We shorten the race condition
+ * window by checking for conflicting relations using SnapshotDirty, but
+ * that doesn't close the window entirely. This is fairly unlikely to be
* a problem in practice, especially if one is holding an exclusive lock on
* the relation identified by name1. However, if choosing multiple names
* within a single command, you'd better create the new object and do
@@ -2608,15 +2610,45 @@ ChooseRelationName(const char *name1, const char *name2,
int pass = 0;
char *relname = NULL;
char modlabel[NAMEDATALEN];
+ SnapshotData SnapshotDirty;
+ Relation pgclassrel;
+
+ /* prepare to search pg_class with a dirty snapshot */
+ InitDirtySnapshot(SnapshotDirty);
+ pgclassrel = table_open(RelationRelationId, AccessShareLock);
/* try the unmodified label first */
strlcpy(modlabel, label, sizeof(modlabel));
for (;;)
{
+ ScanKeyData key[2];
+ SysScanDesc scan;
+ bool collides;
+
relname = makeObjectName(name1, name2, modlabel);
- if (!OidIsValid(get_relname_relid(relname, namespaceid)))
+ /* is there any conflicting relation name? */
+ ScanKeyInit(&key[0],
+ Anum_pg_class_relname,
+ BTEqualStrategyNumber, F_NAMEEQ,
+ CStringGetDatum(relname));
+ ScanKeyInit(&key[1],
+ Anum_pg_class_relnamespace,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(namespaceid));
+
+ scan = systable_beginscan(pgclassrel, ClassNameNspIndexId,
+ true /* indexOK */ ,
+ &SnapshotDirty,
+ 2, key);
+
+ collides = HeapTupleIsValid(systable_getnext(scan));
+
+ systable_endscan(scan);
+
+ /* break out of loop if no conflict */
+ if (!collides)
{
if (!isconstraint ||
!ConstraintNameExists(relname, namespaceid))
@@ -2628,6 +2660,8 @@ ChooseRelationName(const char *name1, const char *name2,
snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
}
+ table_close(pgclassrel, AccessShareLock);
+
return relname;
}
@@ -4226,7 +4260,7 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein
false);
/*
- * Updating pg_index might involve TOAST table access, so ensure we
+ * Swapping the indexes might involve TOAST table access, so ensure we
* have a valid snapshot.
*/
PushActiveSnapshot(GetTransactionSnapshot());
diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c
index e7854add178..188e26f0e6e 100644
--- a/src/backend/commands/matview.c
+++ b/src/backend/commands/matview.c
@@ -438,13 +438,12 @@ refresh_matview_datafill(DestReceiver *dest, Query *query,
UpdateActiveSnapshotCommandId();
/* Create a QueryDesc, redirecting output to our tuple receiver */
- queryDesc = CreateQueryDesc(plan, NULL, queryString,
+ queryDesc = CreateQueryDesc(plan, queryString,
GetActiveSnapshot(), InvalidSnapshot,
dest, NULL, NULL, 0);
/* call ExecutorStart to prepare the plan for execution */
- if (!ExecutorStart(queryDesc, 0))
- elog(ERROR, "ExecutorStart() failed unexpectedly");
+ ExecutorStart(queryDesc, 0);
/* run the plan */
ExecutorRun(queryDesc, ForwardScanDirection, 0);
@@ -836,7 +835,8 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner,
if (!foundUniqueIndex)
ereport(ERROR,
errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("could not find suitable unique index on materialized view"));
+ errmsg("could not find suitable unique index on materialized view \"%s\"",
+ RelationGetRelationName(matviewRel)));
appendStringInfoString(&querybuf,
" AND newdata.* OPERATOR(pg_catalog.*=) mv.*) "
diff --git a/src/backend/commands/portalcmds.c b/src/backend/commands/portalcmds.c
index 4c2ac045224..e7c8171c102 100644
--- a/src/backend/commands/portalcmds.c
+++ b/src/backend/commands/portalcmds.c
@@ -117,7 +117,6 @@ PerformCursorOpen(ParseState *pstate, DeclareCursorStmt *cstmt, ParamListInfo pa
queryString,
CMDTAG_SELECT, /* cursor's query is always a SELECT */
list_make1(plan),
- NULL,
NULL);
/*----------
diff --git a/src/backend/commands/prepare.c b/src/backend/commands/prepare.c
index bf7d2b2309f..34b6410d6a2 100644
--- a/src/backend/commands/prepare.c
+++ b/src/backend/commands/prepare.c
@@ -205,8 +205,7 @@ ExecuteQuery(ParseState *pstate,
query_string,
entry->plansource->commandTag,
plan_list,
- cplan,
- entry->plansource);
+ cplan);
/*
* For CREATE TABLE ... AS EXECUTE, we must verify that the prepared
@@ -586,7 +585,6 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
MemoryContextCounters mem_counters;
MemoryContext planner_ctx = NULL;
MemoryContext saved_ctx = NULL;
- int query_index = 0;
if (es->memory)
{
@@ -659,8 +657,7 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
PlannedStmt *pstmt = lfirst_node(PlannedStmt, p);
if (pstmt->commandType != CMD_UTILITY)
- ExplainOnePlan(pstmt, cplan, entry->plansource, query_index,
- into, es, query_string, paramLI, pstate->p_queryEnv,
+ ExplainOnePlan(pstmt, into, es, query_string, paramLI, pstate->p_queryEnv,
&planduration, (es->buffers ? &bufusage : NULL),
es->memory ? &mem_counters : NULL);
else
@@ -671,8 +668,6 @@ ExplainExecuteQuery(ExecuteStmt *execstmt, IntoClause *into, ExplainState *es,
/* Separate plans with an appropriate separator */
if (lnext(plan_list, p) != NULL)
ExplainSeparatePlans(es);
-
- query_index++;
}
if (estate)
diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c
index 0b23d94c38e..1bf7eaae5b3 100644
--- a/src/backend/commands/publicationcmds.c
+++ b/src/backend/commands/publicationcmds.c
@@ -2130,8 +2130,8 @@ defGetGeneratedColsOption(DefElem *def)
ereport(ERROR,
errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("%s requires a \"none\" or \"stored\" value",
- def->defname));
+ errmsg("invalid value for publication parameter \"%s\": \"%s\"", def->defname, sval),
+ errdetail("Valid values are \"%s\" and \"%s\".", "none", "stored"));
return PUBLISH_GENCOLS_NONE; /* keep compiler quiet */
}
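A sketch, assuming the parameter being parsed here is publish_generated_columns:

    CREATE PUBLICATION p1 FOR ALL TABLES
        WITH (publish_generated_columns = foo);
    -- ERROR:  invalid value for publication parameter "publish_generated_columns": "foo"
    -- DETAIL:  Valid values are "none" and "stored".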
diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c
index 4aec73bcc6b..4ff246cd943 100644
--- a/src/backend/commands/subscriptioncmds.c
+++ b/src/backend/commands/subscriptioncmds.c
@@ -1267,7 +1267,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt,
IsSet(opts.specified_opts, SUBOPT_SLOT_NAME))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("slot_name and two_phase cannot be altered at the same time")));
+ errmsg("\"slot_name\" and \"two_phase\" cannot be altered at the same time")));
/*
* Note that workers may still survive even if the
@@ -1283,7 +1283,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt,
if (logicalrep_workers_find(subid, true, true))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("cannot alter two_phase when logical replication worker is still running"),
+ errmsg("cannot alter \"two_phase\" when logical replication worker is still running"),
errhint("Try again after some time.")));
/*
@@ -1297,7 +1297,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt,
LookupGXactBySubid(subid))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("cannot disable two_phase when prepared transactions are present"),
+ errmsg("cannot disable \"two_phase\" when prepared transactions exist"),
errhint("Resolve these transactions and try again.")));
/* Change system catalog accordingly */
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 54ad38247aa..cb811520c29 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -430,8 +430,8 @@ static void AlterConstrUpdateConstraintEntry(ATAlterConstraint *cmdcon, Relation
static ObjectAddress ATExecValidateConstraint(List **wqueue,
Relation rel, char *constrName,
bool recurse, bool recursing, LOCKMODE lockmode);
-static void QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
- HeapTuple contuple, LOCKMODE lockmode);
+static void QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation fkrel,
+ Oid pkrelid, HeapTuple contuple, LOCKMODE lockmode);
static void QueueCheckConstraintValidation(List **wqueue, Relation conrel, Relation rel,
char *constrName, HeapTuple contuple,
bool recurse, bool recursing, LOCKMODE lockmode);
@@ -2711,8 +2711,7 @@ MergeAttributes(List *columns, const List *supers, char relpersistence,
RelationGetRelationName(relation))));
/* If existing rel is temp, it must belong to this session */
- if (relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
- !relation->rd_islocaltemp)
+ if (RELATION_IS_OTHER_TEMP(relation))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg(!is_partition
@@ -7374,7 +7373,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel,
/* make sure datatype is legal for a column */
CheckAttributeType(NameStr(attribute->attname), attribute->atttypid, attribute->attcollation,
list_make1_oid(rel->rd_rel->reltype),
- 0);
+ (attribute->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL ? CHKATYPE_IS_VIRTUAL : 0));
InsertPgAttributeTuples(attrdesc, tupdesc, myrelid, NULL, NULL);
@@ -8609,7 +8608,7 @@ ATExecSetExpression(AlteredTableInfo *tab, Relation rel, const char *colName,
rel->rd_att->constr && rel->rd_att->constr->num_check > 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables with check constraints"),
+ errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables with check constraints"),
errdetail("Column \"%s\" of relation \"%s\" is a virtual generated column.",
colName, RelationGetRelationName(rel))));
@@ -8627,7 +8626,7 @@ ATExecSetExpression(AlteredTableInfo *tab, Relation rel, const char *colName,
GetRelationPublications(RelationGetRelid(rel)) != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables that are part of a publication"),
+ errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables that are part of a publication"),
errdetail("Column \"%s\" of relation \"%s\" is a virtual generated column.",
colName, RelationGetRelationName(rel))));
@@ -10189,7 +10188,7 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
if (pk_has_without_overlaps && !with_period)
ereport(ERROR,
errcode(ERRCODE_INVALID_FOREIGN_KEY),
- errmsg("foreign key must use PERIOD when referencing a primary using WITHOUT OVERLAPS"));
+ errmsg("foreign key must use PERIOD when referencing a primary key using WITHOUT OVERLAPS"));
/*
* Now we can check permissions.
@@ -10330,8 +10329,8 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
for_overlaps
? errmsg("could not identify an overlaps operator for foreign key")
: errmsg("could not identify an equality operator for foreign key"),
- errdetail("Could not translate compare type %d for operator family \"%s\", input type %s, access method \"%s\".",
- cmptype, get_opfamily_name(opfamily, false), format_type_be(opcintype), get_am_name(amid)));
+ errdetail("Could not translate compare type %d for operator family \"%s\" of access method \"%s\".",
+ cmptype, get_opfamily_name(opfamily, false), get_am_name(amid)));
/*
* There had better be a primary equality operator for the index.
@@ -11858,6 +11857,7 @@ AttachPartitionForeignKey(List **wqueue,
if (queueValidation)
{
Relation conrel;
+ Oid confrelid;
conrel = table_open(ConstraintRelationId, RowExclusiveLock);
@@ -11865,9 +11865,11 @@ AttachPartitionForeignKey(List **wqueue,
if (!HeapTupleIsValid(partcontup))
elog(ERROR, "cache lookup failed for constraint %u", partConstrOid);
+ confrelid = ((Form_pg_constraint) GETSTRUCT(partcontup))->confrelid;
+
/* Use the same lock as for AT_ValidateConstraint */
- QueueFKConstraintValidation(wqueue, conrel, partition, partcontup,
- ShareUpdateExclusiveLock);
+ QueueFKConstraintValidation(wqueue, conrel, partition, confrelid,
+ partcontup, ShareUpdateExclusiveLock);
ReleaseSysCache(partcontup);
table_close(conrel, RowExclusiveLock);
}
@@ -12463,9 +12465,12 @@ ATExecAlterConstrEnforceability(List **wqueue, ATAlterConstraint *cmdcon,
/*
* Tell Phase 3 to check that the constraint is satisfied by existing
- * rows.
+ * rows. This applies only to leaf partitions, and (for constraints that
+ * reference a partitioned table) only when this is not one of the
+ * pg_constraint rows that exist solely to support action triggers.
*/
- if (rel->rd_rel->relkind == RELKIND_RELATION)
+ if (rel->rd_rel->relkind == RELKIND_RELATION &&
+ currcon->confrelid == pkrelid)
{
AlteredTableInfo *tab;
NewConstraint *newcon;
@@ -12907,8 +12912,9 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
con->contype != CONSTRAINT_NOTNULL)
ereport(ERROR,
errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("constraint \"%s\" of relation \"%s\" is not a foreign key, check, or not-null constraint",
- constrName, RelationGetRelationName(rel)));
+ errmsg("cannot validate constraint \"%s\" of relation \"%s\"",
+ constrName, RelationGetRelationName(rel)),
+ errdetail("This operation is not supported for this type of constraint."));
if (!con->conenforced)
ereport(ERROR,
@@ -12919,7 +12925,8 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
{
if (con->contype == CONSTRAINT_FOREIGN)
{
- QueueFKConstraintValidation(wqueue, conrel, rel, tuple, lockmode);
+ QueueFKConstraintValidation(wqueue, conrel, rel, con->confrelid,
+ tuple, lockmode);
}
else if (con->contype == CONSTRAINT_CHECK)
{
@@ -12952,8 +12959,8 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName,
* for the specified relation and all its children.
*/
static void
-QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
- HeapTuple contuple, LOCKMODE lockmode)
+QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation fkrel,
+ Oid pkrelid, HeapTuple contuple, LOCKMODE lockmode)
{
Form_pg_constraint con;
AlteredTableInfo *tab;
@@ -12964,7 +12971,17 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
Assert(con->contype == CONSTRAINT_FOREIGN);
Assert(!con->convalidated);
- if (rel->rd_rel->relkind == RELKIND_RELATION)
+ /*
+ * Add the validation to phase 3's queue; not needed for partitioned
+ * tables themselves, only for their partitions.
+ *
+ * When the referenced table (pkrelid) is partitioned, the referencing
+ * table (fkrel) has one pg_constraint row pointing to each partition
+ * thereof. These rows exist only to support action triggers and need no
+ * table scan, so skip them as well.
+ */
+ if (fkrel->rd_rel->relkind == RELKIND_RELATION &&
+ con->confrelid == pkrelid)
{
NewConstraint *newcon;
Constraint *fkconstraint;
@@ -12983,15 +13000,16 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
newcon->qual = (Node *) fkconstraint;
/* Find or create work queue entry for this table */
- tab = ATGetQueueEntry(wqueue, rel);
+ tab = ATGetQueueEntry(wqueue, fkrel);
tab->constraints = lappend(tab->constraints, newcon);
}
/*
* If the table at either end of the constraint is partitioned, we need to
- * recurse and handle every constraint that is a child of this constraint.
+ * recurse and handle every unvalidated constraint that is a child of this
+ * constraint.
*/
- if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+ if (fkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
get_rel_relkind(con->confrelid) == RELKIND_PARTITIONED_TABLE)
{
ScanKeyData pkey;
@@ -13023,8 +13041,12 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
childrel = table_open(childcon->conrelid, lockmode);
- QueueFKConstraintValidation(wqueue, conrel, childrel, childtup,
- lockmode);
+ /*
+ * NB: pkrelid must be passed as-is during recursion,
+ * as it is required to identify the root referenced table.
+ */
+ QueueFKConstraintValidation(wqueue, conrel, childrel, pkrelid,
+ childtup, lockmode);
table_close(childrel, NoLock);
}
@@ -13032,7 +13054,11 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel,
}
/*
- * Now update the catalog, while we have the door open.
+ * Now mark the pg_constraint row as validated (even for rows we didn't
+ * actually check, notably the ones for partitions on the referenced side).
+ *
+ * We rely on transaction abort to roll back this change if phase 3
+ * ultimately finds violating rows. This is a bit ugly.
*/
copyTuple = heap_copytuple(contuple);
copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple);
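A sketch of the scenario this targets (names hypothetical): with a partitioned referenced table, validating the constraint should scan the referencing table once rather than once per internal pg_constraint row:

    CREATE TABLE pk_parted (a int PRIMARY KEY) PARTITION BY RANGE (a);
    CREATE TABLE pk_part1 PARTITION OF pk_parted FOR VALUES FROM (0) TO (100);
    CREATE TABLE fk_tab (a int);
    ALTER TABLE fk_tab ADD CONSTRAINT fk_tab_a_fkey
        FOREIGN KEY (a) REFERENCES pk_parted NOT VALID;
    ALTER TABLE fk_tab VALIDATE CONSTRAINT fk_tab_a_fkey;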
@@ -14400,7 +14426,7 @@ ATPrepAlterColumnType(List **wqueue,
/* make sure datatype is legal for a column */
CheckAttributeType(colName, targettype, targetcollid,
list_make1_oid(rel->rd_rel->reltype),
- 0);
+ (attTup->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL ? CHKATYPE_IS_VIRTUAL : 0));
if (attTup->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
{
@@ -14458,6 +14484,9 @@ ATPrepAlterColumnType(List **wqueue,
/* Fix collations after all else */
assign_expr_collations(pstate, transform);
+ /* Expand virtual generated columns in the expr. */
+ transform = expand_generated_columns_in_expr(transform, rel, 1);
+
/* Plan the expr now so we can accurately assess the need to rewrite. */
transform = (Node *) expression_planner((Expr *) transform);
@@ -15385,9 +15414,12 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
/*
* Re-parse the index and constraint definitions, and attach them to the
* appropriate work queue entries. We do this before dropping because in
- * the case of a FOREIGN KEY constraint, we might not yet have exclusive
- * lock on the table the constraint is attached to, and we need to get
- * that before reparsing/dropping.
+ * the case of a constraint on another table, we might not yet have
+ * exclusive lock on the table the constraint is attached to, and we need
+ * to get that before reparsing/dropping. (That's possible at least for
+ * FOREIGN KEY, CHECK, and EXCLUSION constraints; in non-FK cases it
+ * requires a dependency on the target table's composite type in the other
+ * table's constraint expressions.)
*
* We can't rely on the output of deparsing to tell us which relation to
* operate on, because concurrent activity might have made the name
@@ -15403,7 +15435,6 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
Form_pg_constraint con;
Oid relid;
Oid confrelid;
- char contype;
bool conislocal;
tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(oldId));
@@ -15420,7 +15451,6 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
elog(ERROR, "could not identify relation associated with constraint %u", oldId);
}
confrelid = con->confrelid;
- contype = con->contype;
conislocal = con->conislocal;
ReleaseSysCache(tup);
@@ -15438,12 +15468,12 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
continue;
/*
- * When rebuilding an FK constraint that references the table we're
- * modifying, we might not yet have any lock on the FK's table, so get
- * one now. We'll need AccessExclusiveLock for the DROP CONSTRAINT
- * step, so there's no value in asking for anything weaker.
+ * When rebuilding another table's constraint that references the
+ * table we're modifying, we might not yet have any lock on the other
+ * table, so get one now. We'll need AccessExclusiveLock for the DROP
+ * CONSTRAINT step, so there's no value in asking for anything weaker.
*/
- if (relid != tab->relid && contype == CONSTRAINT_FOREIGN)
+ if (relid != tab->relid)
LockRelationOid(relid, AccessExclusiveLock);
ATPostAlterTypeParse(oldId, relid, confrelid,
@@ -15457,6 +15487,14 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
Oid relid;
relid = IndexGetRelation(oldId, false);
+
+ /*
+ * As above, make sure we have lock on the index's table if it's not
+ * the same table.
+ */
+ if (relid != tab->relid)
+ LockRelationOid(relid, AccessExclusiveLock);
+
ATPostAlterTypeParse(oldId, relid, InvalidOid,
(char *) lfirst(def_item),
wqueue, lockmode, tab->rewrite);
@@ -15473,6 +15511,20 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode)
Oid relid;
relid = StatisticsGetRelation(oldId, false);
+
+ /*
+ * As above, make sure we have lock on the statistics object's table
+ * if it's not the same table. However, we take
+ * ShareUpdateExclusiveLock here, aligning with the lock level used in
+ * CreateStatistics and RemoveStatisticsById.
+ *
+ * CAUTION: this should be done after all cases that grab
+ * AccessExclusiveLock, else we risk causing deadlock due to needing
+ * to promote our table lock.
+ */
+ if (relid != tab->relid)
+ LockRelationOid(relid, ShareUpdateExclusiveLock);
+
ATPostAlterTypeParse(oldId, relid, InvalidOid,
(char *) lfirst(def_item),
wqueue, lockmode, tab->rewrite);
@@ -15696,7 +15748,7 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd,
{
AlterDomainStmt *stmt = (AlterDomainStmt *) stm;
- if (stmt->subtype == 'C') /* ADD CONSTRAINT */
+ if (stmt->subtype == AD_AddConstraint)
{
Constraint *con = castNode(Constraint, stmt->def);
AlterTableCmd *cmd = makeNode(AlterTableCmd);
@@ -17199,15 +17251,13 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode)
RelationGetRelationName(parent_rel))));
/* If parent rel is temp, it must belong to this session */
- if (parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
- !parent_rel->rd_islocaltemp)
+ if (RELATION_IS_OTHER_TEMP(parent_rel))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot inherit from temporary relation of another session")));
/* Ditto for the child */
- if (child_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
- !child_rel->rd_islocaltemp)
+ if (RELATION_IS_OTHER_TEMP(child_rel))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot inherit to temporary relation of another session")));
@@ -20278,15 +20328,13 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd,
RelationGetRelationName(rel))));
/* If the parent is temp, it must belong to this session */
- if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
- !rel->rd_islocaltemp)
+ if (RELATION_IS_OTHER_TEMP(rel))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot attach as partition of temporary relation of another session")));
/* Ditto for the partition */
- if (attachrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP &&
- !attachrel->rd_islocaltemp)
+ if (RELATION_IS_OTHER_TEMP(attachrel))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("cannot attach temporary relation of another session as partition")));
@@ -20964,9 +21012,17 @@ ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel,
tab->rel = rel;
}
+ /*
+ * Detaching the partition might involve TOAST table access, so ensure we
+ * have a valid snapshot.
+ */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
/* Do the final part of detaching */
DetachPartitionFinalize(rel, partRel, concurrent, defaultPartOid);
+ PopActiveSnapshot();
+
ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel));
/* keep our lock until commit */
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index c9f61130c69..67f8e70f9c1 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -5058,21 +5058,6 @@ AfterTriggerBeginQuery(void)
/* ----------
- * AfterTriggerAbortQuery()
- *
- * Called by standard_ExecutorEnd() if the query execution was aborted due to
- * the plan becoming invalid during initialization.
- * ----------
- */
-void
-AfterTriggerAbortQuery(void)
-{
- /* Revert the actions of AfterTriggerBeginQuery(). */
- afterTriggers.query_depth--;
-}
-
-
-/* ----------
* AfterTriggerEndQuery()
*
* Called after one query has been completely processed. At this time
diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c
index 45ae7472ab5..26d985193ae 100644
--- a/src/backend/commands/typecmds.c
+++ b/src/backend/commands/typecmds.c
@@ -939,11 +939,19 @@ DefineDomain(ParseState *pstate, CreateDomainStmt *stmt)
break;
case CONSTR_NOTNULL:
- if (nullDefined && !typNotNull)
+ if (nullDefined)
+ {
+ if (!typNotNull)
+ ereport(ERROR,
+ errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("conflicting NULL/NOT NULL constraints"),
+ parser_errposition(pstate, constr->location));
+
ereport(ERROR,
- errcode(ERRCODE_SYNTAX_ERROR),
- errmsg("conflicting NULL/NOT NULL constraints"),
+ errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("redundant NOT NULL constraint definition"),
parser_errposition(pstate, constr->location));
+ }
if (constr->is_no_inherit)
ereport(ERROR,
errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
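A sketch of the distinction now drawn (domain names hypothetical):

    CREATE DOMAIN d1 AS int NULL NOT NULL;
    -- ERROR:  conflicting NULL/NOT NULL constraints
    CREATE DOMAIN d2 AS int NOT NULL NOT NULL;
    -- ERROR:  redundant NOT NULL constraint definition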
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 33a33bf6b1c..733ef40ae7c 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -56,6 +56,7 @@
#include "utils/fmgroids.h"
#include "utils/guc.h"
#include "utils/guc_hooks.h"
+#include "utils/injection_point.h"
#include "utils/memutils.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
@@ -123,7 +124,7 @@ static void vac_truncate_clog(TransactionId frozenXID,
MultiXactId minMulti,
TransactionId lastSaneFrozenXid,
MultiXactId lastSaneMinMulti);
-static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
+static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
BufferAccessStrategy bstrategy);
static double compute_parallel_delay(void);
static VacOptValue get_vacoptval_from_boolean(DefElem *def);
@@ -464,7 +465,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
}
/* Now go through the common routine */
- vacuum(vacstmt->rels, &params, bstrategy, vac_context, isTopLevel);
+ vacuum(vacstmt->rels, params, bstrategy, vac_context, isTopLevel);
/* Finally, clean up the vacuum memory context */
MemoryContextDelete(vac_context);
@@ -493,7 +494,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
* memory context that will not disappear at transaction commit.
*/
void
-vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
+vacuum(List *relations, const VacuumParams params, BufferAccessStrategy bstrategy,
MemoryContext vac_context, bool isTopLevel)
{
static bool in_vacuum = false;
@@ -502,9 +503,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
volatile bool in_outer_xact,
use_own_xacts;
- Assert(params != NULL);
-
- stmttype = (params->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
+ stmttype = (params.options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE";
/*
* We cannot run VACUUM inside a user transaction block; if we were inside
@@ -514,7 +513,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
*
* ANALYZE (without VACUUM) can run either way.
*/
- if (params->options & VACOPT_VACUUM)
+ if (params.options & VACOPT_VACUUM)
{
PreventInTransactionBlock(isTopLevel, stmttype);
in_outer_xact = false;
@@ -537,7 +536,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
* Build list of relation(s) to process, putting any new data in
* vac_context for safekeeping.
*/
- if (params->options & VACOPT_ONLY_DATABASE_STATS)
+ if (params.options & VACOPT_ONLY_DATABASE_STATS)
{
/* We don't process any tables in this case */
Assert(relations == NIL);
@@ -553,7 +552,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
List *sublist;
MemoryContext old_context;
- sublist = expand_vacuum_rel(vrel, vac_context, params->options);
+ sublist = expand_vacuum_rel(vrel, vac_context, params.options);
old_context = MemoryContextSwitchTo(vac_context);
newrels = list_concat(newrels, sublist);
MemoryContextSwitchTo(old_context);
@@ -561,7 +560,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
relations = newrels;
}
else
- relations = get_all_vacuum_rels(vac_context, params->options);
+ relations = get_all_vacuum_rels(vac_context, params.options);
/*
* Decide whether we need to start/commit our own transactions.
@@ -577,11 +576,11 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
* transaction block, and also in an autovacuum worker, use own
* transactions so we can release locks sooner.
*/
- if (params->options & VACOPT_VACUUM)
+ if (params.options & VACOPT_VACUUM)
use_own_xacts = true;
else
{
- Assert(params->options & VACOPT_ANALYZE);
+ Assert(params.options & VACOPT_ANALYZE);
if (AmAutoVacuumWorkerProcess())
use_own_xacts = true;
else if (in_outer_xact)
@@ -632,13 +631,13 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
{
VacuumRelation *vrel = lfirst_node(VacuumRelation, cur);
- if (params->options & VACOPT_VACUUM)
+ if (params.options & VACOPT_VACUUM)
{
if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy))
continue;
}
- if (params->options & VACOPT_ANALYZE)
+ if (params.options & VACOPT_ANALYZE)
{
/*
* If using separate xacts, start one for analyze. Otherwise,
@@ -702,8 +701,8 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy,
StartTransactionCommand();
}
- if ((params->options & VACOPT_VACUUM) &&
- !(params->options & VACOPT_SKIP_DATABASE_STATS))
+ if ((params.options & VACOPT_VACUUM) &&
+ !(params.options & VACOPT_SKIP_DATABASE_STATS))
{
/*
* Update pg_database.datfrozenxid, and truncate pg_xact if possible.
@@ -1101,7 +1100,7 @@ get_all_vacuum_rels(MemoryContext vac_context, int options)
* minimum).
*/
bool
-vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
+vacuum_get_cutoffs(Relation rel, const VacuumParams params,
struct VacuumCutoffs *cutoffs)
{
int freeze_min_age,
@@ -1117,10 +1116,10 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params,
aggressiveMXIDCutoff;
/* Use mutable copies of freeze age parameters */
- freeze_min_age = params->freeze_min_age;
- multixact_freeze_min_age = params->multixact_freeze_min_age;
- freeze_table_age = params->freeze_table_age;
- multixact_freeze_table_age = params->multixact_freeze_table_age;
+ freeze_min_age = params.freeze_min_age;
+ multixact_freeze_min_age = params.multixact_freeze_min_age;
+ freeze_table_age = params.freeze_table_age;
+ multixact_freeze_table_age = params.multixact_freeze_table_age;
/* Set pg_class fields in cutoffs */
cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid;
@@ -1997,7 +1996,7 @@ vac_truncate_clog(TransactionId frozenXID,
* At entry and exit, we are not inside a transaction.
*/
static bool
-vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
+vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params,
BufferAccessStrategy bstrategy)
{
LOCKMODE lmode;
@@ -2008,13 +2007,18 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
Oid save_userid;
int save_sec_context;
int save_nestlevel;
+ VacuumParams toast_vacuum_params;
- Assert(params != NULL);
+ /*
+ * This function scribbles on the parameters, so make a copy early to
+ * avoid affecting the TOAST table (if we do end up recursing to it).
+ */
+ memcpy(&toast_vacuum_params, &params, sizeof(VacuumParams));
/* Begin a transaction for vacuuming this relation */
StartTransactionCommand();
- if (!(params->options & VACOPT_FULL))
+ if (!(params.options & VACOPT_FULL))
{
/*
* In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets
@@ -2040,7 +2044,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
*/
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
MyProc->statusFlags |= PROC_IN_VACUUM;
- if (params->is_wraparound)
+ if (params.is_wraparound)
MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND;
ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
LWLockRelease(ProcArrayLock);
@@ -2064,12 +2068,12 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
* vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either
* way, we can be sure that no other backend is vacuuming the same table.
*/
- lmode = (params->options & VACOPT_FULL) ?
+ lmode = (params.options & VACOPT_FULL) ?
AccessExclusiveLock : ShareUpdateExclusiveLock;
/* open the relation and get the appropriate lock on it */
- rel = vacuum_open_relation(relid, relation, params->options,
- params->log_min_duration >= 0, lmode);
+ rel = vacuum_open_relation(relid, relation, params.options,
+ params.log_min_duration >= 0, lmode);
/* leave if relation could not be opened or locked */
if (!rel)
@@ -2084,8 +2088,8 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
* This is only safe to do because we hold a session lock on the main
* relation that prevents concurrent deletion.
*/
- if (OidIsValid(params->toast_parent))
- priv_relid = params->toast_parent;
+ if (OidIsValid(params.toast_parent))
+ priv_relid = params.toast_parent;
else
priv_relid = RelationGetRelid(rel);
@@ -2098,7 +2102,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
*/
if (!vacuum_is_permitted_for_relation(priv_relid,
rel->rd_rel,
- params->options & ~VACOPT_ANALYZE))
+ params.options & ~VACOPT_ANALYZE))
{
relation_close(rel, lmode);
PopActiveSnapshot();
@@ -2169,7 +2173,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
* Set index_cleanup option based on index_cleanup reloption if it wasn't
* specified in VACUUM command, or when running in an autovacuum worker
*/
- if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED)
+ if (params.index_cleanup == VACOPTVALUE_UNSPECIFIED)
{
StdRdOptIndexCleanup vacuum_index_cleanup;
@@ -2180,56 +2184,74 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup;
if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO)
- params->index_cleanup = VACOPTVALUE_AUTO;
+ params.index_cleanup = VACOPTVALUE_AUTO;
else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON)
- params->index_cleanup = VACOPTVALUE_ENABLED;
+ params.index_cleanup = VACOPTVALUE_ENABLED;
else
{
Assert(vacuum_index_cleanup ==
STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF);
- params->index_cleanup = VACOPTVALUE_DISABLED;
+ params.index_cleanup = VACOPTVALUE_DISABLED;
}
}
+#ifdef USE_INJECTION_POINTS
+ if (params.index_cleanup == VACOPTVALUE_AUTO)
+ INJECTION_POINT("vacuum-index-cleanup-auto", NULL);
+ else if (params.index_cleanup == VACOPTVALUE_DISABLED)
+ INJECTION_POINT("vacuum-index-cleanup-disabled", NULL);
+ else if (params.index_cleanup == VACOPTVALUE_ENABLED)
+ INJECTION_POINT("vacuum-index-cleanup-enabled", NULL);
+#endif
+
/*
* Check if the vacuum_max_eager_freeze_failure_rate table storage
* parameter was specified. This overrides the GUC value.
*/
if (rel->rd_options != NULL &&
((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
- params->max_eager_freeze_failure_rate =
+ params.max_eager_freeze_failure_rate =
((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;
/*
* Set truncate option based on truncate reloption or GUC if it wasn't
* specified in VACUUM command, or when running in an autovacuum worker
*/
- if (params->truncate == VACOPTVALUE_UNSPECIFIED)
+ if (params.truncate == VACOPTVALUE_UNSPECIFIED)
{
StdRdOptions *opts = (StdRdOptions *) rel->rd_options;
if (opts && opts->vacuum_truncate_set)
{
if (opts->vacuum_truncate)
- params->truncate = VACOPTVALUE_ENABLED;
+ params.truncate = VACOPTVALUE_ENABLED;
else
- params->truncate = VACOPTVALUE_DISABLED;
+ params.truncate = VACOPTVALUE_DISABLED;
}
else if (vacuum_truncate)
- params->truncate = VACOPTVALUE_ENABLED;
+ params.truncate = VACOPTVALUE_ENABLED;
else
- params->truncate = VACOPTVALUE_DISABLED;
+ params.truncate = VACOPTVALUE_DISABLED;
}
+#ifdef USE_INJECTION_POINTS
+ if (params.truncate == VACOPTVALUE_AUTO)
+ INJECTION_POINT("vacuum-truncate-auto", NULL);
+ else if (params.truncate == VACOPTVALUE_DISABLED)
+ INJECTION_POINT("vacuum-truncate-disabled", NULL);
+ else if (params.truncate == VACOPTVALUE_ENABLED)
+ INJECTION_POINT("vacuum-truncate-enabled", NULL);
+#endif
+
/*
* Remember the relation's TOAST relation for later, if the caller asked
* us to process it. In VACUUM FULL, though, the toast table is
* automatically rebuilt by cluster_rel so we shouldn't recurse to it,
* unless PROCESS_MAIN is disabled.
*/
- if ((params->options & VACOPT_PROCESS_TOAST) != 0 &&
- ((params->options & VACOPT_FULL) == 0 ||
- (params->options & VACOPT_PROCESS_MAIN) == 0))
+ if ((params.options & VACOPT_PROCESS_TOAST) != 0 &&
+ ((params.options & VACOPT_FULL) == 0 ||
+ (params.options & VACOPT_PROCESS_MAIN) == 0))
toast_relid = rel->rd_rel->reltoastrelid;
else
toast_relid = InvalidOid;
@@ -2252,16 +2274,16 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
* table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN
* to be set when we recurse to the TOAST table.
*/
- if (params->options & VACOPT_PROCESS_MAIN)
+ if (params.options & VACOPT_PROCESS_MAIN)
{
/*
* Do the actual work --- either FULL or "lazy" vacuum
*/
- if (params->options & VACOPT_FULL)
+ if (params.options & VACOPT_FULL)
{
ClusterParams cluster_params = {0};
- if ((params->options & VACOPT_VERBOSE) != 0)
+ if ((params.options & VACOPT_VERBOSE) != 0)
cluster_params.options |= CLUOPT_VERBOSE;
/* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
@@ -2299,19 +2321,16 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
*/
if (toast_relid != InvalidOid)
{
- VacuumParams toast_vacuum_params;
-
/*
* Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it. Likewise,
* set toast_parent so that the privilege checks are done on the main
* relation. NB: This is only safe to do because we hold a session
* lock on the main relation that prevents concurrent deletion.
*/
- memcpy(&toast_vacuum_params, params, sizeof(VacuumParams));
- toast_vacuum_params.options |= VACOPT_PROCESS_MAIN;
- toast_vacuum_params.toast_parent = relid;
+ params.options |= VACOPT_PROCESS_MAIN;
+ params.toast_parent = relid;
- vacuum_rel(toast_relid, NULL, &toast_vacuum_params, bstrategy);
+ vacuum_rel(toast_relid, NULL, toast_vacuum_params, bstrategy);
}
/*
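The hunks above switch vacuum_rel() from taking VacuumParams by pointer to taking it by value, which is why the separate toast_vacuum_params copy and its memcpy() disappear: the callee can now modify its own copy freely. A minimal standalone sketch of that property (plain C with hypothetical names, not PostgreSQL code):

    #include <stdio.h>

    typedef struct Params { unsigned options; } Params;

    static void
    vacuum_one(Params params, int depth)
    {
        printf("depth=%d sees options=0x%x\n", depth, params.options);
        if (depth == 0)
        {
            params.options |= 0x4;          /* modifies only this frame's copy */
            vacuum_one(params, depth + 1);  /* child sees the forced option */
        }
    }

    int
    main(void)
    {
        Params p = {0x1};

        vacuum_one(p, 0);
        printf("caller still sees 0x%x\n", p.options);  /* prints 0x1 */
        return 0;
    }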
diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c
index 2b9d548cdeb..0feea1d30ec 100644
--- a/src/backend/commands/vacuumparallel.c
+++ b/src/backend/commands/vacuumparallel.c
@@ -63,7 +63,7 @@ typedef struct PVShared
*/
Oid relid;
int elevel;
- uint64 queryid;
+ int64 queryid;
/*
* Fields for both index vacuum and cleanup.
diff --git a/src/backend/executor/README b/src/backend/executor/README
index 02745c23ed9..54f4782f31b 100644
--- a/src/backend/executor/README
+++ b/src/backend/executor/README
@@ -285,28 +285,6 @@ are typically reset to empty once per tuple. Per-tuple contexts are usually
associated with ExprContexts, and commonly each PlanState node has its own
ExprContext to evaluate its qual and targetlist expressions in.
-Relation Locking
-----------------
-
-When the executor initializes a plan tree for execution, it doesn't lock
-non-index relations if the plan tree is freshly generated and not derived
-from a CachedPlan. This is because such locks have already been established
-during the query's parsing, rewriting, and planning phases. However, with a
-cached plan tree, some relations may remain unlocked. The function
-AcquireExecutorLocks() only locks unprunable relations in the plan, deferring
-the locking of prunable ones to executor initialization. This avoids
-unnecessary locking of relations that will be pruned during "initial" runtime
-pruning in ExecDoInitialPruning().
-
-This approach creates a window where a cached plan tree with child tables
-could become outdated if another backend modifies these tables before
-ExecDoInitialPruning() locks them. As a result, the executor has the added duty
-to verify the plan tree's validity whenever it locks a child table after
-doing initial pruning. This validation is done by checking the CachedPlan.is_valid
-flag. If the plan tree is outdated (is_valid = false), the executor stops
-further initialization, cleans up anything in EState that would have been
-allocated up to that point, and retries execution after recreating the
-invalid plan in the CachedPlan. See ExecutorStartCachedPlan().
Query Processing Control Flow
-----------------------------
@@ -315,13 +293,11 @@ This is a sketch of control flow for full query processing:
CreateQueryDesc
- ExecutorStart or ExecutorStartCachedPlan
+ ExecutorStart
CreateExecutorState
creates per-query context
- switch to per-query context to run ExecDoInitialPruning and ExecInitNode
+ switch to per-query context to run ExecInitNode
AfterTriggerBeginQuery
- ExecDoInitialPruning
- does initial pruning and locks surviving partitions if needed
ExecInitNode --- recursively scans plan tree
ExecInitNode
recurse into subsidiary nodes
@@ -345,12 +321,7 @@ This is a sketch of control flow for full query processing:
FreeQueryDesc
-As mentioned in the "Relation Locking" section, if the plan tree is found to
-be stale after locking partitions in ExecDoInitialPruning(), the control is
-immediately returned to ExecutorStartCachedPlan(), which will create a new plan
-tree and perform the steps starting from CreateExecutorState() again.
-
-Per above comments, it's not really critical for ExecEndPlan to free any
+Per above comments, it's not really critical for ExecEndNode to free any
memory; it'll all go away in FreeExecutorState anyway. However, we do need to
be careful to close relations, drop buffer pins, etc, so we do need to scan
the plan state tree to find these sorts of resources.
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 255bd795361..b5400749353 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -144,7 +144,7 @@ execTuplesHashPrepare(int numCols,
* hashfunctions: FmgrInfos of datatype-specific hashing functions to use
* collations: collations to use in comparisons
* nbuckets: initial estimate of hashtable size
- * additionalsize: size of data stored in ->additional
+ * additionalsize: size of data that may be stored along with the hash entry
* metacxt: memory context for long-lived allocation, but not per-entry data
* tablecxt: memory context in which to store table entries
* tempcxt: short-lived context for evaluation hash and comparison functions
@@ -288,7 +288,7 @@ ResetTupleHashTable(TupleHashTable hashtable)
*
* If isnew isn't NULL, then a new entry is created if no existing entry
* matches. On return, *isnew is true if the entry is newly created,
- * false if it existed already. ->additional_data in the new entry has
+ * false if it existed already. The additional data in the new entry has
* been zeroed.
*/
TupleHashEntry
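To illustrate the reworded additionalsize comments above: the hash table reserves that many extra bytes alongside each entry for caller-defined per-group state, zeroed when the entry is created. A toy model of the allocation scheme (plain C, not the actual simplehash-based implementation):

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct Entry
    {
        int   key;
        void *additional;       /* points at the trailing space */
    } Entry;

    static Entry *
    make_entry(int key, size_t additionalsize)
    {
        /* one allocation: entry followed by zeroed additional space */
        Entry *e = calloc(1, sizeof(Entry) + additionalsize);

        e->key = key;
        e->additional = (char *) e + sizeof(Entry);
        return e;
    }

    int
    main(void)
    {
        Entry *e = make_entry(42, sizeof(long));

        *(long *) e->additional = 7;    /* per-group aggregate state, say */
        printf("key=%d state=%ld\n", e->key, *(long *) e->additional);
        free(e);
        return 0;
    }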
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index bdf862b2406..ca33a854278 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -279,7 +279,7 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
* executor is performing an UPDATE that could not use an
* optimization like heapam's HOT (in more general terms a
* call to table_tuple_update() took place and set
- * 'update_indexes' to TUUI_All). Receiving this hint makes
+ * 'update_indexes' to TU_All). Receiving this hint makes
* us consider if we should pass down the 'indexUnchanged'
* hint in turn. That's something that we figure out for
* each index_insert() call iff 'update' is true.
@@ -290,7 +290,7 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
* HOT has been applied and any updated columns are indexed
* only by summarizing indexes (or in more general terms a
* call to table_tuple_update() took place and set
- * 'update_indexes' to TUUI_Summarizing). We can (and must)
+ * 'update_indexes' to TU_Summarizing). We can (and must)
* therefore only update the indexes that have
* 'amsummarizing' = true.
*
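For reference, the TU_All and TU_Summarizing values in the corrected comments are members of the TU_UpdateIndexes enum that table_tuple_update() reports through its update_indexes output parameter. A condensed sketch of the enum as declared in access/tableam.h (comments paraphrased from memory):

    typedef enum TU_UpdateIndexes
    {
        TU_None,            /* no indexed columns changed (e.g. HOT applied) */
        TU_All,             /* all indexes need entries for the new tuple */
        TU_Summarizing,     /* only summarizing indexes (e.g. BRIN) need updates */
    } TU_UpdateIndexes;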
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 7230f968101..0391798dd2c 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -55,13 +55,11 @@
#include "parser/parse_relation.h"
#include "pgstat.h"
#include "rewrite/rewriteHandler.h"
-#include "storage/lmgr.h"
#include "tcop/utility.h"
#include "utils/acl.h"
#include "utils/backend_status.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
-#include "utils/plancache.h"
#include "utils/rls.h"
#include "utils/snapmgr.h"
@@ -119,16 +117,11 @@ static void ReportNotNullViolationError(ResultRelInfo *resultRelInfo,
* get control when ExecutorStart is called. Such a plugin would
* normally call standard_ExecutorStart().
*
- * Return value indicates if the plan has been initialized successfully so
- * that queryDesc->planstate contains a valid PlanState tree. It may not
- * if the plan got invalidated during InitPlan().
* ----------------------------------------------------------------
*/
-bool
+void
ExecutorStart(QueryDesc *queryDesc, int eflags)
{
- bool plan_valid;
-
/*
* In some cases (e.g. an EXECUTE statement or an execute message with the
* extended query protocol) the query_id won't be reported, so do it now.
@@ -140,14 +133,12 @@ ExecutorStart(QueryDesc *queryDesc, int eflags)
pgstat_report_query_id(queryDesc->plannedstmt->queryId, false);
if (ExecutorStart_hook)
- plan_valid = (*ExecutorStart_hook) (queryDesc, eflags);
+ (*ExecutorStart_hook) (queryDesc, eflags);
else
- plan_valid = standard_ExecutorStart(queryDesc, eflags);
-
- return plan_valid;
+ standard_ExecutorStart(queryDesc, eflags);
}
-bool
+void
standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
EState *estate;
@@ -271,64 +262,6 @@ standard_ExecutorStart(QueryDesc *queryDesc, int eflags)
InitPlan(queryDesc, eflags);
MemoryContextSwitchTo(oldcontext);
-
- return ExecPlanStillValid(queryDesc->estate);
-}
-
-/*
- * ExecutorStartCachedPlan
- * Start execution for a given query in the CachedPlanSource, replanning
- * if the plan is invalidated due to deferred locks taken during the
- * plan's initialization
- *
- * This function handles cases where the CachedPlan given in queryDesc->cplan
- * might become invalid during the initialization of the plan given in
- * queryDesc->plannedstmt, particularly when prunable relations in it are
- * locked after performing initial pruning. If the locks invalidate the plan,
- * the function calls UpdateCachedPlan() to replan all queries in the
- * CachedPlan, and then retries initialization.
- *
- * The function repeats the process until ExecutorStart() successfully
- * initializes the plan, that is without the CachedPlan becoming invalid.
- */
-void
-ExecutorStartCachedPlan(QueryDesc *queryDesc, int eflags,
- CachedPlanSource *plansource,
- int query_index)
-{
- if (unlikely(queryDesc->cplan == NULL))
- elog(ERROR, "ExecutorStartCachedPlan(): missing CachedPlan");
- if (unlikely(plansource == NULL))
- elog(ERROR, "ExecutorStartCachedPlan(): missing CachedPlanSource");
-
- /*
- * Loop and retry with an updated plan until no further invalidation
- * occurs.
- */
- while (1)
- {
- if (!ExecutorStart(queryDesc, eflags))
- {
- /*
- * Clean up the current execution state before creating the new
- * plan to retry ExecutorStart(). Mark execution as aborted to
- * ensure that AFTER trigger state is properly reset.
- */
- queryDesc->estate->es_aborted = true;
- ExecutorEnd(queryDesc);
-
- /* Retry ExecutorStart() with an updated plan tree. */
- queryDesc->plannedstmt = UpdateCachedPlan(plansource, query_index,
- queryDesc->queryEnv);
- }
- else
-
- /*
- * Exit the loop if the plan is initialized successfully and no
- * sinval messages were received that invalidated the CachedPlan.
- */
- break;
- }
}
/* ----------------------------------------------------------------
@@ -387,7 +320,6 @@ standard_ExecutorRun(QueryDesc *queryDesc,
estate = queryDesc->estate;
Assert(estate != NULL);
- Assert(!estate->es_aborted);
Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
/* caller must ensure the query's snapshot is active */
@@ -494,11 +426,8 @@ standard_ExecutorFinish(QueryDesc *queryDesc)
Assert(estate != NULL);
Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
- /*
- * This should be run once and only once per Executor instance and never
- * if the execution was aborted.
- */
- Assert(!estate->es_finished && !estate->es_aborted);
+ /* This should be run once and only once per Executor instance */
+ Assert(!estate->es_finished);
/* Switch into per-query memory context */
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
@@ -561,10 +490,11 @@ standard_ExecutorEnd(QueryDesc *queryDesc)
(PgStat_Counter) estate->es_parallel_workers_launched);
/*
- * Check that ExecutorFinish was called, unless in EXPLAIN-only mode or if
- * execution was aborted.
+ * Check that ExecutorFinish was called, unless in EXPLAIN-only mode. This
+ * Assert is needed because ExecutorFinish is new as of 9.1, and callers
+ * might forget to call it.
*/
- Assert(estate->es_finished || estate->es_aborted ||
+ Assert(estate->es_finished ||
(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY));
/*
@@ -579,14 +509,6 @@ standard_ExecutorEnd(QueryDesc *queryDesc)
UnregisterSnapshot(estate->es_crosscheck_snapshot);
/*
- * Reset AFTER trigger module if the query execution was aborted.
- */
- if (estate->es_aborted &&
- !(estate->es_top_eflags &
- (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY)))
- AfterTriggerAbortQuery();
-
- /*
* Must switch out of context before destroying it
*/
MemoryContextSwitchTo(oldcontext);
@@ -684,21 +606,6 @@ ExecCheckPermissions(List *rangeTable, List *rteperminfos,
(rte->rtekind == RTE_SUBQUERY &&
rte->relkind == RELKIND_VIEW));
- /*
- * Ensure that we have at least an AccessShareLock on relations
- * whose permissions need to be checked.
- *
- * Skip this check in a parallel worker because locks won't be
- * taken until ExecInitNode() performs plan initialization.
- *
- * XXX: ExecCheckPermissions() in a parallel worker may be
- * redundant with the checks done in the leader process, so this
- * should be reviewed to ensure it's necessary.
- */
- Assert(IsParallelWorker() ||
- CheckRelationOidLockedByMe(rte->relid, AccessShareLock,
- true));
-
(void) getRTEPermissionInfo(rteperminfos, rte);
/* Many-to-one mapping not allowed */
Assert(!bms_is_member(rte->perminfoindex, indexset));
@@ -924,12 +831,6 @@ ExecCheckXactReadOnly(PlannedStmt *plannedstmt)
*
* Initializes the query plan: open files, allocate storage
* and start up the rule manager
- *
- * If the plan originates from a CachedPlan (given in queryDesc->cplan),
- * it can become invalid during runtime "initial" pruning when the
- * remaining set of locks is taken. The function returns early in that
- * case without initializing the plan, and the caller is expected to
- * retry with a new valid plan.
* ----------------------------------------------------------------
*/
static void
@@ -937,7 +838,6 @@ InitPlan(QueryDesc *queryDesc, int eflags)
{
CmdType operation = queryDesc->operation;
PlannedStmt *plannedstmt = queryDesc->plannedstmt;
- CachedPlan *cachedplan = queryDesc->cplan;
Plan *plan = plannedstmt->planTree;
List *rangeTable = plannedstmt->rtable;
EState *estate = queryDesc->estate;
@@ -958,7 +858,6 @@ InitPlan(QueryDesc *queryDesc, int eflags)
bms_copy(plannedstmt->unprunableRelids));
estate->es_plannedstmt = plannedstmt;
- estate->es_cachedplan = cachedplan;
estate->es_part_prune_infos = plannedstmt->partPruneInfos;
/*
@@ -972,9 +871,6 @@ InitPlan(QueryDesc *queryDesc, int eflags)
*/
ExecDoInitialPruning(estate);
- if (!ExecPlanStillValid(estate))
- return;
-
/*
* Next, build the ExecRowMark array from the PlanRowMark(s), if any.
*/
@@ -3092,9 +2988,6 @@ EvalPlanQualStart(EPQState *epqstate, Plan *planTree)
* the snapshot, rangetable, and external Param info. They need their own
* copies of local state, including a tuple table, es_param_exec_vals,
* result-rel info, etc.
- *
- * es_cachedplan is not copied because EPQ plan execution does not acquire
- * any new locks that could invalidate the CachedPlan.
*/
rcestate->es_direction = ForwardScanDirection;
rcestate->es_snapshot = parentestate->es_snapshot;
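With ExecutorStart() and standard_ExecutorStart() back to returning void, extensions that install ExecutorStart_hook lose the boolean plumbing as well. A hypothetical extension hook matching the new signature (a sketch, not part of this patch):

    #include "postgres.h"

    #include "executor/executor.h"
    #include "fmgr.h"

    PG_MODULE_MAGIC;

    static ExecutorStart_hook_type prev_ExecutorStart = NULL;

    static void
    my_ExecutorStart(QueryDesc *queryDesc, int eflags)
    {
        /* extension-specific setup could happen here */

        if (prev_ExecutorStart)
            prev_ExecutorStart(queryDesc, eflags);
        else
            standard_ExecutorStart(queryDesc, eflags);
    }

    void
    _PG_init(void)
    {
        prev_ExecutorStart = ExecutorStart_hook;
        ExecutorStart_hook = my_ExecutorStart;
    }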
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 39c990ae638..f3e77bda279 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -1278,15 +1278,8 @@ ExecParallelGetQueryDesc(shm_toc *toc, DestReceiver *receiver,
paramspace = shm_toc_lookup(toc, PARALLEL_KEY_PARAMLISTINFO, false);
paramLI = RestoreParamList(&paramspace);
- /*
- * Create a QueryDesc for the query. We pass NULL for cachedplan, because
- * we don't have a pointer to the CachedPlan in the leader's process. It's
- * fine because the only reason the executor needs to see it is to decide
- * if it should take locks on certain relations, but parallel workers
- * always take locks anyway.
- */
+ /* Create a QueryDesc for the query. */
return CreateQueryDesc(pstmt,
- NULL,
queryString,
GetActiveSnapshot(), InvalidSnapshot,
receiver, paramLI, NULL, instrument_options);
@@ -1471,8 +1464,7 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
/* Start up the executor */
queryDesc->plannedstmt->jitFlags = fpes->jit_flags;
- if (!ExecutorStart(queryDesc, fpes->eflags))
- elog(ERROR, "ExecutorStart() failed unexpectedly");
+ ExecutorStart(queryDesc, fpes->eflags);
/* Special executor initialization steps for parallel workers */
queryDesc->planstate->state->es_query_dsa = area;
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 3f8a4cb5244..514eae1037d 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -26,7 +26,6 @@
#include "partitioning/partdesc.h"
#include "partitioning/partprune.h"
#include "rewrite/rewriteManip.h"
-#include "storage/lmgr.h"
#include "utils/acl.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
@@ -1771,8 +1770,7 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
* ExecDoInitialPruning:
* Perform runtime "initial" pruning, if necessary, to determine the set
* of child subnodes that need to be initialized during ExecInitNode() for
- * all plan nodes that contain a PartitionPruneInfo. This also locks the
- * leaf partitions whose subnodes will be initialized if needed.
+ * all plan nodes that contain a PartitionPruneInfo.
*
* ExecInitPartitionExecPruning:
* Updates the PartitionPruneState found at given part_prune_index in
@@ -1798,8 +1796,7 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
* ExecDoInitialPruning
* Perform runtime "initial" pruning, if necessary, to determine the set
* of child subnodes that need to be initialized during ExecInitNode() for
- * plan nodes that support partition pruning. This also locks the leaf
- * partitions whose subnodes will be initialized if needed.
+ * plan nodes that support partition pruning.
*
* This function iterates over each PartitionPruneInfo entry in
* estate->es_part_prune_infos. For each entry, it creates a PartitionPruneState
@@ -1821,9 +1818,7 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap)
void
ExecDoInitialPruning(EState *estate)
{
- PlannedStmt *stmt = estate->es_plannedstmt;
ListCell *lc;
- List *locked_relids = NIL;
foreach(lc, estate->es_part_prune_infos)
{
@@ -1849,68 +1844,11 @@ ExecDoInitialPruning(EState *estate)
else
validsubplan_rtis = all_leafpart_rtis;
- if (ExecShouldLockRelations(estate))
- {
- int rtindex = -1;
-
- while ((rtindex = bms_next_member(validsubplan_rtis,
- rtindex)) >= 0)
- {
- RangeTblEntry *rte = exec_rt_fetch(rtindex, estate);
-
- Assert(rte->rtekind == RTE_RELATION &&
- rte->rellockmode != NoLock);
- LockRelationOid(rte->relid, rte->rellockmode);
- locked_relids = lappend_int(locked_relids, rtindex);
- }
- }
estate->es_unpruned_relids = bms_add_members(estate->es_unpruned_relids,
validsubplan_rtis);
estate->es_part_prune_results = lappend(estate->es_part_prune_results,
validsubplans);
}
-
- /*
- * Lock the first result relation of each ModifyTable node, even if it was
- * pruned. This is required for ExecInitModifyTable(), which keeps its
- * first result relation if all other result relations have been pruned,
- * because some executor paths (e.g., in nodeModifyTable.c and
- * execPartition.c) rely on there being at least one result relation.
- *
- * There's room for improvement here --- we actually only need to do this
- * if all other result relations of the ModifyTable node were pruned, but
- * we don't have an easy way to tell that here.
- */
- if (stmt->resultRelations && ExecShouldLockRelations(estate))
- {
- foreach(lc, stmt->firstResultRels)
- {
- Index firstResultRel = lfirst_int(lc);
-
- if (!bms_is_member(firstResultRel, estate->es_unpruned_relids))
- {
- RangeTblEntry *rte = exec_rt_fetch(firstResultRel, estate);
-
- Assert(rte->rtekind == RTE_RELATION && rte->rellockmode != NoLock);
- LockRelationOid(rte->relid, rte->rellockmode);
- locked_relids = lappend_int(locked_relids, firstResultRel);
- }
- }
- }
-
- /*
- * Release the useless locks if the plan won't be executed. This is the
- * same as what CheckCachedPlan() in plancache.c does.
- */
- if (!ExecPlanStillValid(estate))
- {
- foreach(lc, locked_relids)
- {
- RangeTblEntry *rte = exec_rt_fetch(lfirst_int(lc), estate);
-
- UnlockRelationOid(rte->relid, rte->rellockmode);
- }
- }
}
/*
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index 772c86e70e9..fdc65c2b42b 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -147,7 +147,6 @@ CreateExecutorState(void)
estate->es_top_eflags = 0;
estate->es_instrument = 0;
estate->es_finished = false;
- estate->es_aborted = false;
estate->es_exprcontexts = NIL;
diff --git a/src/backend/executor/functions.c b/src/backend/executor/functions.c
index 8d4d062d579..359aafea681 100644
--- a/src/backend/executor/functions.c
+++ b/src/backend/executor/functions.c
@@ -34,6 +34,7 @@
#include "utils/funccache.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
+#include "utils/plancache.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
@@ -1338,7 +1339,6 @@ postquel_start(execution_state *es, SQLFunctionCachePtr fcache)
dest = None_Receiver;
es->qd = CreateQueryDesc(es->stmt,
- NULL,
fcache->func->src,
GetActiveSnapshot(),
InvalidSnapshot,
@@ -1363,8 +1363,7 @@ postquel_start(execution_state *es, SQLFunctionCachePtr fcache)
eflags = EXEC_FLAG_SKIP_TRIGGERS;
else
eflags = 0; /* default run-to-completion flags */
- if (!ExecutorStart(es->qd, eflags))
- elog(ERROR, "ExecutorStart() failed unexpectedly");
+ ExecutorStart(es->qd, eflags);
}
es->status = F_EXEC_RUN;
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 46d533b7288..54da8e7995b 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -64,6 +64,7 @@
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
#include "rewrite/rewriteHandler.h"
+#include "rewrite/rewriteManip.h"
#include "storage/lmgr.h"
#include "utils/builtins.h"
#include "utils/datum.h"
@@ -3735,6 +3736,7 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate)
switch (action->commandType)
{
case CMD_INSERT:
+ /* INSERT actions always use rootRelInfo */
ExecCheckPlanOutput(rootRelInfo->ri_RelationDesc,
action->targetList);
@@ -3774,9 +3776,23 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate)
}
else
{
- /* not partitioned? use the stock relation and slot */
- tgtslot = resultRelInfo->ri_newTupleSlot;
- tgtdesc = RelationGetDescr(resultRelInfo->ri_RelationDesc);
+ /*
+ * If the MERGE targets an inherited table, we insert
+ * into the root table, so we must initialize its
+ * "new" tuple slot, if not already done, and use its
+ * relation descriptor for the projection.
+ *
+ * For non-inherited tables, rootRelInfo and
+ * resultRelInfo are the same, and the "new" tuple
+ * slot will already have been initialized.
+ */
+ if (rootRelInfo->ri_newTupleSlot == NULL)
+ rootRelInfo->ri_newTupleSlot =
+ table_slot_create(rootRelInfo->ri_RelationDesc,
+ &estate->es_tupleTable);
+
+ tgtslot = rootRelInfo->ri_newTupleSlot;
+ tgtdesc = RelationGetDescr(rootRelInfo->ri_RelationDesc);
}
action_state->mas_proj =
@@ -3809,6 +3825,114 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate)
}
}
}
+
+ /*
+ * If the MERGE targets an inherited table, any INSERT actions will use
+ * rootRelInfo, and rootRelInfo will not be in the resultRelInfo array.
+ * Therefore we must initialize its WITH CHECK OPTION constraints and
+ * RETURNING projection, as ExecInitModifyTable did for the resultRelInfo
+ * entries.
+ *
+ * Note that the planner does not build a withCheckOptionList or
+ * returningList for the root relation, but as in ExecInitPartitionInfo,
+ * we can use the first resultRelInfo entry as a reference to calculate
+ * the attno's for the root table.
+ */
+ if (rootRelInfo != mtstate->resultRelInfo &&
+ rootRelInfo->ri_RelationDesc->rd_rel->relkind != RELKIND_PARTITIONED_TABLE &&
+ (mtstate->mt_merge_subcommands & MERGE_INSERT) != 0)
+ {
+ ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
+ Relation rootRelation = rootRelInfo->ri_RelationDesc;
+ Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
+ int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
+ AttrMap *part_attmap = NULL;
+ bool found_whole_row;
+
+ if (node->withCheckOptionLists != NIL)
+ {
+ List *wcoList;
+ List *wcoExprs = NIL;
+
+ /* There should be as many WCO lists as result rels */
+ Assert(list_length(node->withCheckOptionLists) ==
+ list_length(node->resultRelations));
+
+ /*
+ * Use the first WCO list as a reference. In the most common case,
+ * this will be for the same relation as rootRelInfo, and so there
+ * will be no need to adjust its attno's.
+ */
+ wcoList = linitial(node->withCheckOptionLists);
+ if (rootRelation != firstResultRel)
+ {
+ /* Convert any Vars in it to contain the root's attno's */
+ part_attmap =
+ build_attrmap_by_name(RelationGetDescr(rootRelation),
+ RelationGetDescr(firstResultRel),
+ false);
+
+ wcoList = (List *)
+ map_variable_attnos((Node *) wcoList,
+ firstVarno, 0,
+ part_attmap,
+ RelationGetForm(rootRelation)->reltype,
+ &found_whole_row);
+ }
+
+ foreach(lc, wcoList)
+ {
+ WithCheckOption *wco = lfirst_node(WithCheckOption, lc);
+ ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual),
+ &mtstate->ps);
+
+ wcoExprs = lappend(wcoExprs, wcoExpr);
+ }
+
+ rootRelInfo->ri_WithCheckOptions = wcoList;
+ rootRelInfo->ri_WithCheckOptionExprs = wcoExprs;
+ }
+
+ if (node->returningLists != NIL)
+ {
+ List *returningList;
+
+ /* There should be as many returning lists as result rels */
+ Assert(list_length(node->returningLists) ==
+ list_length(node->resultRelations));
+
+ /*
+ * Use the first returning list as a reference. In the most common
+ * case, this will be for the same relation as rootRelInfo, and so
+ * there will be no need to adjust its attno's.
+ */
+ returningList = linitial(node->returningLists);
+ if (rootRelation != firstResultRel)
+ {
+ /* Convert any Vars in it to contain the root's attno's */
+ if (part_attmap == NULL)
+ part_attmap =
+ build_attrmap_by_name(RelationGetDescr(rootRelation),
+ RelationGetDescr(firstResultRel),
+ false);
+
+ returningList = (List *)
+ map_variable_attnos((Node *) returningList,
+ firstVarno, 0,
+ part_attmap,
+ RelationGetForm(rootRelation)->reltype,
+ &found_whole_row);
+ }
+ rootRelInfo->ri_returningList = returningList;
+
+ /* Initialize the RETURNING projection */
+ rootRelInfo->ri_projectReturning =
+ ExecBuildProjectionInfo(returningList, econtext,
+ mtstate->ps.ps_ResultTupleSlot,
+ &mtstate->ps,
+ RelationGetDescr(rootRelation));
+ }
+ }
}
/*
@@ -4830,12 +4954,11 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
ExprContext *econtext;
/*
- * Initialize result tuple slot and assign its rowtype using the first
- * RETURNING list. We assume the rest will look the same.
+ * Initialize result tuple slot and assign its rowtype using the plan
+ * node's declared targetlist, which the planner set up to be the same
+ * as the first (before runtime pruning) RETURNING list. We assume
+ * all the result rels will produce compatible output.
*/
- mtstate->ps.plan->targetlist = (List *) linitial(returningLists);
-
- /* Set up a slot for the output of the RETURNING projection(s) */
ExecInitResultTupleSlotTL(&mtstate->ps, &TTSOpsVirtual);
slot = mtstate->ps.ps_ResultTupleSlot;
@@ -4865,7 +4988,6 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
* We still must construct a dummy result tuple type, because InitPlan
* expects one (maybe should change that?).
*/
- mtstate->ps.plan->targetlist = NIL;
ExecInitResultTypeTL(&mtstate->ps);
mtstate->ps.ps_ExprContext = NULL;
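The new ExecInitMerge() code above relies on build_attrmap_by_name() and map_variable_attnos() to translate Var attribute numbers from the first result relation to the root. A toy standalone illustration (plain C, illustrative names) of why that translation is needed: the same column name can sit at different attribute numbers in a child table and its inheritance root.

    #include <stdio.h>
    #include <string.h>

    static int
    attno_in(const char *cols[], int ncols, const char *name)
    {
        for (int i = 0; i < ncols; i++)
            if (strcmp(cols[i], name) == 0)
                return i + 1;           /* attnos are 1-based */
        return 0;                       /* not found */
    }

    int
    main(void)
    {
        const char *root[]  = {"a", "b"};       /* root:  a=1, b=2 */
        const char *child[] = {"b", "a", "c"};  /* child: b=1, a=2, c=3 */

        /* a Var for child attno 2 ("a") must be rewritten to root attno 1 */
        printf("child attno 2 -> root attno %d\n",
               attno_in(root, 2, child[2 - 1]));
        return 0;
    }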
diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c
index ab2eab9596e..26f7420b64b 100644
--- a/src/backend/executor/nodeTidrangescan.c
+++ b/src/backend/executor/nodeTidrangescan.c
@@ -128,9 +128,11 @@ TidExprListCreate(TidRangeScanState *tidrangestate)
* TidRangeEval
*
* Compute and set node's block and offset range to scan by evaluating
- * the trss_tidexprs. Returns false if we detect the range cannot
+ * node->trss_tidexprs. Returns false if we detect the range cannot
* contain any tuples. Returns true if it's possible for the range to
- * contain tuples.
+ * contain tuples. We don't bother validating that trss_mintid is less
+ * than or equal to trss_maxtid, as the scan_set_tidrange() table AM
+ * function will handle that.
* ----------------------------------------------------------------
*/
static bool
diff --git a/src/backend/executor/spi.c b/src/backend/executor/spi.c
index 3288396def3..ecb2e4ccaa1 100644
--- a/src/backend/executor/spi.c
+++ b/src/backend/executor/spi.c
@@ -70,8 +70,7 @@ static int _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options,
static ParamListInfo _SPI_convert_params(int nargs, Oid *argtypes,
Datum *Values, const char *Nulls);
-static int _SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount,
- CachedPlanSource *plansource, int query_index);
+static int _SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount);
static void _SPI_error_callback(void *arg);
@@ -1686,8 +1685,7 @@ SPI_cursor_open_internal(const char *name, SPIPlanPtr plan,
query_string,
plansource->commandTag,
stmt_list,
- cplan,
- plansource);
+ cplan);
/*
* Set up options for portal. Default SCROLL type is chosen the same way
@@ -2502,7 +2500,6 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options,
CachedPlanSource *plansource = (CachedPlanSource *) lfirst(lc1);
List *stmt_list;
ListCell *lc2;
- int query_index = 0;
spicallbackarg.query = plansource->query_string;
@@ -2693,16 +2690,14 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options,
snap = InvalidSnapshot;
qdesc = CreateQueryDesc(stmt,
- cplan,
plansource->query_string,
snap, crosscheck_snapshot,
dest,
options->params,
_SPI_current->queryEnv,
0);
-
- res = _SPI_pquery(qdesc, fire_triggers, canSetTag ? options->tcount : 0,
- plansource, query_index);
+ res = _SPI_pquery(qdesc, fire_triggers,
+ canSetTag ? options->tcount : 0);
FreeQueryDesc(qdesc);
}
else
@@ -2799,8 +2794,6 @@ _SPI_execute_plan(SPIPlanPtr plan, const SPIExecuteOptions *options,
my_res = res;
goto fail;
}
-
- query_index++;
}
/* Done with this plan, so release refcount */
@@ -2878,8 +2871,7 @@ _SPI_convert_params(int nargs, Oid *argtypes,
}
static int
-_SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount,
- CachedPlanSource *plansource, int query_index)
+_SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount)
{
int operation = queryDesc->operation;
int eflags;
@@ -2935,16 +2927,7 @@ _SPI_pquery(QueryDesc *queryDesc, bool fire_triggers, uint64 tcount,
else
eflags = EXEC_FLAG_SKIP_TRIGGERS;
- if (queryDesc->cplan)
- {
- ExecutorStartCachedPlan(queryDesc, eflags, plansource, query_index);
- Assert(queryDesc->planstate);
- }
- else
- {
- if (!ExecutorStart(queryDesc, eflags))
- elog(ERROR, "ExecutorStart() failed unexpectedly");
- }
+ ExecutorStart(queryDesc, eflags);
ExecutorRun(queryDesc, ForwardScanDirection, tcount);
diff --git a/src/backend/jit/README b/src/backend/jit/README
index 5427bdf2153..a40950dfb03 100644
--- a/src/backend/jit/README
+++ b/src/backend/jit/README
@@ -205,7 +205,7 @@ The ability to do so allows us to get the LLVM IR for all operators
bitcode files get installed into the server's
$pkglibdir/bitcode/postgres/
Using existing LLVM functionality (for parallel LTO compilation),
-additionally an index is over these is stored to
+additionally an index over these is stored to
$pkglibdir/bitcode/postgres.index.bc
Similarly extensions can install code into
diff --git a/src/backend/jit/llvm/meson.build b/src/backend/jit/llvm/meson.build
index c8e06dfbe35..805fbd69006 100644
--- a/src/backend/jit/llvm/meson.build
+++ b/src/backend/jit/llvm/meson.build
@@ -53,7 +53,7 @@ llvm_irgen_args = [
if ccache.found()
llvm_irgen_command = ccache
- llvm_irgen_args = [clang.path()] + llvm_irgen_args
+ llvm_irgen_args = [clang.full_path()] + llvm_irgen_args
else
llvm_irgen_command = clang
endif
diff --git a/src/backend/lib/README b/src/backend/lib/README
index f2fb591237d..c28cbe356f0 100644
--- a/src/backend/lib/README
+++ b/src/backend/lib/README
@@ -1,8 +1,6 @@
This directory contains general-purpose data structures, for use anywhere
in the backend:
-binaryheap.c - a binary heap
-
bipartite_match.c - Hopcroft-Karp maximum cardinality algorithm for bipartite graphs
bloomfilter.c - probabilistic, space-efficient set membership testing
@@ -21,8 +19,6 @@ pairingheap.c - a pairing heap
rbtree.c - a red-black tree
-stringinfo.c - an extensible string type
-
Aside from the inherent characteristics of the data structures, there are a
few practical differences between the binary heap and the pairing heap. The
diff --git a/src/backend/libpq/be-secure-gssapi.c b/src/backend/libpq/be-secure-gssapi.c
index 717ba9824f9..5d98c58ffa8 100644
--- a/src/backend/libpq/be-secure-gssapi.c
+++ b/src/backend/libpq/be-secure-gssapi.c
@@ -46,11 +46,18 @@
* don't want the other side to send arbitrarily huge packets as we
* would have to allocate memory for them to then pass them to GSSAPI.
*
- * Therefore, these two #define's are effectively part of the protocol
+ * Therefore, this #define is effectively part of the protocol
* spec and can't ever be changed.
*/
-#define PQ_GSS_SEND_BUFFER_SIZE 16384
-#define PQ_GSS_RECV_BUFFER_SIZE 16384
+#define PQ_GSS_MAX_PACKET_SIZE 16384 /* includes uint32 header word */
+
+/*
+ * However, during the authentication exchange we must cope with whatever
+ * message size the GSSAPI library wants to send (because our protocol
+ * doesn't support splitting those messages). Depending on configuration
+ * those messages might be as much as 64kB.
+ */
+#define PQ_GSS_AUTH_BUFFER_SIZE 65536 /* includes uint32 header word */
/*
* Since we manage at most one GSS-encrypted connection per backend,
@@ -114,9 +121,9 @@ be_gssapi_write(Port *port, const void *ptr, size_t len)
* again, so if it offers a len less than that, something is wrong.
*
* Note: it may seem attractive to report partial write completion once
- * we've successfully sent any encrypted packets. However, that can cause
- * problems for callers; notably, pqPutMsgEnd's heuristic to send only
- * full 8K blocks interacts badly with such a hack. We won't save much,
+ * we've successfully sent any encrypted packets. However, doing that
+ * expands the state space of this processing and has been responsible for
+ * bugs in the past (cf. commit d053a879b). We won't save much,
* typically, by letting callers discard data early, so don't risk it.
*/
if (len < PqGSSSendConsumed)
@@ -210,12 +217,12 @@ be_gssapi_write(Port *port, const void *ptr, size_t len)
errno = ECONNRESET;
return -1;
}
- if (output.length > PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))
+ if (output.length > PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))
{
ereport(COMMERROR,
(errmsg("server tried to send oversize GSSAPI packet (%zu > %zu)",
(size_t) output.length,
- PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))));
+ PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))));
errno = ECONNRESET;
return -1;
}
@@ -346,12 +353,12 @@ be_gssapi_read(Port *port, void *ptr, size_t len)
/* Decode the packet length and check for overlength packet */
input.length = pg_ntoh32(*(uint32 *) PqGSSRecvBuffer);
- if (input.length > PQ_GSS_RECV_BUFFER_SIZE - sizeof(uint32))
+ if (input.length > PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))
{
ereport(COMMERROR,
(errmsg("oversize GSSAPI packet sent by the client (%zu > %zu)",
(size_t) input.length,
- PQ_GSS_RECV_BUFFER_SIZE - sizeof(uint32))));
+ PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32))));
errno = ECONNRESET;
return -1;
}
@@ -517,10 +524,13 @@ secure_open_gssapi(Port *port)
* that will never use them, and we ensure that the buffers are
* sufficiently aligned for the length-word accesses that we do in some
* places in this file.
+ *
+ * We'll use PQ_GSS_AUTH_BUFFER_SIZE-sized buffers until transport
+ * negotiation is complete, then switch to PQ_GSS_MAX_PACKET_SIZE.
*/
- PqGSSSendBuffer = malloc(PQ_GSS_SEND_BUFFER_SIZE);
- PqGSSRecvBuffer = malloc(PQ_GSS_RECV_BUFFER_SIZE);
- PqGSSResultBuffer = malloc(PQ_GSS_RECV_BUFFER_SIZE);
+ PqGSSSendBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE);
+ PqGSSRecvBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE);
+ PqGSSResultBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE);
if (!PqGSSSendBuffer || !PqGSSRecvBuffer || !PqGSSResultBuffer)
ereport(FATAL,
(errcode(ERRCODE_OUT_OF_MEMORY),
@@ -568,16 +578,16 @@ secure_open_gssapi(Port *port)
/*
* During initialization, packets are always fully consumed and
- * shouldn't ever be over PQ_GSS_RECV_BUFFER_SIZE in length.
+ * shouldn't ever be over PQ_GSS_AUTH_BUFFER_SIZE in total length.
*
* Verify on our side that the client doesn't do something funny.
*/
- if (input.length > PQ_GSS_RECV_BUFFER_SIZE)
+ if (input.length > PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))
{
ereport(COMMERROR,
- (errmsg("oversize GSSAPI packet sent by the client (%zu > %d)",
+ (errmsg("oversize GSSAPI packet sent by the client (%zu > %zu)",
(size_t) input.length,
- PQ_GSS_RECV_BUFFER_SIZE)));
+ PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))));
return -1;
}
@@ -631,12 +641,12 @@ secure_open_gssapi(Port *port)
{
uint32 netlen = pg_hton32(output.length);
- if (output.length > PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))
+ if (output.length > PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))
{
ereport(COMMERROR,
(errmsg("server tried to send oversize GSSAPI packet (%zu > %zu)",
(size_t) output.length,
- PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32))));
+ PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32))));
gss_release_buffer(&minor, &output);
return -1;
}
@@ -692,11 +702,28 @@ secure_open_gssapi(Port *port)
}
/*
+ * Release the large authentication buffers and allocate the ones we want
+ * for normal operation.
+ */
+ free(PqGSSSendBuffer);
+ free(PqGSSRecvBuffer);
+ free(PqGSSResultBuffer);
+ PqGSSSendBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE);
+ PqGSSRecvBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE);
+ PqGSSResultBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE);
+ if (!PqGSSSendBuffer || !PqGSSRecvBuffer || !PqGSSResultBuffer)
+ ereport(FATAL,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
+ PqGSSSendLength = PqGSSSendNext = PqGSSSendConsumed = 0;
+ PqGSSRecvLength = PqGSSResultLength = PqGSSResultNext = 0;
+
+ /*
* Determine the max packet size which will fit in our buffer, after
* accounting for the length. be_gssapi_write will need this.
*/
major = gss_wrap_size_limit(&minor, port->gss->ctx, 1, GSS_C_QOP_DEFAULT,
- PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32),
+ PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32),
&PqGSSMaxPktSize);
if (GSS_ERROR(major))
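Every check in this file enforces the same framing rule: a packet is a 4-byte network-order length word followed by at most PQ_GSS_MAX_PACKET_SIZE - 4 payload bytes (or the larger PQ_GSS_AUTH_BUFFER_SIZE limit during negotiation). A standalone sketch of that validation, assuming POSIX ntohl() (not the PostgreSQL code itself):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <arpa/inet.h>

    #define MAX_PACKET_SIZE 16384       /* includes the uint32 header word */

    static bool
    packet_length_ok(const unsigned char *hdr)
    {
        uint32_t len;

        memcpy(&len, hdr, sizeof(len)); /* header may be unaligned */
        len = ntohl(len);
        return len <= MAX_PACKET_SIZE - sizeof(uint32_t);
    }

    int
    main(void)
    {
        unsigned char ok_hdr[4]  = {0x00, 0x00, 0x3f, 0xfc};  /* 16380: fits */
        unsigned char bad_hdr[4] = {0x00, 0x00, 0x40, 0x00};  /* 16384: no   */

        printf("%d %d\n", packet_length_ok(ok_hdr), packet_length_ok(bad_hdr));
        return 0;
    }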
diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c
index 64ff3ce3d6a..c8b63ef8249 100644
--- a/src/backend/libpq/be-secure-openssl.c
+++ b/src/backend/libpq/be-secure-openssl.c
@@ -1436,10 +1436,10 @@ initialize_ecdh(SSL_CTX *context, bool isServerStart)
*/
ereport(isServerStart ? FATAL : LOG,
errcode(ERRCODE_CONFIG_FILE_ERROR),
- errmsg("failed to set group names specified in ssl_groups: %s",
+ errmsg("could not set group names specified in ssl_groups: %s",
SSLerrmessageExt(ERR_get_error(),
_("No valid groups found"))),
- errhint("Ensure that each group name is spelled correctly and supported by the installed version of OpenSSL"));
+ errhint("Ensure that each group name is spelled correctly and supported by the installed version of OpenSSL."));
return false;
}
#endif
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 77659b0f760..9ecddb14231 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1039,6 +1039,11 @@ _read${n}(void)
print $off "\tWRITE_UINT_FIELD($f);\n";
print $rff "\tREAD_UINT_FIELD($f);\n" unless $no_read;
}
+ elsif ($t eq 'int64')
+ {
+ print $off "\tWRITE_INT64_FIELD($f);\n";
+ print $rff "\tREAD_INT64_FIELD($f);\n" unless $no_read;
+ }
elsif ($t eq 'uint64'
|| $t eq 'AclMode')
{
@@ -1324,7 +1329,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
# Node type. Squash constants if requested.
if ($query_jumble_squash)
{
- print $jff "\tJUMBLE_ELEMENTS($f);\n"
+ print $jff "\tJUMBLE_ELEMENTS($f, node);\n"
unless $query_jumble_ignore;
}
else
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index ceac3fd8620..eaf391fc2ab 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -51,6 +51,12 @@ static void outDouble(StringInfo str, double d);
#define WRITE_UINT_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname)
+/* Write a signed integer field (anything written with INT64_FORMAT) */
+#define WRITE_INT64_FIELD(fldname) \
+ appendStringInfo(str, \
+ " :" CppAsString(fldname) " " INT64_FORMAT, \
+ node->fldname)
+
/* Write an unsigned integer field (anything written with UINT64_FORMAT) */
#define WRITE_UINT64_FIELD(fldname) \
appendStringInfo(str, " :" CppAsString(fldname) " " UINT64_FORMAT, \
@@ -647,6 +653,8 @@ _outA_Expr(StringInfo str, const A_Expr *node)
WRITE_NODE_FIELD(lexpr);
WRITE_NODE_FIELD(rexpr);
+ WRITE_LOCATION_FIELD(rexpr_list_start);
+ WRITE_LOCATION_FIELD(rexpr_list_end);
WRITE_LOCATION_FIELD(location);
}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index d1e82a63f09..31f97151977 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -21,6 +21,11 @@
* tree(s) generated from the query. The executor can then use this value
* to blame query costs on the proper queryId.
*
+ * Arrays of two or more constants and PARAM_EXTERN parameters are "squashed"
+ * and contribute only once to the jumble. This has the effect that queries
+ * that differ only in the length of such lists have the same queryId.
+ *
+ *
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
@@ -56,16 +61,18 @@ int compute_query_id = COMPUTE_QUERY_ID_AUTO;
bool query_id_enabled = false;
static JumbleState *InitJumble(void);
-static uint64 DoJumble(JumbleState *jstate, Node *node);
+static int64 DoJumble(JumbleState *jstate, Node *node);
static void AppendJumble(JumbleState *jstate,
const unsigned char *value, Size size);
static void FlushPendingNulls(JumbleState *jstate);
static void RecordConstLocation(JumbleState *jstate,
- int location, bool squashed);
+ bool extern_param,
+ int location, int len);
static void _jumbleNode(JumbleState *jstate, Node *node);
-static void _jumbleElements(JumbleState *jstate, List *elements);
-static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleList(JumbleState *jstate, Node *node);
+static void _jumbleElements(JumbleState *jstate, List *elements, Node *node);
+static void _jumbleParam(JumbleState *jstate, Node *node);
+static void _jumbleA_Const(JumbleState *jstate, Node *node);
static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
static void _jumbleRangeTblEntry_eref(JumbleState *jstate,
RangeTblEntry *rte,
@@ -141,12 +148,12 @@ JumbleQuery(Query *query)
* If we are unlucky enough to get a hash of zero, use 1 instead for
* normal statements and 2 for utility queries.
*/
- if (query->queryId == UINT64CONST(0))
+ if (query->queryId == INT64CONST(0))
{
if (query->utilityStmt)
- query->queryId = UINT64CONST(2);
+ query->queryId = INT64CONST(2);
else
- query->queryId = UINT64CONST(1);
+ query->queryId = INT64CONST(1);
}
return jstate;
@@ -185,6 +192,7 @@ InitJumble(void)
jstate->clocations_count = 0;
jstate->highest_extern_param_id = 0;
jstate->pending_nulls = 0;
+ jstate->has_squashed_lists = false;
#ifdef USE_ASSERT_CHECKING
jstate->total_jumble_len = 0;
#endif
@@ -197,7 +205,7 @@ InitJumble(void)
* Jumble the given Node using the given JumbleState and return the resulting
* jumble hash.
*/
-static uint64
+static int64
DoJumble(JumbleState *jstate, Node *node)
{
/* Jumble the given node */
@@ -207,10 +215,14 @@ DoJumble(JumbleState *jstate, Node *node)
if (jstate->pending_nulls > 0)
FlushPendingNulls(jstate);
+ /* Squashed list found, reset highest_extern_param_id */
+ if (jstate->has_squashed_lists)
+ jstate->highest_extern_param_id = 0;
+
/* Process the jumble buffer and produce the hash value */
- return DatumGetUInt64(hash_any_extended(jstate->jumble,
- jstate->jumble_len,
- 0));
+ return DatumGetInt64(hash_any_extended(jstate->jumble,
+ jstate->jumble_len,
+ 0));
}
/*
@@ -256,10 +268,10 @@ AppendJumbleInternal(JumbleState *jstate, const unsigned char *item,
if (unlikely(jumble_len >= JUMBLE_SIZE))
{
- uint64 start_hash;
+ int64 start_hash;
- start_hash = DatumGetUInt64(hash_any_extended(jumble,
- JUMBLE_SIZE, 0));
+ start_hash = DatumGetInt64(hash_any_extended(jumble,
+ JUMBLE_SIZE, 0));
memcpy(jumble, &start_hash, sizeof(start_hash));
jumble_len = sizeof(start_hash);
}
@@ -373,15 +385,17 @@ FlushPendingNulls(JumbleState *jstate)
/*
- * Record location of constant within query string of query tree that is
- * currently being walked.
+ * Record the location of some kind of constant within a query string.
+ * These are not only bare constants but also expressions that ultimately
+ * constitute a constant, such as those inside casts and simple function
+ * calls; if extern_param, then it corresponds to a PARAM_EXTERN Param.
*
- * 'squashed' signals that the constant represents the first or the last
- * element in a series of merged constants, and everything but the first/last
- * element contributes nothing to the jumble hash.
+ * If length is -1, it indicates a single such constant element. If
+ * it's a positive integer, it indicates the length of a squashable
+ * list of them.
*/
static void
-RecordConstLocation(JumbleState *jstate, int location, bool squashed)
+RecordConstLocation(JumbleState *jstate, bool extern_param, int location, int len)
{
/* -1 indicates unknown or undefined location */
if (location >= 0)
@@ -396,9 +410,15 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed)
sizeof(LocationLen));
}
jstate->clocations[jstate->clocations_count].location = location;
- /* initialize lengths to -1 to simplify third-party module usage */
- jstate->clocations[jstate->clocations_count].squashed = squashed;
- jstate->clocations[jstate->clocations_count].length = -1;
+
+ /*
+ * Lengths are either positive integers (indicating a squashable
+ * list), or -1.
+ */
+ Assert(len > 0 || len == -1);
+ jstate->clocations[jstate->clocations_count].length = len;
+ jstate->clocations[jstate->clocations_count].squashed = (len > -1);
+ jstate->clocations[jstate->clocations_count].extern_param = extern_param;
jstate->clocations_count++;
}
}
@@ -407,47 +427,74 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed)
* Subroutine for _jumbleElements: Verify a few simple cases where we can
* deduce that the expression is a constant:
*
- * - Ignore a possible wrapping RelabelType and CoerceViaIO.
- * - If it's a FuncExpr, check that the function is an implicit
+ * - See through any wrapping RelabelType and CoerceViaIO layers.
+ * - If it's a FuncExpr, check that the function is a builtin
* cast and its arguments are Const.
- * - Otherwise test if the expression is a simple Const.
+ * - Otherwise test if the expression is a simple Const or a
+ * PARAM_EXTERN param.
*/
static bool
-IsSquashableConst(Node *element)
+IsSquashableConstant(Node *element)
{
- if (IsA(element, RelabelType))
- element = (Node *) ((RelabelType *) element)->arg;
-
- if (IsA(element, CoerceViaIO))
- element = (Node *) ((CoerceViaIO *) element)->arg;
-
- if (IsA(element, FuncExpr))
+restart:
+ switch (nodeTag(element))
{
- FuncExpr *func = (FuncExpr *) element;
- ListCell *temp;
+ case T_RelabelType:
+ /* Unwrap RelabelType */
+ element = (Node *) ((RelabelType *) element)->arg;
+ goto restart;
- if (func->funcformat != COERCE_IMPLICIT_CAST &&
- func->funcformat != COERCE_EXPLICIT_CAST)
- return false;
+ case T_CoerceViaIO:
+ /* Unwrap CoerceViaIO */
+ element = (Node *) ((CoerceViaIO *) element)->arg;
+ goto restart;
- if (func->funcid > FirstGenbkiObjectId)
- return false;
+ case T_Const:
+ return true;
- foreach(temp, func->args)
- {
- Node *arg = lfirst(temp);
+ case T_Param:
+ return castNode(Param, element)->paramkind == PARAM_EXTERN;
- if (!IsA(arg, Const)) /* XXX we could recurse here instead */
- return false;
- }
+ case T_FuncExpr:
+ {
+ FuncExpr *func = (FuncExpr *) element;
+ ListCell *temp;
- return true;
- }
+ if (func->funcformat != COERCE_IMPLICIT_CAST &&
+ func->funcformat != COERCE_EXPLICIT_CAST)
+ return false;
- if (!IsA(element, Const))
- return false;
+ if (func->funcid > FirstGenbkiObjectId)
+ return false;
- return true;
+ /*
+ * We can check function arguments recursively, being careful
+ * about recursing too deep. At each recursion level it's
+ * enough to test the stack on the first element. (Note that
+ * I wasn't able to hit this without bloating the stack
+ * artificially in this function: the parser errors out before
+ * stack size becomes a problem here.)
+ */
+ foreach(temp, func->args)
+ {
+ Node *arg = lfirst(temp);
+
+ if (!IsA(arg, Const))
+ {
+ if (foreach_current_index(temp) == 0 &&
+ stack_is_too_deep())
+ return false;
+ else if (!IsSquashableConstant(arg))
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ default:
+ return false;
+ }
}
/*
@@ -457,39 +504,33 @@ IsSquashableConst(Node *element)
* Return value indicates if squashing is possible.
*
* Note that this function searches only for explicit Const nodes with
- * possibly very simple decorations on top, and does not try to simplify
- * expressions.
+ * possibly very simple decorations on top and PARAM_EXTERN parameters,
+ * and does not try to simplify expressions.
*/
static bool
-IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr)
+IsSquashableConstantList(List *elements)
{
ListCell *temp;
- /*
- * If squashing is disabled, or the list is too short, we don't try to
- * squash it.
- */
+ /* If the list is too short, we don't try to squash it. */
if (list_length(elements) < 2)
return false;
foreach(temp, elements)
{
- if (!IsSquashableConst(lfirst(temp)))
+ if (!IsSquashableConstant(lfirst(temp)))
return false;
}
- *firstExpr = linitial(elements);
- *lastExpr = llast(elements);
-
return true;
}
#define JUMBLE_NODE(item) \
_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_ELEMENTS(list) \
- _jumbleElements(jstate, (List *) expr->list)
+#define JUMBLE_ELEMENTS(list, node) \
+ _jumbleElements(jstate, (List *) expr->list, node)
#define JUMBLE_LOCATION(location) \
- RecordConstLocation(jstate, expr->location, false)
+ RecordConstLocation(jstate, false, expr->location, -1)
#define JUMBLE_FIELD(item) \
do { \
if (sizeof(expr->item) == 8) \
@@ -516,42 +557,6 @@ do { \
#include "queryjumblefuncs.funcs.c"
-/*
- * We jumble lists of constant elements as one individual item regardless
- * of how many elements are in the list. This means different queries
- * jumble to the same query_id, if the only difference is the number of
- * elements in the list.
- */
-static void
-_jumbleElements(JumbleState *jstate, List *elements)
-{
- Node *first,
- *last;
-
- if (IsSquashableConstList(elements, &first, &last))
- {
- /*
- * If this list of elements is squashable, keep track of the location
- * of its first and last elements. When reading back the locations
- * array, we'll see two consecutive locations with ->squashed set to
- * true, indicating the location of initial and final elements of this
- * list.
- *
- * For the limited set of cases we support now (implicit coerce via
- * FuncExpr, Const) it's fine to use exprLocation of the 'last'
- * expression, but if more complex composite expressions are to be
- * supported (e.g., OpExpr or FuncExpr as an explicit call), more
- * sophisticated tracking will be needed.
- */
- RecordConstLocation(jstate, exprLocation(first), true);
- RecordConstLocation(jstate, exprLocation(last), true);
- }
- else
- {
- _jumbleNode(jstate, (Node *) elements);
- }
-}
-
static void
_jumbleNode(JumbleState *jstate, Node *node)
{
@@ -593,26 +598,6 @@ _jumbleNode(JumbleState *jstate, Node *node)
break;
}
- /* Special cases to handle outside the automated code */
- switch (nodeTag(expr))
- {
- case T_Param:
- {
- Param *p = (Param *) node;
-
- /*
- * Update the highest Param id seen, in order to start
- * normalization correctly.
- */
- if (p->paramkind == PARAM_EXTERN &&
- p->paramid > jstate->highest_extern_param_id)
- jstate->highest_extern_param_id = p->paramid;
- }
- break;
- default:
- break;
- }
-
/* Ensure we added something to the jumble buffer */
Assert(jstate->total_jumble_len > prev_jumble_len);
}
@@ -648,6 +633,79 @@ _jumbleList(JumbleState *jstate, Node *node)
}
}
+/*
+ * We try to jumble lists of expressions as one individual item regardless
+ * of how many elements are in the list. This is know as squashing, which
+ * results in different queries jumbling to the same query_id, if the only
+ * difference is the number of elements in the list.
+ *
+ * We allow constants and PARAM_EXTERN parameters to be squashed. To normalize
+ * such queries, we use the start and end locations of the list of elements in
+ * a list.
+ */
+static void
+_jumbleElements(JumbleState *jstate, List *elements, Node *node)
+{
+ bool normalize_list = false;
+
+ if (IsSquashableConstantList(elements))
+ {
+ if (IsA(node, ArrayExpr))
+ {
+ ArrayExpr *aexpr = (ArrayExpr *) node;
+
+ if (aexpr->list_start > 0 && aexpr->list_end > 0)
+ {
+ RecordConstLocation(jstate,
+ false,
+ aexpr->list_start + 1,
+ (aexpr->list_end - aexpr->list_start) - 1);
+ normalize_list = true;
+ jstate->has_squashed_lists = true;
+ }
+ }
+ }
+
+ if (!normalize_list)
+ {
+ _jumbleNode(jstate, (Node *) elements);
+ }
+}
+
+/*
+ * We store the highest param ID of extern params. This can later be used
+ * to start the numbering of the placeholders for squashed lists.
+ */
+static void
+_jumbleParam(JumbleState *jstate, Node *node)
+{
+ Param *expr = (Param *) node;
+
+ JUMBLE_FIELD(paramkind);
+ JUMBLE_FIELD(paramid);
+ JUMBLE_FIELD(paramtype);
+ /* paramtypmode and paramcollid are ignored */
+
+ if (expr->paramkind == PARAM_EXTERN)
+ {
+ /*
+ * At this point, only external parameter locations outside of
+ * squashable lists will be recorded.
+ */
+ RecordConstLocation(jstate, true, expr->location, -1);
+
+ /*
+ * Update the highest Param id seen, in order to start normalization
+ * correctly.
+ *
+ * Note: This value is reset at the end of jumbling if there exists a
+ * squashable list. See the comment in the definition of JumbleState.
+ */
+ if (expr->paramid > jstate->highest_extern_param_id)
+ jstate->highest_extern_param_id = expr->paramid;
+ }
+}
+
static void
_jumbleA_Const(JumbleState *jstate, Node *node)
{
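The net effect of the squashing machinery above: queries such as WHERE x IN (1, 2, 3) and WHERE x IN (1, 2, 3, 4, 5) now jumble to the same queryId, and the recorded (location, length) pair lets a consumer such as pg_stat_statements collapse the whole list into a single placeholder. A toy version of that text replacement (plain C; the offsets and names are illustrative only):

    #include <stdio.h>

    /* Replace query[start .. start+len) with "$1" -- a toy version of the
     * normalization that a recorded location/length pair enables. */
    static void
    squash_span(const char *query, int start, int len, char *out, size_t outsz)
    {
        snprintf(out, outsz, "%.*s$1%s", start, query, query + start + len);
    }

    int
    main(void)
    {
        char buf[64];

        /* the list body "1, 2, 3" spans byte offsets 28..34 */
        squash_span("SELECT * FROM t WHERE x IN (1, 2, 3)", 28, 7,
                    buf, sizeof(buf));
        puts(buf);              /* SELECT * FROM t WHERE x IN ($1) */
        return 0;
    }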
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 64d3a09f765..48b5d13b9b6 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -68,6 +68,12 @@
token = pg_strtok(&length); /* get field value */ \
local_node->fldname = atoui(token)
+/* Read a signed integer field (anything written using INT64_FORMAT) */
+#define READ_INT64_FIELD(fldname) \
+ token = pg_strtok(&length); /* skip :fldname */ \
+ token = pg_strtok(&length); /* get field value */ \
+ local_node->fldname = strtoi64(token, NULL, 10)
+
/* Read an unsigned integer field (anything written using UINT64_FORMAT) */
#define READ_UINT64_FIELD(fldname) \
token = pg_strtok(&length); /* skip :fldname */ \
@@ -520,6 +526,8 @@ _readA_Expr(void)
READ_NODE_FIELD(lexpr);
READ_NODE_FIELD(rexpr);
+ READ_LOCATION_FIELD(rexpr_list_start);
+ READ_LOCATION_FIELD(rexpr_list_end);
READ_LOCATION_FIELD(location);
READ_DONE();
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 26f0336f1e4..ebedc5574ca 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -154,13 +154,17 @@ add_paths_to_joinrel(PlannerInfo *root,
/*
* See if the inner relation is provably unique for this outer rel.
*
- * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
- * matter since the executor can make the equivalent optimization anyway;
- * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
- * must be considering a semijoin whose inner side is not provably unique
- * (else reduce_unique_semijoins would've simplified it), so there's no
- * point in calling innerrel_is_unique. However, if the LHS covers all of
- * the semijoin's min_lefthand, then it's appropriate to set inner_unique
+ * We have some special cases: for JOIN_SEMI, it doesn't matter since the
+ * executor can make the equivalent optimization anyway. It also doesn't
+ * help enable use of Memoize, since a semijoin with a provably unique
+ * inner side should have been reduced to an inner join in that case.
+ * Therefore, we need not expend planner cycles on proofs. (For
+ * JOIN_ANTI, although it doesn't help the executor for the same reason,
+ * it can benefit Memoize paths.) For JOIN_UNIQUE_INNER, we must be
+ * considering a semijoin whose inner side is not provably unique (else
+ * reduce_unique_semijoins would've simplified it), so there's no point in
+ * calling innerrel_is_unique. However, if the LHS covers all of the
+ * semijoin's min_lefthand, then it's appropriate to set inner_unique
* because the path produced by create_unique_path will be unique relative
* to the LHS. (If we have an LHS that's only part of the min_lefthand,
* that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
@@ -169,12 +173,6 @@ add_paths_to_joinrel(PlannerInfo *root,
switch (jointype)
{
case JOIN_SEMI:
- case JOIN_ANTI:
-
- /*
- * XXX it may be worth proving this to allow a Memoize to be
- * considered for Nested Loop Semi/Anti Joins.
- */
extra.inner_unique = false; /* well, unproven */
break;
case JOIN_UNIQUE_INNER:
@@ -715,16 +713,21 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
return NULL;
/*
- * Currently we don't do this for SEMI and ANTI joins unless they're
- * marked as inner_unique. This is because nested loop SEMI/ANTI joins
- * don't scan the inner node to completion, which will mean memoize cannot
- * mark the cache entry as complete.
- *
- * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
- * = true. Should we? See add_paths_to_joinrel()
+ * Currently we don't do this for SEMI and ANTI joins, because nested loop
+ * SEMI/ANTI joins don't scan the inner node to completion, which means
+ * memoize cannot mark the cache entry as complete. Nor can we mark the
+ * cache entry as complete after fetching the first inner tuple, because
+ * if that tuple and the current outer tuple don't satisfy the join
+ * clauses, a second inner tuple that satisfies the parameters would find
+ * the cache entry already marked as complete. The only exception is when
+ * the inner relation is provably unique, as in that case, there won't be
+ * a second matching tuple and we can safely mark the cache entry as
+ * complete after fetching the first inner tuple. Note that in such
+ * cases, the SEMI join should have been reduced to an inner join by
+ * reduce_unique_semijoins.
*/
- if (!extra->inner_unique && (jointype == JOIN_SEMI ||
- jointype == JOIN_ANTI))
+ if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
+ !extra->inner_unique)
return NULL;
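A minimal, self-contained simulation (toy types, nothing from PostgreSQL) of the hazard this comment describes: once the inner scan stops early, a cache entry marked complete can hand a later probe a wrong no-match answer.

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
    /* Inner tuples that share one memoize cache key. */
    const int inner[] = {1, 2};

    int     cached[2];
    int     ncached = 0;
    bool    complete = false;

    /*
     * Probe by outer tuple A: a semijoin stops pulling from the inner
     * node at its first match (payload 1 here), so only a prefix of the
     * inner tuples lands in the cache.  Marking the entry complete now
     * is the unsafe step.
     */
    cached[ncached++] = inner[0];
    complete = true;

    /*
     * Probe by outer tuple B: it needs payload 2.  Seeing "complete",
     * it scans only the cache and never visits the inner node.
     */
    bool match = false;
    for (int i = 0; i < ncached; i++)
        if (cached[i] == 2)
            match = true;

    if (complete && !match)
        printf("B: no match reported, yet inner[1] == 2 exists (wrong)\n");
    return 0;
}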
/*
@@ -876,16 +879,13 @@ try_nestloop_path(PlannerInfo *root,
/*
* Check to see if proposed path is still parameterized, and reject if the
* parameterization wouldn't be sensible --- unless allow_star_schema_join
- * says to allow it anyway. Also, we must reject if have_dangerous_phv
- * doesn't like the look of it, which could only happen if the nestloop is
- * still parameterized.
+ * says to allow it anyway.
*/
required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
innerrelids, inner_paramrels);
if (required_outer &&
- ((!bms_overlap(required_outer, extra->param_source_rels) &&
- !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
- have_dangerous_phv(root, outerrelids, inner_paramrels)))
+ !bms_overlap(required_outer, extra->param_source_rels) &&
+ !allow_star_schema_join(root, outerrelids, inner_paramrels))
{
/* Waste no memory when we reject a path here */
bms_free(required_outer);
diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 60d65762b5d..aad41b94009 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -565,9 +565,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
* Also, if the lateral reference is only indirect, we should reject
* the join; whatever rel(s) the reference chain goes through must be
* joined to first.
- *
- * Another case that might keep us from building a valid plan is the
- * implementation restriction described by have_dangerous_phv().
*/
lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
@@ -584,9 +581,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
/* check there is a direct reference from rel2 to rel1 */
if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids))
return false; /* only indirect refs, so reject */
- /* check we won't have a dangerous PHV */
- if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
- return false; /* might be unable to handle required PHV */
}
else if (lateral_rev)
{
@@ -599,9 +593,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
/* check there is a direct reference from rel1 to rel2 */
if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids))
return false; /* only indirect refs, so reject */
- /* check we won't have a dangerous PHV */
- if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
- return false; /* might be unable to handle required PHV */
}
/*
@@ -1279,57 +1270,6 @@ has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
/*
- * There's a pitfall for creating parameterized nestloops: suppose the inner
- * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's
- * minimum eval_at set includes the outer rel (B) and some third rel (C).
- * We might think we could create a B/A nestloop join that's parameterized by
- * C. But we would end up with a plan in which the PHV's expression has to be
- * evaluated as a nestloop parameter at the B/A join; and the executor is only
- * set up to handle simple Vars as NestLoopParams. Rather than add complexity
- * and overhead to the executor for such corner cases, it seems better to
- * forbid the join. (Note that we can still make use of A's parameterized
- * path with pre-joined B+C as the outer rel. have_join_order_restriction()
- * ensures that we will consider making such a join even if there are not
- * other reasons to do so.)
- *
- * So we check whether any PHVs used in the query could pose such a hazard.
- * We don't have any simple way of checking whether a risky PHV would actually
- * be used in the inner plan, and the case is so unusual that it doesn't seem
- * worth working very hard on it.
- *
- * This needs to be checked in two places. If the inner rel's minimum
- * parameterization would trigger the restriction, then join_is_legal() should
- * reject the join altogether, because there will be no workable paths for it.
- * But joinpath.c has to check again for every proposed nestloop path, because
- * the inner path might have more than the minimum parameterization, causing
- * some PHV to be dangerous for it that otherwise wouldn't be.
- */
-bool
-have_dangerous_phv(PlannerInfo *root,
- Relids outer_relids, Relids inner_params)
-{
- ListCell *lc;
-
- foreach(lc, root->placeholder_list)
- {
- PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
-
- if (!bms_is_subset(phinfo->ph_eval_at, inner_params))
- continue; /* ignore, could not be a nestloop param */
- if (!bms_overlap(phinfo->ph_eval_at, outer_relids))
- continue; /* ignore, not relevant to this join */
- if (bms_is_subset(phinfo->ph_eval_at, outer_relids))
- continue; /* safe, it can be eval'd within outerrel */
- /* Otherwise, it's potentially unsafe, so reject the join */
- return true;
- }
-
- /* OK to perform the join */
- return false;
-}
-
-
-/*
* is_dummy_rel --- has relation been proven empty?
*/
bool
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 4ad30b7627e..0b61aef962c 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -4344,13 +4344,16 @@ create_nestloop_plan(PlannerInfo *root,
NestLoop *join_plan;
Plan *outer_plan;
Plan *inner_plan;
+ Relids outerrelids;
List *tlist = build_path_tlist(root, &best_path->jpath.path);
List *joinrestrictclauses = best_path->jpath.joinrestrictinfo;
List *joinclauses;
List *otherclauses;
- Relids outerrelids;
List *nestParams;
+ List *outer_tlist;
+ bool outer_parallel_safe;
Relids saveOuterRels = root->curOuterRels;
+ ListCell *lc;
/*
* If the inner path is parameterized by the topmost parent of the outer
@@ -4372,8 +4375,8 @@ create_nestloop_plan(PlannerInfo *root,
outer_plan = create_plan_recurse(root, best_path->jpath.outerjoinpath, 0);
/* For a nestloop, include outer relids in curOuterRels for inner side */
- root->curOuterRels = bms_union(root->curOuterRels,
- best_path->jpath.outerjoinpath->parent->relids);
+ outerrelids = best_path->jpath.outerjoinpath->parent->relids;
+ root->curOuterRels = bms_union(root->curOuterRels, outerrelids);
inner_plan = create_plan_recurse(root, best_path->jpath.innerjoinpath, 0);
@@ -4412,9 +4415,66 @@ create_nestloop_plan(PlannerInfo *root,
* Identify any nestloop parameters that should be supplied by this join
* node, and remove them from root->curOuterParams.
*/
- outerrelids = best_path->jpath.outerjoinpath->parent->relids;
- nestParams = identify_current_nestloop_params(root, outerrelids);
+ nestParams = identify_current_nestloop_params(root,
+ outerrelids,
+ PATH_REQ_OUTER((Path *) best_path));
+
+ /*
+ * While nestloop parameters that are Vars had better be available from
+ * the outer_plan already, there are edge cases where nestloop parameters
+ * that are PHVs won't be. In such cases we must add them to the
+ * outer_plan's tlist, since the executor's NestLoopParam machinery
+ * requires the params to be simple outer-Var references to that tlist.
+ * (This is cheating a little bit, because the outer path's required-outer
+ * relids might not be enough to allow evaluating such a PHV. But in
+ * practice, if we could have evaluated the PHV at the nestloop node, we
+ * can do so in the outer plan too.)
+ */
+ outer_tlist = outer_plan->targetlist;
+ outer_parallel_safe = outer_plan->parallel_safe;
+ foreach(lc, nestParams)
+ {
+ NestLoopParam *nlp = (NestLoopParam *) lfirst(lc);
+ PlaceHolderVar *phv;
+ TargetEntry *tle;
+
+ if (IsA(nlp->paramval, Var))
+ continue; /* nothing to do for simple Vars */
+ /* Otherwise it must be a PHV */
+ phv = castNode(PlaceHolderVar, nlp->paramval);
+
+ if (tlist_member((Expr *) phv, outer_tlist))
+ continue; /* already available */
+
+ /*
+ * It's possible that nestloop parameter PHVs selected to evaluate
+ * here contain references to surviving root->curOuterParams items
+ * (that is, they reference values that will be supplied by some
+ * higher-level nestloop). Those need to be converted to Params now.
+ * Note: it's safe to do this after the tlist_member() check, because
+ * equal() won't pay attention to phv->phexpr.
+ */
+ phv->phexpr = (Expr *) replace_nestloop_params(root,
+ (Node *) phv->phexpr);
+
+ /* Make a shallow copy of outer_tlist, if we didn't already */
+ if (outer_tlist == outer_plan->targetlist)
+ outer_tlist = list_copy(outer_tlist);
+ /* ... and add the needed expression */
+ tle = makeTargetEntry((Expr *) copyObject(phv),
+ list_length(outer_tlist) + 1,
+ NULL,
+ true);
+ outer_tlist = lappend(outer_tlist, tle);
+ /* ... and track whether tlist is (still) parallel-safe */
+ if (outer_parallel_safe)
+ outer_parallel_safe = is_parallel_safe(root, (Node *) phv);
+ }
+ if (outer_tlist != outer_plan->targetlist)
+ outer_plan = change_plan_targetlist(outer_plan, outer_tlist,
+ outer_parallel_safe);
+ /* And finally, we can build the join plan node */
join_plan = make_nestloop(tlist,
joinclauses,
otherclauses,
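The shallow-copy dance above is a copy-on-write idiom: the outer tlist is duplicated at most once, and only if a PHV actually has to be added. A standalone sketch with a toy list type (all names here are illustrative, not PostgreSQL APIs):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct toylist { int *items; int len; };

static struct toylist *
toylist_copy(const struct toylist *src)
{
    struct toylist *dst = malloc(sizeof(*dst));

    dst->items = malloc(sizeof(int) * (src->len + 1));
    memcpy(dst->items, src->items, sizeof(int) * src->len);
    dst->len = src->len;
    return dst;
}

/* Append value unless present; copy the original list at most once. */
static void
append_missing(struct toylist **work, const struct toylist *orig, int value)
{
    for (int i = 0; i < (*work)->len; i++)
        if ((*work)->items[i] == value)
            return;                     /* already available, nothing to do */
    if (*work == orig)
        *work = toylist_copy(orig);     /* first real change: copy now */
    (*work)->items[(*work)->len++] = value;
}

int main(void)
{
    int     base_items[] = {1, 2, 3};
    struct toylist base = {base_items, 3};
    struct toylist *work = &base;

    append_missing(&work, &base, 2);    /* present: no copy made */
    append_missing(&work, &base, 7);    /* missing: copy, then append */
    printf("copied: %s, len now: %d\n",
           work != &base ? "yes" : "no", work->len);
    if (work != &base)
    {
        free(work->items);
        free(work);
    }
    return 0;
}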
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 49ad6e83578..549aedcfa99 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -331,7 +331,6 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
glob->finalrteperminfos = NIL;
glob->finalrowmarks = NIL;
glob->resultRelations = NIL;
- glob->firstResultRels = NIL;
glob->appendRelations = NIL;
glob->partPruneInfos = NIL;
glob->relationOids = NIL;
@@ -571,7 +570,6 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions,
glob->prunableRelids);
result->permInfos = glob->finalrteperminfos;
result->resultRelations = glob->resultRelations;
- result->firstResultRels = glob->firstResultRels;
result->appendRelations = glob->appendRelations;
result->subplans = glob->subplans;
result->rewindPlanIDs = glob->rewindPlanIDs;
@@ -6881,7 +6879,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
*
* tableOid is the table on which the index is to be built. indexOid is the
* OID of an index to be created or reindexed (which must be an index with
- * support for parallel builds - currently btree or BRIN).
+ * support for parallel builds - currently btree, GIN, or BRIN).
*
* Return value is the number of parallel worker processes to request. It
* may be unsafe to proceed if this is 0. Note that this does not include the
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 150e9f060ee..846e44186c3 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -1097,9 +1097,10 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
/*
* Set up the visible plan targetlist as being the same as
- * the first RETURNING list. This is for the use of
- * EXPLAIN; the executor won't pay any attention to the
- * targetlist. We postpone this step until here so that
+ * the first RETURNING list. This is mostly for the use
+ * of EXPLAIN; the executor won't execute that targetlist,
+ * although it does use it to prepare the node's result
+ * tuple slot. We postpone this step until here so that
* we don't have to do set_returning_clause_references()
* twice on identical targetlists.
*/
@@ -1248,9 +1249,6 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
lappend_int(root->glob->resultRelations,
splan->rootRelation);
}
- root->glob->firstResultRels =
- lappend_int(root->glob->firstResultRels,
- linitial_int(splan->resultRelations));
}
break;
case T_Append:
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 26a3e050086..f45131c34c5 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -3333,6 +3333,13 @@ eval_const_expressions_mutator(Node *node,
-1,
coalesceexpr->coalescecollid);
+ /*
+ * If there's exactly one surviving argument, we no longer
+ * need COALESCE at all: the result is that argument
+ */
+ if (list_length(newargs) == 1)
+ return (Node *) linitial(newargs);
+
newcoalesce = makeNode(CoalesceExpr);
newcoalesce->coalescetype = coalesceexpr->coalescetype;
newcoalesce->coalescecollid = coalesceexpr->coalescecollid;
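A self-contained sketch of the new simplification using a toy argument representation (not PostgreSQL's node types): NULL constants are dropped, a non-NULL constant ends the list, and a single survivor replaces the CoalesceExpr outright.

#include <stdbool.h>
#include <stdio.h>

struct arg { bool is_const; bool is_null; };

/* Returns the number of surviving args written to out[]. */
static int
fold_coalesce(const struct arg *in, int n, struct arg *out)
{
    int kept = 0;

    for (int i = 0; i < n; i++)
    {
        if (in[i].is_const && in[i].is_null)
            continue;               /* a NULL constant can never be chosen */
        out[kept++] = in[i];
        if (in[i].is_const)
            break;                  /* a non-NULL constant ends the search */
    }
    return kept;
}

int main(void)
{
    /* COALESCE(NULL, x), where x is a non-constant expression */
    struct arg in[] = {{true, true}, {false, false}};
    struct arg out[2];

    if (fold_coalesce(in, 2, out) == 1)
        printf("COALESCE dropped entirely; result is the argument itself\n");
    return 0;
}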
diff --git a/src/backend/optimizer/util/paramassign.c b/src/backend/optimizer/util/paramassign.c
index 3bd3ce37c8f..4c13c5931b4 100644
--- a/src/backend/optimizer/util/paramassign.c
+++ b/src/backend/optimizer/util/paramassign.c
@@ -599,38 +599,46 @@ process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params)
}
/*
- * Identify any NestLoopParams that should be supplied by a NestLoop plan
- * node with the specified lefthand rels. Remove them from the active
- * root->curOuterParams list and return them as the result list.
+ * Identify any NestLoopParams that should be supplied by a NestLoop
+ * plan node with the specified lefthand rels and required-outer rels.
+ * Remove them from the active root->curOuterParams list and return
+ * them as the result list.
*
- * XXX Here we also hack up the returned Vars and PHVs so that they do not
- * contain nullingrel sets exceeding what is available from the outer side.
- * This is needed if we have applied outer join identity 3,
- * (A leftjoin B on (Pab)) leftjoin C on (Pb*c)
- * = A leftjoin (B leftjoin C on (Pbc)) on (Pab)
- * and C contains lateral references to B. It's still safe to apply the
- * identity, but the parser will have created those references in the form
- * "b*" (i.e., with varnullingrels listing the A/B join), while what we will
- * have available from the nestloop's outer side is just "b". We deal with
- * that here by stripping the nullingrels down to what is available from the
- * outer side according to leftrelids.
- *
- * That fixes matters for the case of forward application of identity 3.
- * If the identity was applied in the reverse direction, we will have
- * parameter Vars containing too few nullingrel bits rather than too many.
- * Currently, that causes no problems because setrefs.c applies only a
- * subset check to nullingrels in NestLoopParams, but we'd have to work
- * harder if we ever want to tighten that check. This is all pretty annoying
- * because it greatly weakens setrefs.c's cross-check, but the alternative
+ * Vars and PHVs appearing in the result list must have nullingrel sets
+ * that could validly appear in the lefthand rel's output. Ordinarily that
+ * would be true already, but if we have applied outer join identity 3,
+ * there could be more or fewer nullingrel bits in the nodes appearing in
+ * curOuterParams than are in the nominal leftrelids. We deal with that by
+ * forcing their nullingrel sets to include exactly the outer-join relids
+ * that appear in leftrelids and can null the respective Var or PHV.
+ * This fix is a bit ad-hoc and intellectually unsatisfactory, because it's
+ * essentially jumping to the conclusion that we've placed evaluation of
+ * the nestloop parameters correctly, and thus it defeats the intent of the
+ * subsequent nullingrel cross-checks in setrefs.c. But the alternative
* seems to be to generate multiple versions of each laterally-parameterized
* subquery, which'd be unduly expensive.
*/
List *
-identify_current_nestloop_params(PlannerInfo *root, Relids leftrelids)
+identify_current_nestloop_params(PlannerInfo *root,
+ Relids leftrelids,
+ Relids outerrelids)
{
List *result;
+ Relids allleftrelids;
ListCell *cell;
+ /*
+ * We'll be able to evaluate a PHV in the lefthand path if it uses the
+ * lefthand rels plus any available required-outer rels. But don't do so
+ * if it uses *only* required-outer rels; in that case it should be
+ * evaluated higher in the tree. For Vars, no such hair-splitting is
+ * necessary since they depend on only one relid.
+ */
+ if (outerrelids)
+ allleftrelids = bms_union(leftrelids, outerrelids);
+ else
+ allleftrelids = leftrelids;
+
result = NIL;
foreach(cell, root->curOuterParams)
{
@@ -646,25 +654,60 @@ identify_current_nestloop_params(PlannerInfo *root, Relids leftrelids)
bms_is_member(nlp->paramval->varno, leftrelids))
{
Var *var = (Var *) nlp->paramval;
+ RelOptInfo *rel = root->simple_rel_array[var->varno];
root->curOuterParams = foreach_delete_current(root->curOuterParams,
cell);
- var->varnullingrels = bms_intersect(var->varnullingrels,
+ var->varnullingrels = bms_intersect(rel->nulling_relids,
leftrelids);
result = lappend(result, nlp);
}
- else if (IsA(nlp->paramval, PlaceHolderVar) &&
- bms_is_subset(find_placeholder_info(root,
- (PlaceHolderVar *) nlp->paramval)->ph_eval_at,
- leftrelids))
+ else if (IsA(nlp->paramval, PlaceHolderVar))
{
PlaceHolderVar *phv = (PlaceHolderVar *) nlp->paramval;
+ PlaceHolderInfo *phinfo = find_placeholder_info(root, phv);
+ Relids eval_at = phinfo->ph_eval_at;
- root->curOuterParams = foreach_delete_current(root->curOuterParams,
- cell);
- phv->phnullingrels = bms_intersect(phv->phnullingrels,
- leftrelids);
- result = lappend(result, nlp);
+ if (bms_is_subset(eval_at, allleftrelids) &&
+ bms_overlap(eval_at, leftrelids))
+ {
+ root->curOuterParams = foreach_delete_current(root->curOuterParams,
+ cell);
+
+ /*
+ * Deal with an edge case: if the PHV was pulled up out of a
+ * subquery and it contains a subquery that was originally
+ * pushed down from this query level, then that will still be
+ * represented as a SubLink, because SS_process_sublinks won't
+ * recurse into outer PHVs, so it didn't get transformed
+ * during expression preprocessing in the subquery. We need a
+ * version of the PHV that has a SubPlan, which we can get
+ * from the current query level's placeholder_list. This is
+ * quite grotty of course, but dealing with it earlier in the
+ * handling of subplan params would be just as grotty, and it
+ * might end up being a waste of cycles if we don't decide to
+ * treat the PHV as a NestLoopParam. (Perhaps that whole
+ * mechanism should be redesigned someday, but today is not
+ * that day.)
+ */
+ if (root->parse->hasSubLinks)
+ {
+ phv = copyObject(phinfo->ph_var);
+
+ /*
+ * The ph_var will have empty nullingrels, but that
+ * doesn't matter since we're about to overwrite
+ * phv->phnullingrels. Other fields should be OK already.
+ */
+ nlp->paramval = (Var *) phv;
+ }
+
+ phv->phnullingrels =
+ bms_intersect(get_placeholder_nulling_relids(root, phinfo),
+ leftrelids);
+
+ result = lappend(result, nlp);
+ }
}
}
return result;
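The subset-plus-overlap test above reduces to two bitmask checks. A standalone sketch with plain integer bitsets standing in for Relids (illustrative only):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t relids;            /* bit i set => rel i is a member */

static bool
phv_goes_to_lefthand(relids eval_at, relids leftrelids, relids outerrelids)
{
    relids  allleft = leftrelids | outerrelids;

    return (eval_at & ~allleft) == 0 &&     /* subset of left + outer rels */
           (eval_at & leftrelids) != 0;     /* but not outer rels only */
}

int main(void)
{
    relids  leftrelids = 0x3;       /* rels {1,2} (one bit per relid) */
    relids  outerrelids = 0x4;      /* rel {3} from required-outer */

    /* Uses a lefthand rel plus an outer rel: evaluable here. */
    printf("%d\n", phv_goes_to_lefthand(0x5, leftrelids, outerrelids));
    /* Uses only the outer rel: must be evaluated higher in the tree. */
    printf("%d\n", phv_goes_to_lefthand(0x4, leftrelids, outerrelids));
    return 0;
}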
diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c
index 41a4c81e94a..e1cd00a72fb 100644
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@@ -545,3 +545,43 @@ contain_placeholder_references_walker(Node *node,
return expression_tree_walker(node, contain_placeholder_references_walker,
context);
}
+
+/*
+ * Compute the set of outer-join relids that can null a placeholder.
+ *
+ * This is analogous to RelOptInfo.nulling_relids for Vars, but we compute it
+ * on-the-fly rather than saving it somewhere. Currently the value is needed
+ * at most once per query, so there's little value in doing otherwise. If it
+ * ever gains more widespread use, perhaps we should cache the result in
+ * PlaceHolderInfo.
+ */
+Relids
+get_placeholder_nulling_relids(PlannerInfo *root, PlaceHolderInfo *phinfo)
+{
+ Relids result = NULL;
+ int relid = -1;
+
+ /*
+ * Form the union of all potential nulling OJs for each baserel included
+ * in ph_eval_at.
+ */
+ while ((relid = bms_next_member(phinfo->ph_eval_at, relid)) > 0)
+ {
+ RelOptInfo *rel = root->simple_rel_array[relid];
+
+ /* ignore the RTE_GROUP RTE */
+ if (relid == root->group_rtindex)
+ continue;
+
+ if (rel == NULL) /* must be an outer join */
+ {
+ Assert(bms_is_member(relid, root->outer_join_rels));
+ continue;
+ }
+ result = bms_add_members(result, rel->nulling_relids);
+ }
+
+ /* Now remove any OJs already included in ph_eval_at, and we're done. */
+ result = bms_del_members(result, phinfo->ph_eval_at);
+ return result;
+}
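The same computation in miniature, with integer bitsets standing in for Relids and a fixed array in place of simple_rel_array (all assumptions of the sketch, not PostgreSQL code): union the per-baserel nulling sets over ph_eval_at's members, then strip the outer joins already inside ph_eval_at.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t relids;

#define RELID_BIT(r)    ((relids) 1 << (r))

static relids
placeholder_nulling(const relids *rel_nulling,  /* indexed by relid */
                    relids eval_at, relids outer_join_rels)
{
    relids  result = 0;

    for (int relid = 0; relid < 64; relid++)
    {
        if (!(eval_at & RELID_BIT(relid)))
            continue;
        if (outer_join_rels & RELID_BIT(relid))
            continue;               /* outer-join relid, no base rel here */
        result |= rel_nulling[relid];
    }
    return result & ~eval_at;       /* drop OJs already inside eval_at */
}

int main(void)
{
    relids  rel_nulling[64] = {0};

    rel_nulling[1] = RELID_BIT(3) | RELID_BIT(4);   /* nulled by OJs 3, 4 */
    rel_nulling[2] = RELID_BIT(3);                  /* nulled by OJ 3 */

    /* ph_eval_at = {rel 1, rel 2, OJ 3}; the OJ relid set is {3}. */
    relids  r = placeholder_nulling(rel_nulling,
                                    RELID_BIT(1) | RELID_BIT(2) | RELID_BIT(3),
                                    RELID_BIT(3));

    printf("0x%llx\n", (unsigned long long) r);     /* 0x10: OJ 4 only */
    return 0;
}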
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 1f4d6adda52..34f7c17f576 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -239,107 +239,23 @@ parse_sub_analyze(Node *parseTree, ParseState *parentParseState,
}
/*
- * setQueryLocationAndLength
- * Set query's location and length from statement and ParseState
- *
- * Some statements, like PreparableStmt, can be located within parentheses.
- * For example "(SELECT 1)" or "COPY (UPDATE ...) to x;". For those, we
- * cannot use the whole string from the statement's location or the SQL
- * string would yield incorrectly. The parser will set stmt_len, reflecting
- * the size of the statement within the parentheses. Thus, when stmt_len is
- * available, we need to use it for the Query's stmt_len.
- *
- * For other cases, the parser can't provide the length of individual
- * statements. However, we have the statement's location plus the length
- * (p_stmt_len) and location (p_stmt_location) of the top level RawStmt,
- * stored in pstate. Thus, the statement's length is the RawStmt's length
- * minus how much we've advanced in the RawStmt's string.
- */
-static void
-setQueryLocationAndLength(ParseState *pstate, Query *qry, Node *parseTree)
-{
- ParseLoc stmt_len = 0;
-
- /*
- * If there is no information about the top RawStmt's length, leave it at
- * 0 to use the whole string.
- */
- if (pstate->p_stmt_len == 0)
- return;
-
- switch (nodeTag(parseTree))
- {
- case T_InsertStmt:
- qry->stmt_location = ((InsertStmt *) parseTree)->stmt_location;
- stmt_len = ((InsertStmt *) parseTree)->stmt_len;
- break;
-
- case T_DeleteStmt:
- qry->stmt_location = ((DeleteStmt *) parseTree)->stmt_location;
- stmt_len = ((DeleteStmt *) parseTree)->stmt_len;
- break;
-
- case T_UpdateStmt:
- qry->stmt_location = ((UpdateStmt *) parseTree)->stmt_location;
- stmt_len = ((UpdateStmt *) parseTree)->stmt_len;
- break;
-
- case T_MergeStmt:
- qry->stmt_location = ((MergeStmt *) parseTree)->stmt_location;
- stmt_len = ((MergeStmt *) parseTree)->stmt_len;
- break;
-
- case T_SelectStmt:
- qry->stmt_location = ((SelectStmt *) parseTree)->stmt_location;
- stmt_len = ((SelectStmt *) parseTree)->stmt_len;
- break;
-
- case T_PLAssignStmt:
- qry->stmt_location = ((PLAssignStmt *) parseTree)->location;
- break;
-
- default:
- qry->stmt_location = pstate->p_stmt_location;
- break;
- }
-
- if (stmt_len > 0)
- {
- /* Statement's length is known, use it */
- qry->stmt_len = stmt_len;
- }
- else
- {
- /*
- * Compute the statement's length from the statement's location and
- * the RawStmt's length and location.
- */
- qry->stmt_len = pstate->p_stmt_len - (qry->stmt_location - pstate->p_stmt_location);
- }
-
- /* The calculated statement length should be calculated as positive. */
- Assert(qry->stmt_len >= 0);
-}
-
-/*
* transformTopLevelStmt -
* transform a Parse tree into a Query tree.
*
- * This function is just responsible for storing location data
- * from the RawStmt into the ParseState.
+ * This function is just responsible for transferring statement location data
+ * from the RawStmt into the finished Query.
*/
Query *
transformTopLevelStmt(ParseState *pstate, RawStmt *parseTree)
{
Query *result;
- /* Store RawStmt's length and location in pstate */
- pstate->p_stmt_len = parseTree->stmt_len;
- pstate->p_stmt_location = parseTree->stmt_location;
-
/* We're at top level, so allow SELECT INTO */
result = transformOptionalSelectInto(pstate, parseTree->stmt);
+ result->stmt_location = parseTree->stmt_location;
+ result->stmt_len = parseTree->stmt_len;
+
return result;
}
@@ -508,7 +424,6 @@ transformStmt(ParseState *pstate, Node *parseTree)
/* Mark as original query until we learn differently */
result->querySource = QSRC_ORIGINAL;
result->canSetTag = true;
- setQueryLocationAndLength(pstate, result, parseTree);
return result;
}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 0b5652071d1..70a0d832a11 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -154,7 +154,6 @@ static void base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner,
const char *msg);
static RawStmt *makeRawStmt(Node *stmt, int stmt_location);
static void updateRawStmtEnd(RawStmt *rs, int end_location);
-static void updatePreparableStmtEnd(Node *n, int end_location);
static Node *makeColumnRef(char *colname, List *indirection,
int location, core_yyscan_t yyscanner);
static Node *makeTypeCast(Node *arg, TypeName *typename, int location);
@@ -178,13 +177,13 @@ static void insertSelectOptions(SelectStmt *stmt,
SelectLimit *limitClause,
WithClause *withClause,
core_yyscan_t yyscanner);
-static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location);
+static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg);
static Node *doNegate(Node *n, int location);
static void doNegateFloat(Float *v);
static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location);
static Node *makeOrExpr(Node *lexpr, Node *rexpr, int location);
static Node *makeNotExpr(Node *expr, int location);
-static Node *makeAArrayExpr(List *elements, int location);
+static Node *makeAArrayExpr(List *elements, int location, int end_location);
static Node *makeSQLValueFunction(SQLValueFunctionOp op, int32 typmod,
int location);
static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args,
@@ -523,7 +522,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type <defelt> def_elem reloption_elem old_aggr_elem operator_def_elem
%type <node> def_arg columnElem where_clause where_or_current_clause
a_expr b_expr c_expr AexprConst indirection_el opt_slice_bound
- columnref in_expr having_clause func_table xmltable array_expr
+ columnref having_clause func_table xmltable array_expr
OptWhereClause operator_def_arg
%type <list> opt_column_and_period_list
%type <list> rowsfrom_item rowsfrom_list opt_col_def_list
@@ -2669,6 +2668,12 @@ alter_table_cmd:
c->alterDeferrability = true;
if ($4 & CAS_NO_INHERIT)
c->alterInheritability = true;
+ /* handle unsupported case with specific error message */
+ if ($4 & CAS_NOT_VALID)
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("constraints cannot be altered to be NOT VALID"),
+ parser_errposition(@4));
processCASbits($4, @4, "FOREIGN KEY",
&c->deferrable,
&c->initdeferred,
@@ -3417,7 +3422,6 @@ CopyStmt: COPY opt_binary qualified_name opt_column_list
{
CopyStmt *n = makeNode(CopyStmt);
- updatePreparableStmtEnd($3, @4);
n->relation = NULL;
n->query = $3;
n->attlist = NIL;
@@ -6037,6 +6041,26 @@ CreateTrigStmt:
EXECUTE FUNCTION_or_PROCEDURE func_name '(' TriggerFuncArgs ')'
{
CreateTrigStmt *n = makeNode(CreateTrigStmt);
+ bool dummy;
+
+ if (($11 & CAS_NOT_VALID) != 0)
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("constraint triggers cannot be marked %s",
+ "NOT VALID"),
+ parser_errposition(@11));
+ if (($11 & CAS_NO_INHERIT) != 0)
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("constraint triggers cannot be marked %s",
+ "NO INHERIT"),
+ parser_errposition(@11));
+ if (($11 & CAS_NOT_ENFORCED) != 0)
+ ereport(ERROR,
+ errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("constraint triggers cannot be marked %s",
+ "NOT ENFORCED"),
+ parser_errposition(@11));
n->replace = $2;
if (n->replace) /* not supported, see CreateTrigger */
@@ -6056,7 +6080,7 @@ CreateTrigStmt:
n->whenClause = $15;
n->transitionRels = NIL;
processCASbits($11, @11, "TRIGGER",
- &n->deferrable, &n->initdeferred, NULL,
+ &n->deferrable, &n->initdeferred, &dummy,
NULL, NULL, yyscanner);
n->constrrel = $10;
$$ = (Node *) n;
@@ -7479,6 +7503,8 @@ fetch_args: cursor_name
n->portalname = $1;
n->direction = FETCH_FORWARD;
n->howMany = 1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_NONE;
$$ = (Node *) n;
}
| from_in cursor_name
@@ -7488,6 +7514,19 @@ fetch_args: cursor_name
n->portalname = $2;
n->direction = FETCH_FORWARD;
n->howMany = 1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_NONE;
+ $$ = (Node *) n;
+ }
+ | SignedIconst opt_from_in cursor_name
+ {
+ FetchStmt *n = makeNode(FetchStmt);
+
+ n->portalname = $3;
+ n->direction = FETCH_FORWARD;
+ n->howMany = $1;
+ n->location = @1;
+ n->direction_keyword = FETCH_KEYWORD_NONE;
$$ = (Node *) n;
}
| NEXT opt_from_in cursor_name
@@ -7497,6 +7536,8 @@ fetch_args: cursor_name
n->portalname = $3;
n->direction = FETCH_FORWARD;
n->howMany = 1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_NEXT;
$$ = (Node *) n;
}
| PRIOR opt_from_in cursor_name
@@ -7506,6 +7547,8 @@ fetch_args: cursor_name
n->portalname = $3;
n->direction = FETCH_BACKWARD;
n->howMany = 1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_PRIOR;
$$ = (Node *) n;
}
| FIRST_P opt_from_in cursor_name
@@ -7515,6 +7558,8 @@ fetch_args: cursor_name
n->portalname = $3;
n->direction = FETCH_ABSOLUTE;
n->howMany = 1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_FIRST;
$$ = (Node *) n;
}
| LAST_P opt_from_in cursor_name
@@ -7524,6 +7569,8 @@ fetch_args: cursor_name
n->portalname = $3;
n->direction = FETCH_ABSOLUTE;
n->howMany = -1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_LAST;
$$ = (Node *) n;
}
| ABSOLUTE_P SignedIconst opt_from_in cursor_name
@@ -7533,6 +7580,8 @@ fetch_args: cursor_name
n->portalname = $4;
n->direction = FETCH_ABSOLUTE;
n->howMany = $2;
+ n->location = @2;
+ n->direction_keyword = FETCH_KEYWORD_ABSOLUTE;
$$ = (Node *) n;
}
| RELATIVE_P SignedIconst opt_from_in cursor_name
@@ -7542,15 +7591,8 @@ fetch_args: cursor_name
n->portalname = $4;
n->direction = FETCH_RELATIVE;
n->howMany = $2;
- $$ = (Node *) n;
- }
- | SignedIconst opt_from_in cursor_name
- {
- FetchStmt *n = makeNode(FetchStmt);
-
- n->portalname = $3;
- n->direction = FETCH_FORWARD;
- n->howMany = $1;
+ n->location = @2;
+ n->direction_keyword = FETCH_KEYWORD_RELATIVE;
$$ = (Node *) n;
}
| ALL opt_from_in cursor_name
@@ -7560,6 +7602,8 @@ fetch_args: cursor_name
n->portalname = $3;
n->direction = FETCH_FORWARD;
n->howMany = FETCH_ALL;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_ALL;
$$ = (Node *) n;
}
| FORWARD opt_from_in cursor_name
@@ -7569,6 +7613,8 @@ fetch_args: cursor_name
n->portalname = $3;
n->direction = FETCH_FORWARD;
n->howMany = 1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_FORWARD;
$$ = (Node *) n;
}
| FORWARD SignedIconst opt_from_in cursor_name
@@ -7578,6 +7624,8 @@ fetch_args: cursor_name
n->portalname = $4;
n->direction = FETCH_FORWARD;
n->howMany = $2;
+ n->location = @2;
+ n->direction_keyword = FETCH_KEYWORD_FORWARD;
$$ = (Node *) n;
}
| FORWARD ALL opt_from_in cursor_name
@@ -7587,6 +7635,8 @@ fetch_args: cursor_name
n->portalname = $4;
n->direction = FETCH_FORWARD;
n->howMany = FETCH_ALL;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_FORWARD_ALL;
$$ = (Node *) n;
}
| BACKWARD opt_from_in cursor_name
@@ -7596,6 +7646,8 @@ fetch_args: cursor_name
n->portalname = $3;
n->direction = FETCH_BACKWARD;
n->howMany = 1;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_BACKWARD;
$$ = (Node *) n;
}
| BACKWARD SignedIconst opt_from_in cursor_name
@@ -7605,6 +7657,8 @@ fetch_args: cursor_name
n->portalname = $4;
n->direction = FETCH_BACKWARD;
n->howMany = $2;
+ n->location = @2;
+ n->direction_keyword = FETCH_KEYWORD_BACKWARD;
$$ = (Node *) n;
}
| BACKWARD ALL opt_from_in cursor_name
@@ -7614,6 +7668,8 @@ fetch_args: cursor_name
n->portalname = $4;
n->direction = FETCH_BACKWARD;
n->howMany = FETCH_ALL;
+ n->location = -1;
+ n->direction_keyword = FETCH_KEYWORD_BACKWARD_ALL;
$$ = (Node *) n;
}
;
@@ -11629,7 +11685,7 @@ AlterDomainStmt:
{
AlterDomainStmt *n = makeNode(AlterDomainStmt);
- n->subtype = 'T';
+ n->subtype = AD_AlterDefault;
n->typeName = $3;
n->def = $4;
$$ = (Node *) n;
@@ -11639,7 +11695,7 @@ AlterDomainStmt:
{
AlterDomainStmt *n = makeNode(AlterDomainStmt);
- n->subtype = 'N';
+ n->subtype = AD_DropNotNull;
n->typeName = $3;
$$ = (Node *) n;
}
@@ -11648,7 +11704,7 @@ AlterDomainStmt:
{
AlterDomainStmt *n = makeNode(AlterDomainStmt);
- n->subtype = 'O';
+ n->subtype = AD_SetNotNull;
n->typeName = $3;
$$ = (Node *) n;
}
@@ -11657,7 +11713,7 @@ AlterDomainStmt:
{
AlterDomainStmt *n = makeNode(AlterDomainStmt);
- n->subtype = 'C';
+ n->subtype = AD_AddConstraint;
n->typeName = $3;
n->def = $5;
$$ = (Node *) n;
@@ -11667,7 +11723,7 @@ AlterDomainStmt:
{
AlterDomainStmt *n = makeNode(AlterDomainStmt);
- n->subtype = 'X';
+ n->subtype = AD_DropConstraint;
n->typeName = $3;
n->name = $6;
n->behavior = $7;
@@ -11679,7 +11735,7 @@ AlterDomainStmt:
{
AlterDomainStmt *n = makeNode(AlterDomainStmt);
- n->subtype = 'X';
+ n->subtype = AD_DropConstraint;
n->typeName = $3;
n->name = $8;
n->behavior = $9;
@@ -11691,7 +11747,7 @@ AlterDomainStmt:
{
AlterDomainStmt *n = makeNode(AlterDomainStmt);
- n->subtype = 'V';
+ n->subtype = AD_ValidateConstraint;
n->typeName = $3;
n->name = $6;
$$ = (Node *) n;
@@ -12240,7 +12296,6 @@ InsertStmt:
$5->onConflictClause = $6;
$5->returningClause = $7;
$5->withClause = $1;
- $5->stmt_location = @$;
$$ = (Node *) $5;
}
;
@@ -12431,7 +12486,6 @@ DeleteStmt: opt_with_clause DELETE_P FROM relation_expr_opt_alias
n->whereClause = $6;
n->returningClause = $7;
n->withClause = $1;
- n->stmt_location = @$;
$$ = (Node *) n;
}
;
@@ -12506,7 +12560,6 @@ UpdateStmt: opt_with_clause UPDATE relation_expr_opt_alias
n->whereClause = $7;
n->returningClause = $8;
n->withClause = $1;
- n->stmt_location = @$;
$$ = (Node *) n;
}
;
@@ -12584,7 +12637,6 @@ MergeStmt:
m->joinCondition = $8;
m->mergeWhenClauses = $9;
m->returningClause = $10;
- m->stmt_location = @$;
$$ = (Node *) m;
}
@@ -12825,20 +12877,7 @@ SelectStmt: select_no_parens %prec UMINUS
;
select_with_parens:
- '(' select_no_parens ')'
- {
- SelectStmt *n = (SelectStmt *) $2;
-
- /*
- * As SelectStmt's location starts at the SELECT keyword,
- * we need to track the length of the SelectStmt within
- * parentheses to be able to extract the relevant part
- * of the query. Without this, the RawStmt's length would
- * be used and would include the closing parenthesis.
- */
- n->stmt_len = @3 - @2;
- $$ = $2;
- }
+ '(' select_no_parens ')' { $$ = $2; }
| '(' select_with_parens ')' { $$ = $2; }
;
@@ -12960,7 +12999,6 @@ simple_select:
n->groupDistinct = ($7)->distinct;
n->havingClause = $8;
n->windowClause = $9;
- n->stmt_location = @1;
$$ = (Node *) n;
}
| SELECT distinct_clause target_list
@@ -12978,7 +13016,6 @@ simple_select:
n->groupDistinct = ($7)->distinct;
n->havingClause = $8;
n->windowClause = $9;
- n->stmt_location = @1;
$$ = (Node *) n;
}
| values_clause { $$ = $1; }
@@ -12999,20 +13036,19 @@ simple_select:
n->targetList = list_make1(rt);
n->fromClause = list_make1($2);
- n->stmt_location = @1;
$$ = (Node *) n;
}
| select_clause UNION set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
+ $$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4);
}
| select_clause INTERSECT set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
+ $$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4);
}
| select_clause EXCEPT set_quantifier select_clause
{
- $$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4, @1);
+ $$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4);
}
;
@@ -13590,7 +13626,6 @@ values_clause:
{
SelectStmt *n = makeNode(SelectStmt);
- n->stmt_location = @1;
n->valuesLists = list_make1($3);
$$ = (Node *) n;
}
@@ -15287,49 +15322,50 @@ a_expr: c_expr { $$ = $1; }
(Node *) list_make2($5, $7),
@2);
}
- | a_expr IN_P in_expr
+ | a_expr IN_P select_with_parens
{
- /* in_expr returns a SubLink or a list of a_exprs */
- if (IsA($3, SubLink))
- {
- /* generate foo = ANY (subquery) */
- SubLink *n = (SubLink *) $3;
+ /* generate foo = ANY (subquery) */
+ SubLink *n = makeNode(SubLink);
- n->subLinkType = ANY_SUBLINK;
- n->subLinkId = 0;
- n->testexpr = $1;
- n->operName = NIL; /* show it's IN not = ANY */
- n->location = @2;
- $$ = (Node *) n;
- }
- else
- {
- /* generate scalar IN expression */
- $$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "=", $1, $3, @2);
- }
+ n->subselect = $3;
+ n->subLinkType = ANY_SUBLINK;
+ n->subLinkId = 0;
+ n->testexpr = $1;
+ n->operName = NIL; /* show it's IN not = ANY */
+ n->location = @2;
+ $$ = (Node *) n;
}
- | a_expr NOT_LA IN_P in_expr %prec NOT_LA
+ | a_expr IN_P '(' expr_list ')'
{
- /* in_expr returns a SubLink or a list of a_exprs */
- if (IsA($4, SubLink))
- {
- /* generate NOT (foo = ANY (subquery)) */
- /* Make an = ANY node */
- SubLink *n = (SubLink *) $4;
-
- n->subLinkType = ANY_SUBLINK;
- n->subLinkId = 0;
- n->testexpr = $1;
- n->operName = NIL; /* show it's IN not = ANY */
- n->location = @2;
- /* Stick a NOT on top; must have same parse location */
- $$ = makeNotExpr((Node *) n, @2);
- }
- else
- {
- /* generate scalar NOT IN expression */
- $$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "<>", $1, $4, @2);
- }
+ /* generate scalar IN expression */
+ A_Expr *n = makeSimpleA_Expr(AEXPR_IN, "=", $1, (Node *) $4, @2);
+
+ n->rexpr_list_start = @3;
+ n->rexpr_list_end = @5;
+ $$ = (Node *) n;
+ }
+ | a_expr NOT_LA IN_P select_with_parens %prec NOT_LA
+ {
+ /* generate NOT (foo = ANY (subquery)) */
+ SubLink *n = makeNode(SubLink);
+
+ n->subselect = $4;
+ n->subLinkType = ANY_SUBLINK;
+ n->subLinkId = 0;
+ n->testexpr = $1;
+ n->operName = NIL; /* show it's IN not = ANY */
+ n->location = @2;
+ /* Stick a NOT on top; must have same parse location */
+ $$ = makeNotExpr((Node *) n, @2);
+ }
+ | a_expr NOT_LA IN_P '(' expr_list ')'
+ {
+ /* generate scalar NOT IN expression */
+ A_Expr *n = makeSimpleA_Expr(AEXPR_IN, "<>", $1, (Node *) $5, @2);
+
+ n->rexpr_list_start = @4;
+ n->rexpr_list_end = @6;
+ $$ = (Node *) n;
}
| a_expr subquery_Op sub_type select_with_parens %prec Op
{
@@ -16764,15 +16800,15 @@ type_list: Typename { $$ = list_make1($1); }
array_expr: '[' expr_list ']'
{
- $$ = makeAArrayExpr($2, @1);
+ $$ = makeAArrayExpr($2, @1, @3);
}
| '[' array_expr_list ']'
{
- $$ = makeAArrayExpr($2, @1);
+ $$ = makeAArrayExpr($2, @1, @3);
}
| '[' ']'
{
- $$ = makeAArrayExpr(NIL, @1);
+ $$ = makeAArrayExpr(NIL, @1, @2);
}
;
@@ -16894,17 +16930,6 @@ trim_list: a_expr FROM expr_list { $$ = lappend($3, $1); }
| expr_list { $$ = $1; }
;
-in_expr: select_with_parens
- {
- SubLink *n = makeNode(SubLink);
-
- n->subselect = $1;
- /* other fields will be filled later */
- $$ = (Node *) n;
- }
- | '(' expr_list ')' { $$ = (Node *) $2; }
- ;
-
/*
* Define SQL-style CASE clause.
* - Full specification
@@ -18748,47 +18773,6 @@ updateRawStmtEnd(RawStmt *rs, int end_location)
rs->stmt_len = end_location - rs->stmt_location;
}
-/*
- * Adjust a PreparableStmt to reflect that it doesn't run to the end of the
- * string.
- */
-static void
-updatePreparableStmtEnd(Node *n, int end_location)
-{
- if (IsA(n, SelectStmt))
- {
- SelectStmt *stmt = (SelectStmt *) n;
-
- stmt->stmt_len = end_location - stmt->stmt_location;
- }
- else if (IsA(n, InsertStmt))
- {
- InsertStmt *stmt = (InsertStmt *) n;
-
- stmt->stmt_len = end_location - stmt->stmt_location;
- }
- else if (IsA(n, UpdateStmt))
- {
- UpdateStmt *stmt = (UpdateStmt *) n;
-
- stmt->stmt_len = end_location - stmt->stmt_location;
- }
- else if (IsA(n, DeleteStmt))
- {
- DeleteStmt *stmt = (DeleteStmt *) n;
-
- stmt->stmt_len = end_location - stmt->stmt_location;
- }
- else if (IsA(n, MergeStmt))
- {
- MergeStmt *stmt = (MergeStmt *) n;
-
- stmt->stmt_len = end_location - stmt->stmt_location;
- }
- else
- elog(ERROR, "unexpected node type %d", (int) n->type);
-}
-
static Node *
makeColumnRef(char *colname, List *indirection,
int location, core_yyscan_t yyscanner)
@@ -19167,14 +19151,11 @@ insertSelectOptions(SelectStmt *stmt,
errmsg("multiple WITH clauses not allowed"),
parser_errposition(exprLocation((Node *) withClause))));
stmt->withClause = withClause;
-
- /* Update SelectStmt's location to the start of the WITH clause */
- stmt->stmt_location = withClause->location;
}
}
static Node *
-makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location)
+makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg)
{
SelectStmt *n = makeNode(SelectStmt);
@@ -19182,7 +19163,6 @@ makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location)
n->all = all;
n->larg = (SelectStmt *) larg;
n->rarg = (SelectStmt *) rarg;
- n->stmt_location = location;
return (Node *) n;
}
@@ -19300,12 +19280,14 @@ makeNotExpr(Node *expr, int location)
}
static Node *
-makeAArrayExpr(List *elements, int location)
+makeAArrayExpr(List *elements, int location, int location_end)
{
A_ArrayExpr *n = makeNode(A_ArrayExpr);
n->elements = elements;
n->location = location;
+ n->list_start = location;
+ n->list_end = location_end;
return (Node *) n;
}
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 1f8e2d54673..d66276801c6 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -1223,6 +1223,8 @@ transformAExprIn(ParseState *pstate, A_Expr *a)
newa->element_typeid = scalar_type;
newa->elements = aexprs;
newa->multidims = false;
+ newa->list_start = a->rexpr_list_start;
+ newa->list_end = a->rexpr_list_end;
newa->location = -1;
result = (Node *) make_scalar_array_op(pstate,
@@ -2165,6 +2167,8 @@ transformArrayExpr(ParseState *pstate, A_ArrayExpr *a,
/* array_collid will be set by parse_collate.c */
newa->element_typeid = element_type;
newa->elements = newcoercedelems;
+ newa->list_start = a->list_start;
+ newa->list_end = a->list_end;
newa->location = a->location;
return (Node *) newa;
diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c
index 62015431fdf..afcf54169c3 100644
--- a/src/backend/parser/parse_utilcmd.c
+++ b/src/backend/parser/parse_utilcmd.c
@@ -1279,6 +1279,28 @@ transformTableLikeClause(CreateStmtContext *cxt, TableLikeClause *table_like_cla
lst = RelationGetNotNullConstraints(RelationGetRelid(relation), false,
true);
cxt->nnconstraints = list_concat(cxt->nnconstraints, lst);
+
+ /* Copy comments on not-null constraints */
+ if (table_like_clause->options & CREATE_TABLE_LIKE_COMMENTS)
+ {
+ foreach_node(Constraint, nnconstr, lst)
+ {
+ if ((comment = GetComment(get_relation_constraint_oid(RelationGetRelid(relation),
+ nnconstr->conname, false),
+ ConstraintRelationId,
+ 0)) != NULL)
+ {
+ CommentStmt *stmt = makeNode(CommentStmt);
+
+ stmt->objtype = OBJECT_TABCONSTRAINT;
+ stmt->object = (Node *) list_make3(makeString(cxt->relation->schemaname),
+ makeString(cxt->relation->relname),
+ makeString(nnconstr->conname));
+ stmt->comment = comment;
+ cxt->alist = lappend(cxt->alist, stmt);
+ }
+ }
+ }
}
/*
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 4d4a1a3197e..9474095f271 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -781,10 +781,6 @@ ProcessAutoVacLauncherInterrupts(void)
if (LogMemoryContextPending)
ProcessLogMemoryContextInterrupt();
- /* Publish memory contexts of this process */
- if (PublishMemoryContextPending)
- ProcessGetMemoryContextInterrupt();
-
/* Process sinval catchup interrupts that happened while sleeping */
ProcessCatchupInterrupt();
}
@@ -2077,6 +2073,12 @@ do_autovacuum(void)
}
}
}
+
+ /* Release relopts and tabentry to avoid per-relation leakage */
+ if (relopts)
+ pfree(relopts);
+ if (tabentry)
+ pfree(tabentry);
}
table_endscan(relScan);
@@ -2093,7 +2095,8 @@ do_autovacuum(void)
Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
PgStat_StatTabEntry *tabentry;
Oid relid;
- AutoVacOpts *relopts = NULL;
+ AutoVacOpts *relopts;
+ bool free_relopts = false;
bool dovacuum;
bool doanalyze;
bool wraparound;
@@ -2111,7 +2114,9 @@ do_autovacuum(void)
* main rel
*/
relopts = extract_autovac_opts(tuple, pg_class_desc);
- if (relopts == NULL)
+ if (relopts)
+ free_relopts = true;
+ else
{
av_relation *hentry;
bool found;
@@ -2132,6 +2137,12 @@ do_autovacuum(void)
/* ignore analyze for toast tables */
if (dovacuum)
table_oids = lappend_oid(table_oids, relid);
+
+ /* Release relopts and tabentry to avoid leakage */
+ if (free_relopts)
+ pfree(relopts);
+ if (tabentry)
+ pfree(tabentry);
}
table_endscan(relScan);
@@ -2223,6 +2234,12 @@ do_autovacuum(void)
get_namespace_name(classForm->relnamespace),
NameStr(classForm->relname))));
+ /*
+ * Deletion might involve TOAST table access, so ensure we have a
+ * valid snapshot.
+ */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
object.classId = RelationRelationId;
object.objectId = relid;
object.objectSubId = 0;
@@ -2235,6 +2252,7 @@ do_autovacuum(void)
* To commit the deletion, end current transaction and start a new
* one. Note this also releases the locks we took.
*/
+ PopActiveSnapshot();
CommitTransactionCommand();
StartTransactionCommand();
@@ -2503,6 +2521,8 @@ deleted:
pg_atomic_test_set_flag(&MyWorkerInfo->wi_dobalance);
}
+ list_free(table_oids);
+
/*
* Perform additional work items, as requested by backends.
*/
@@ -2684,8 +2704,8 @@ deleted2:
/*
* extract_autovac_opts
*
- * Given a relation's pg_class tuple, return the AutoVacOpts portion of
- * reloptions, if set; otherwise, return NULL.
+ * Given a relation's pg_class tuple, return a palloc'd copy of the
+ * AutoVacOpts portion of reloptions, if set; otherwise, return NULL.
*
* Note: callers do not have a relation lock on the table at this point,
* so the table could have been dropped, and its catalog rows gone, after
@@ -2734,6 +2754,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
autovac_table *tab = NULL;
bool wraparound;
AutoVacOpts *avopts;
+ bool free_avopts = false;
/* fetch the relation's relcache entry */
classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
@@ -2746,8 +2767,10 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
* main table reloptions if the toast table itself doesn't have.
*/
avopts = extract_autovac_opts(classTup, pg_class_desc);
- if (classForm->relkind == RELKIND_TOASTVALUE &&
- avopts == NULL && table_toast_map != NULL)
+ if (avopts)
+ free_avopts = true;
+ else if (classForm->relkind == RELKIND_TOASTVALUE &&
+ table_toast_map != NULL)
{
av_relation *hentry;
bool found;
@@ -2856,6 +2879,8 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
avopts->vacuum_cost_delay >= 0));
}
+ if (free_avopts)
+ pfree(avopts);
heap_freetuple(classTup);
return tab;
}
@@ -2887,6 +2912,10 @@ recheck_relation_needs_vacanalyze(Oid relid,
effective_multixact_freeze_max_age,
dovacuum, doanalyze, wraparound);
+ /* Release tabentry to avoid leakage */
+ if (tabentry)
+ pfree(tabentry);
+
/* ignore ANALYZE for toast tables */
if (classForm->relkind == RELKIND_TOASTVALUE)
*doanalyze = false;
@@ -3144,20 +3173,24 @@ autovacuum_do_vac_analyze(autovac_table *tab, BufferAccessStrategy bstrategy)
VacuumRelation *rel;
List *rel_list;
MemoryContext vac_context;
+ MemoryContext old_context;
/* Let pgstat know what we're doing */
autovac_report_activity(tab);
+ /* Create a context that vacuum() can use as cross-transaction storage */
+ vac_context = AllocSetContextCreate(CurrentMemoryContext,
+ "Vacuum",
+ ALLOCSET_DEFAULT_SIZES);
+
/* Set up one VacuumRelation target, identified by OID, for vacuum() */
+ old_context = MemoryContextSwitchTo(vac_context);
rangevar = makeRangeVar(tab->at_nspname, tab->at_relname, -1);
rel = makeVacuumRelation(rangevar, tab->at_relid, NIL);
rel_list = list_make1(rel);
+ MemoryContextSwitchTo(old_context);
- vac_context = AllocSetContextCreate(CurrentMemoryContext,
- "Vacuum",
- ALLOCSET_DEFAULT_SIZES);
-
- vacuum(rel_list, &tab->at_params, bstrategy, vac_context, true);
+ vacuum(rel_list, tab->at_params, bstrategy, vac_context, true);
MemoryContextDelete(vac_context);
}
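The reordering above makes the VacuumRelation list live in vac_context, which vacuum() keeps across transaction boundaries. A toy arena illustrating the lifetime rule (the arena type and every name here are inventions of this sketch, not PostgreSQL's MemoryContext API): allocate the inputs inside the context handed to the consumer, so they live exactly as long as it does.

#include <stdio.h>
#include <stdlib.h>

struct arena { void *chunks[16]; int n; };

static void *
arena_alloc(struct arena *a, size_t sz)
{
    return a->chunks[a->n++] = malloc(sz);
}

static void
arena_destroy(struct arena *a)
{
    while (a->n > 0)
        free(a->chunks[--a->n]);
}

int main(void)
{
    struct arena vac_ctx = {0};

    /* Allocate the argument inside vac_ctx, then "run vacuum". */
    int *rel_list = arena_alloc(&vac_ctx, sizeof(int));

    *rel_list = 42;
    printf("vacuuming rel %d\n", *rel_list);

    /* Everything kept across "transactions" dies here at once. */
    arena_destroy(&vac_ctx);
    return 0;
}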
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index d3cb3f1891c..fda91ffd1ce 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -663,10 +663,6 @@ ProcessCheckpointerInterrupts(void)
/* Perform logging of memory contexts of this process */
if (LogMemoryContextPending)
ProcessLogMemoryContextInterrupt();
-
- /* Publish memory contexts of this process */
- if (PublishMemoryContextPending)
- ProcessGetMemoryContextInterrupt();
}
/*
diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c
index f24f574e748..0ae9bf906ec 100644
--- a/src/backend/postmaster/interrupt.c
+++ b/src/backend/postmaster/interrupt.c
@@ -48,10 +48,6 @@ ProcessMainLoopInterrupts(void)
/* Perform logging of memory contexts of this process */
if (LogMemoryContextPending)
ProcessLogMemoryContextInterrupt();
-
- /* Publish memory contexts of this process */
- if (PublishMemoryContextPending)
- ProcessGetMemoryContextInterrupt();
}
/*
diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c
index cb7408acf4c..78e39e5f866 100644
--- a/src/backend/postmaster/pgarch.c
+++ b/src/backend/postmaster/pgarch.c
@@ -718,15 +718,15 @@ pgarch_readyXlog(char *xlog)
/*
* Store the file in our max-heap if it has a high enough priority.
*/
- if (arch_files->arch_heap->bh_size < NUM_FILES_PER_DIRECTORY_SCAN)
+ if (binaryheap_size(arch_files->arch_heap) < NUM_FILES_PER_DIRECTORY_SCAN)
{
/* If the heap isn't full yet, quickly add it. */
- arch_file = arch_files->arch_filenames[arch_files->arch_heap->bh_size];
+ arch_file = arch_files->arch_filenames[binaryheap_size(arch_files->arch_heap)];
strcpy(arch_file, basename);
binaryheap_add_unordered(arch_files->arch_heap, CStringGetDatum(arch_file));
/* If we just filled the heap, make it a valid one. */
- if (arch_files->arch_heap->bh_size == NUM_FILES_PER_DIRECTORY_SCAN)
+ if (binaryheap_size(arch_files->arch_heap) == NUM_FILES_PER_DIRECTORY_SCAN)
binaryheap_build(arch_files->arch_heap);
}
else if (ready_file_comparator(binaryheap_first(arch_files->arch_heap),
@@ -744,21 +744,21 @@ pgarch_readyXlog(char *xlog)
FreeDir(rldir);
/* If no files were found, simply return. */
- if (arch_files->arch_heap->bh_size == 0)
+ if (binaryheap_empty(arch_files->arch_heap))
return false;
/*
* If we didn't fill the heap, we didn't make it a valid one. Do that
* now.
*/
- if (arch_files->arch_heap->bh_size < NUM_FILES_PER_DIRECTORY_SCAN)
+ if (binaryheap_size(arch_files->arch_heap) < NUM_FILES_PER_DIRECTORY_SCAN)
binaryheap_build(arch_files->arch_heap);
/*
* Fill arch_files array with the files to archive in ascending order of
* priority.
*/
- arch_files->arch_files_size = arch_files->arch_heap->bh_size;
+ arch_files->arch_files_size = binaryheap_size(arch_files->arch_heap);
for (int i = 0; i < arch_files->arch_files_size; i++)
arch_files->arch_files[i] = DatumGetCString(binaryheap_remove_first(arch_files->arch_heap));
@@ -867,10 +867,6 @@ ProcessPgArchInterrupts(void)
if (LogMemoryContextPending)
ProcessLogMemoryContextInterrupt();
- /* Publish memory contexts of this process */
- if (PublishMemoryContextPending)
- ProcessGetMemoryContextInterrupt();
-
if (ConfigReloadPending)
{
char *archiveLib = pstrdup(XLogArchiveLibrary);
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 7149a67fcbc..27e86cf393f 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -192,10 +192,6 @@ ProcessStartupProcInterrupts(void)
/* Perform logging of memory contexts of this process */
if (LogMemoryContextPending)
ProcessLogMemoryContextInterrupt();
-
- /* Publish memory contexts of this process */
- if (PublishMemoryContextPending)
- ProcessGetMemoryContextInterrupt();
}
diff --git a/src/backend/postmaster/walsummarizer.c b/src/backend/postmaster/walsummarizer.c
index c7a76711cc5..0fec4f1f871 100644
--- a/src/backend/postmaster/walsummarizer.c
+++ b/src/backend/postmaster/walsummarizer.c
@@ -879,10 +879,6 @@ ProcessWalSummarizerInterrupts(void)
/* Perform logging of memory contexts of this process */
if (LogMemoryContextPending)
ProcessLogMemoryContextInterrupt();
-
- /* Publish memory contexts of this process */
- if (PublishMemoryContextPending)
- ProcessGetMemoryContextInterrupt();
}
/*
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 78193cfb964..d9eab5357bc 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -20,58 +20,13 @@
#include "common/unicode_category.h"
#include "utils/pg_locale.h"
-/*
- * For the libc provider, to provide as much functionality as possible on a
- * variety of platforms without going so far as to implement everything from
- * scratch, we use several implementation strategies depending on the
- * situation:
- *
- * 1. In C/POSIX collations, we use hard-wired code. We can't depend on
- * the <ctype.h> functions since those will obey LC_CTYPE. Note that these
- * collations don't give a fig about multibyte characters.
- *
- * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
- * This assumes that every platform uses Unicode codepoints directly
- * as the wchar_t representation of Unicode. (XXX: ICU makes this assumption
- * even for non-UTF8 encodings, which may be a problem.) On some platforms
- * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
- *
- * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
- * values up to 255, and punt for values above that. This is 100% correct
- * only in single-byte encodings such as LATINn. However, non-Unicode
- * multibyte encodings are mostly Far Eastern character sets for which the
- * properties being tested here aren't very relevant for higher code values
- * anyway. The difficulty with using the <wctype.h> functions with
- * non-Unicode multibyte encodings is that we can have no certainty that
- * the platform's wchar_t representation matches what we do in pg_wchar
- * conversions.
- *
- * As a special case, in the "default" collation, (2) and (3) force ASCII
- * letters to follow ASCII upcase/downcase rules, while in a non-default
- * collation we just let the library functions do what they will. The case
- * where this matters is treatment of I/i in Turkish, and the behavior is
- * meant to match the upper()/lower() SQL functions.
- *
- * We store the active collation setting in static variables. In principle
- * it could be passed down to here via the regex library's "struct vars" data
- * structure; but that would require somewhat invasive changes in the regex
- * library, and right now there's no real benefit to be gained from that.
- *
- * NB: the coding here assumes pg_wchar is an unsigned type.
- */
-
-typedef enum
-{
- PG_REGEX_STRATEGY_C, /* C locale (encoding independent) */
- PG_REGEX_STRATEGY_BUILTIN, /* built-in Unicode semantics */
- PG_REGEX_STRATEGY_LIBC_WIDE, /* Use locale_t <wctype.h> functions */
- PG_REGEX_STRATEGY_LIBC_1BYTE, /* Use locale_t <ctype.h> functions */
- PG_REGEX_STRATEGY_ICU, /* Use ICU uchar.h functions */
-} PG_Locale_Strategy;
-
-static PG_Locale_Strategy pg_regex_strategy;
static pg_locale_t pg_regex_locale;
+static struct pg_locale_struct dummy_c_locale = {
+ .collate_is_c = true,
+ .ctype_is_c = true,
+};
+
/*
* Hard-wired character properties for C locale
*/
@@ -228,7 +183,6 @@ void
pg_set_regex_collation(Oid collation)
{
pg_locale_t locale = 0;
- PG_Locale_Strategy strategy;
if (!OidIsValid(collation))
{
@@ -249,8 +203,7 @@ pg_set_regex_collation(Oid collation)
* catalog access is available, so we can't call
* pg_newlocale_from_collation().
*/
- strategy = PG_REGEX_STRATEGY_C;
- locale = 0;
+ locale = &dummy_c_locale;
}
else
{
@@ -267,113 +220,41 @@ pg_set_regex_collation(Oid collation)
* C/POSIX collations use this path regardless of database
* encoding
*/
- strategy = PG_REGEX_STRATEGY_C;
- locale = 0;
- }
- else if (locale->provider == COLLPROVIDER_BUILTIN)
- {
- Assert(GetDatabaseEncoding() == PG_UTF8);
- strategy = PG_REGEX_STRATEGY_BUILTIN;
- }
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- {
- strategy = PG_REGEX_STRATEGY_ICU;
- }
-#endif
- else
- {
- Assert(locale->provider == COLLPROVIDER_LIBC);
- if (GetDatabaseEncoding() == PG_UTF8)
- strategy = PG_REGEX_STRATEGY_LIBC_WIDE;
- else
- strategy = PG_REGEX_STRATEGY_LIBC_1BYTE;
+ locale = &dummy_c_locale;
}
}
- pg_regex_strategy = strategy;
pg_regex_locale = locale;
}
static int
pg_wc_isdigit(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISDIGIT));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isdigit(c, !pg_regex_locale->info.builtin.casemap_full);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isdigit(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISDIGIT));
+ else
+ return pg_regex_locale->ctype->wc_isdigit(c, pg_regex_locale);
}
static int
pg_wc_isalpha(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISALPHA));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isalpha(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isalpha(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISALPHA));
+ else
+ return pg_regex_locale->ctype->wc_isalpha(c, pg_regex_locale);
}
static int
pg_wc_isalnum(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISALNUM));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isalnum(c, !pg_regex_locale->info.builtin.casemap_full);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isalnum(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISALNUM));
+ else
+ return pg_regex_locale->ctype->wc_isalnum(c, pg_regex_locale);
}
static int
@@ -388,231 +269,87 @@ pg_wc_isword(pg_wchar c)
static int
pg_wc_isupper(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISUPPER));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isupper(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isupper_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isupper(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISUPPER));
+ else
+ return pg_regex_locale->ctype->wc_isupper(c, pg_regex_locale);
}
static int
pg_wc_islower(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISLOWER));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_islower(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- islower_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_islower(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISLOWER));
+ else
+ return pg_regex_locale->ctype->wc_islower(c, pg_regex_locale);
}
static int
pg_wc_isgraph(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISGRAPH));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isgraph(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isgraph(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISGRAPH));
+ else
+ return pg_regex_locale->ctype->wc_isgraph(c, pg_regex_locale);
}
static int
pg_wc_isprint(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISPRINT));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isprint(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isprint_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isprint(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISPRINT));
+ else
+ return pg_regex_locale->ctype->wc_isprint(c, pg_regex_locale);
}
static int
pg_wc_ispunct(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISPUNCT));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_ispunct(c, !pg_regex_locale->info.builtin.casemap_full);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_ispunct(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISPUNCT));
+ else
+ return pg_regex_locale->ctype->wc_ispunct(c, pg_regex_locale);
}
static int
pg_wc_isspace(pg_wchar c)
{
- switch (pg_regex_strategy)
- {
- case PG_REGEX_STRATEGY_C:
- return (c <= (pg_wchar) 127 &&
- (pg_char_properties[c] & PG_ISSPACE));
- case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isspace(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- return (c <= (pg_wchar) UCHAR_MAX &&
- isspace_l((unsigned char) c, pg_regex_locale->info.lt));
- break;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_isspace(c);
-#endif
- break;
- }
- return 0; /* can't get here, but keep compiler quiet */
+ if (pg_regex_locale->ctype_is_c)
+ return (c <= (pg_wchar) 127 &&
+ (pg_char_properties[c] & PG_ISSPACE));
+ else
+ return pg_regex_locale->ctype->wc_isspace(c, pg_regex_locale);
}
static pg_wchar
pg_wc_toupper(pg_wchar c)
{
- switch (pg_regex_strategy)
+ if (pg_regex_locale->ctype_is_c)
{
- case PG_REGEX_STRATEGY_C:
- if (c <= (pg_wchar) 127)
- return pg_ascii_toupper((unsigned char) c);
- return c;
- case PG_REGEX_STRATEGY_BUILTIN:
- return unicode_uppercase_simple(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- /* force C behavior for ASCII characters, per comments above */
- if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
- return pg_ascii_toupper((unsigned char) c);
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return towupper_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- /* force C behavior for ASCII characters, per comments above */
- if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
- return pg_ascii_toupper((unsigned char) c);
- if (c <= (pg_wchar) UCHAR_MAX)
- return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
- return c;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_toupper(c);
-#endif
- break;
+ if (c <= (pg_wchar) 127)
+ return pg_ascii_toupper((unsigned char) c);
+ return c;
}
- return 0; /* can't get here, but keep compiler quiet */
+ else
+ return pg_regex_locale->ctype->wc_toupper(c, pg_regex_locale);
}
static pg_wchar
pg_wc_tolower(pg_wchar c)
{
- switch (pg_regex_strategy)
+ if (pg_regex_locale->ctype_is_c)
{
- case PG_REGEX_STRATEGY_C:
- if (c <= (pg_wchar) 127)
- return pg_ascii_tolower((unsigned char) c);
- return c;
- case PG_REGEX_STRATEGY_BUILTIN:
- return unicode_lowercase_simple(c);
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- /* force C behavior for ASCII characters, per comments above */
- if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
- return pg_ascii_tolower((unsigned char) c);
- if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
- return towlower_l((wint_t) c, pg_regex_locale->info.lt);
- /* FALL THRU */
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
- /* force C behavior for ASCII characters, per comments above */
- if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
- return pg_ascii_tolower((unsigned char) c);
- if (c <= (pg_wchar) UCHAR_MAX)
- return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
- return c;
- case PG_REGEX_STRATEGY_ICU:
-#ifdef USE_ICU
- return u_tolower(c);
-#endif
- break;
+ if (c <= (pg_wchar) 127)
+ return pg_ascii_tolower((unsigned char) c);
+ return c;
}
- return 0; /* can't get here, but keep compiler quiet */
+ else
+ return pg_regex_locale->ctype->wc_tolower(c, pg_regex_locale);
}
@@ -738,37 +475,25 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
* would always be true for production values of MAX_SIMPLE_CHR, but it's
* useful to allow it to be small for testing purposes.)
*/
- switch (pg_regex_strategy)
+ if (pg_regex_locale->ctype_is_c)
{
- case PG_REGEX_STRATEGY_C:
#if MAX_SIMPLE_CHR >= 127
- max_chr = (pg_wchar) 127;
- pcc->cv.cclasscode = -1;
+ max_chr = (pg_wchar) 127;
+ pcc->cv.cclasscode = -1;
#else
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
+ max_chr = (pg_wchar) MAX_SIMPLE_CHR;
#endif
- break;
- case PG_REGEX_STRATEGY_BUILTIN:
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
- break;
- case PG_REGEX_STRATEGY_LIBC_WIDE:
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
- break;
- case PG_REGEX_STRATEGY_LIBC_1BYTE:
-#if MAX_SIMPLE_CHR >= UCHAR_MAX
- max_chr = (pg_wchar) UCHAR_MAX;
+ }
+ else
+ {
+ if (pg_regex_locale->ctype->max_chr != 0 &&
+ pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR)
+ {
+ max_chr = pg_regex_locale->ctype->max_chr;
pcc->cv.cclasscode = -1;
-#else
- max_chr = (pg_wchar) MAX_SIMPLE_CHR;
-#endif
- break;
- case PG_REGEX_STRATEGY_ICU:
+ }
+ else
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
- break;
- default:
- Assert(false);
- max_chr = 0; /* can't get here, but keep compiler quiet */
- break;
}
/*
diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c
index 10677da56b2..4aed0dfcebb 100644
--- a/src/backend/replication/logical/launcher.c
+++ b/src/backend/replication/logical/launcher.c
@@ -175,12 +175,14 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
uint16 generation,
BackgroundWorkerHandle *handle)
{
- BgwHandleStatus status;
- int rc;
+ bool result = false;
+ bool dropped_latch = false;
for (;;)
{
+ BgwHandleStatus status;
pid_t pid;
+ int rc;
CHECK_FOR_INTERRUPTS();
@@ -189,8 +191,9 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
/* Worker either died or has started. Return false if died. */
if (!worker->in_use || worker->proc)
{
+ result = worker->in_use;
LWLockRelease(LogicalRepWorkerLock);
- return worker->in_use;
+ break;
}
LWLockRelease(LogicalRepWorkerLock);
@@ -205,7 +208,7 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
if (generation == worker->generation)
logicalrep_worker_cleanup(worker);
LWLockRelease(LogicalRepWorkerLock);
- return false;
+ break; /* result is already false */
}
/*
@@ -220,8 +223,18 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker,
{
ResetLatch(MyLatch);
CHECK_FOR_INTERRUPTS();
+ dropped_latch = true;
}
}
+
+ /*
+ * If we had to clear a latch event in order to wait, be sure to restore
+ * it before exiting. Otherwise the caller may miss events.
+ */
+ if (dropped_latch)
+ SetLatch(MyLatch);
+
+ return result;
}
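/*
 * Editor's sketch, not part of the patch: the latch save-and-restore
 * pattern the WaitForReplicationWorkerAttach() hunk applies. If the wait
 * loop consumed a latch event that may have been meant for the caller,
 * re-arm the latch before returning so the caller's own WaitLatch() does
 * not miss it. done() is a hypothetical stand-in for the real exit
 * condition.
 */
#include "postgres.h"
#include "miscadmin.h"
#include "storage/latch.h"
#include "utils/wait_event.h"

static void
sketch_wait_for_condition(bool (*done) (void))
{
    bool        dropped_latch = false;

    while (!done())
    {
        int         rc;

        rc = WaitLatch(MyLatch,
                       WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
                       10L /* ms */ , WAIT_EVENT_BGWORKER_STARTUP);

        if (rc & WL_LATCH_SET)
        {
            ResetLatch(MyLatch);    /* consume the event to keep waiting */
            CHECK_FOR_INTERRUPTS();
            dropped_latch = true;
        }
    }

    /* Restore any event we swallowed so the caller still sees it. */
    if (dropped_latch)
        SetLatch(MyLatch);
}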
/*
@@ -328,7 +341,7 @@ logicalrep_worker_launch(LogicalRepWorkerType wtype,
if (max_active_replication_origins == 0)
ereport(ERROR,
(errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
- errmsg("cannot start logical replication workers when \"max_active_replication_origins\"=0")));
+ errmsg("cannot start logical replication workers when \"max_active_replication_origins\" is 0")));
/*
* We need to do the modification of the shared memory under lock so that
@@ -1016,7 +1029,7 @@ logicalrep_launcher_attach_dshmem(void)
last_start_times_dsa = dsa_attach(LogicalRepCtx->last_start_dsa);
dsa_pin_mapping(last_start_times_dsa);
last_start_times = dshash_attach(last_start_times_dsa, &dsh_params,
- LogicalRepCtx->last_start_dsh, 0);
+ LogicalRepCtx->last_start_dsh, NULL);
}
MemoryContextSwitchTo(oldcontext);
@@ -1194,10 +1207,21 @@ ApplyLauncherMain(Datum main_arg)
(elapsed = TimestampDifferenceMilliseconds(last_start, now)) >= wal_retrieve_retry_interval)
{
ApplyLauncherSetWorkerStartTime(sub->oid, now);
- logicalrep_worker_launch(WORKERTYPE_APPLY,
- sub->dbid, sub->oid, sub->name,
- sub->owner, InvalidOid,
- DSM_HANDLE_INVALID);
+ if (!logicalrep_worker_launch(WORKERTYPE_APPLY,
+ sub->dbid, sub->oid, sub->name,
+ sub->owner, InvalidOid,
+ DSM_HANDLE_INVALID))
+ {
+ /*
+ * We get here either if we failed to launch a worker
+ * (perhaps for resource-exhaustion reasons) or if we
+ * launched one but it immediately quit. Either way, it
+ * seems appropriate to try again after
+ * wal_retrieve_retry_interval.
+ */
+ wait_time = Min(wait_time,
+ wal_retrieve_retry_interval);
+ }
}
else
{
diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c
index a8d2e024d34..f1eb798f3e9 100644
--- a/src/backend/replication/logical/logical.c
+++ b/src/backend/replication/logical/logical.c
@@ -29,6 +29,7 @@
#include "postgres.h"
#include "access/xact.h"
+#include "access/xlog_internal.h"
#include "access/xlogutils.h"
#include "fmgr.h"
#include "miscadmin.h"
@@ -41,6 +42,7 @@
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/builtins.h"
+#include "utils/injection_point.h"
#include "utils/inval.h"
#include "utils/memutils.h"
@@ -1825,10 +1827,26 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn)
{
bool updated_xmin = false;
bool updated_restart = false;
+ XLogRecPtr restart_lsn pg_attribute_unused();
SpinLockAcquire(&MyReplicationSlot->mutex);
- MyReplicationSlot->data.confirmed_flush = lsn;
+ /* remember the old restart lsn */
+ restart_lsn = MyReplicationSlot->data.restart_lsn;
+
+ /*
+ * Prevent moving the confirmed_flush backwards, as this could lead to
+ * data duplication issues caused by replicating already replicated
+ * changes.
+ *
+ * This can happen when a client acknowledges an LSN it doesn't have
+ * to do anything for, and thus didn't store persistently. After a
+ * restart, the client can send the prior LSN that it stored
+ * persistently as an acknowledgement, but we need to ignore such an
+ * LSN. See similar case handling in CreateDecodingContext.
+ */
+ if (lsn > MyReplicationSlot->data.confirmed_flush)
+ MyReplicationSlot->data.confirmed_flush = lsn;
/* if we're past the location required for bumping xmin, do so */
if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr &&
@@ -1869,6 +1887,18 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn)
/* first write new xmin to disk, so we know what's up after a crash */
if (updated_xmin || updated_restart)
{
+#ifdef USE_INJECTION_POINTS
+ XLogSegNo seg1,
+ seg2;
+
+ XLByteToSeg(restart_lsn, seg1, wal_segment_size);
+ XLByteToSeg(MyReplicationSlot->data.restart_lsn, seg2, wal_segment_size);
+
+ /* trigger injection point, but only if segment changes */
+ if (seg1 != seg2)
+ INJECTION_POINT("logical-replication-slot-advance-segment", NULL);
+#endif
+
ReplicationSlotMarkDirty();
ReplicationSlotSave();
elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
@@ -1893,7 +1923,14 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn)
else
{
SpinLockAcquire(&MyReplicationSlot->mutex);
- MyReplicationSlot->data.confirmed_flush = lsn;
+
+ /*
+ * Prevent moving the confirmed_flush backwards. See comments above
+ * for the details.
+ */
+ if (lsn > MyReplicationSlot->data.confirmed_flush)
+ MyReplicationSlot->data.confirmed_flush = lsn;
+
SpinLockRelease(&MyReplicationSlot->mutex);
}
}
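/*
 * Editor's sketch, not part of the patch: the monotonic-advance guard
 * used above for confirmed_flush, reduced to its essentials. SketchSlot
 * and its mutex are illustrative stand-ins for MyReplicationSlot.
 */
#include "postgres.h"
#include "access/xlogdefs.h"
#include "storage/spin.h"

typedef struct SketchSlot
{
    slock_t     mutex;
    XLogRecPtr  confirmed_flush;
} SketchSlot;

static void
sketch_advance_confirmed_flush(SketchSlot *slot, XLogRecPtr lsn)
{
    SpinLockAcquire(&slot->mutex);
    /* Ignore stale acknowledgements; never move the pointer backwards. */
    if (lsn > slot->confirmed_flush)
        slot->confirmed_flush = lsn;
    SpinLockRelease(&slot->mutex);
}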
diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c
index 67655111875..7b4e8629553 100644
--- a/src/backend/replication/logical/reorderbuffer.c
+++ b/src/backend/replication/logical/reorderbuffer.c
@@ -109,10 +109,22 @@
#include "storage/procarray.h"
#include "storage/sinval.h"
#include "utils/builtins.h"
+#include "utils/inval.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/relfilenumbermap.h"
+/*
+ * Each transaction has an 8MB limit for invalidation messages distributed from
+ * other transactions. This limit is set considering scenarios with many
+ * concurrent logical decoding operations. When the distributed invalidation
+ * messages reach this threshold, the transaction is marked as
+ * RBTXN_DISTR_INVAL_OVERFLOWED to invalidate the complete cache as we have lost
+ * some inval messages and hence don't know what needs to be invalidated.
+ */
+#define MAX_DISTR_INVAL_MSG_PER_TXN \
+ ((8 * 1024 * 1024) / sizeof(SharedInvalidationMessage))
+
/* entry for a hash table we use to map from xid to our transaction state */
typedef struct ReorderBufferTXNByIdEnt
{
@@ -472,6 +484,12 @@ ReorderBufferFreeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn)
txn->invalidations = NULL;
}
+ if (txn->invalidations_distributed)
+ {
+ pfree(txn->invalidations_distributed);
+ txn->invalidations_distributed = NULL;
+ }
+
/* Reset the toast hash */
ReorderBufferToastReset(rb, txn);
@@ -1397,7 +1415,7 @@ ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state)
int32 off;
/* nothing there anymore */
- if (state->heap->bh_size == 0)
+ if (binaryheap_empty(state->heap))
return NULL;
off = DatumGetInt32(binaryheap_first(state->heap));
@@ -2661,7 +2679,17 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
AbortCurrentTransaction();
/* make sure there's no cache pollution */
- ReorderBufferExecuteInvalidations(txn->ninvalidations, txn->invalidations);
+ if (rbtxn_distr_inval_overflowed(txn))
+ {
+ Assert(txn->ninvalidations_distributed == 0);
+ InvalidateSystemCaches();
+ }
+ else
+ {
+ ReorderBufferExecuteInvalidations(txn->ninvalidations, txn->invalidations);
+ ReorderBufferExecuteInvalidations(txn->ninvalidations_distributed,
+ txn->invalidations_distributed);
+ }
if (using_subtxn)
RollbackAndReleaseCurrentSubTransaction();
@@ -2710,8 +2738,17 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn,
AbortCurrentTransaction();
/* make sure there's no cache pollution */
- ReorderBufferExecuteInvalidations(txn->ninvalidations,
- txn->invalidations);
+ if (rbtxn_distr_inval_overflowed(txn))
+ {
+ Assert(txn->ninvalidations_distributed == 0);
+ InvalidateSystemCaches();
+ }
+ else
+ {
+ ReorderBufferExecuteInvalidations(txn->ninvalidations, txn->invalidations);
+ ReorderBufferExecuteInvalidations(txn->ninvalidations_distributed,
+ txn->invalidations_distributed);
+ }
if (using_subtxn)
RollbackAndReleaseCurrentSubTransaction();
@@ -3060,7 +3097,8 @@ ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn,
* We might have decoded changes for this transaction that could load
* the cache as per the current transaction's view (consider DDL's
* happened in this transaction). We don't want the decoding of future
- * transactions to use those cache entries so execute invalidations.
+ * transactions to use those cache entries so execute only the inval
+ * messages in this transaction.
*/
if (txn->ninvalidations > 0)
ReorderBufferImmediateInvalidation(rb, txn->ninvalidations,
@@ -3147,9 +3185,10 @@ ReorderBufferForget(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn)
txn->final_lsn = lsn;
/*
- * Process cache invalidation messages if there are any. Even if we're not
- * interested in the transaction's contents, it could have manipulated the
- * catalog and we need to update the caches according to that.
+ * Process only cache invalidation messages in this transaction if there
+ * are any. Even if we're not interested in the transaction's contents, it
+ * could have manipulated the catalog and we need to update the caches
+ * according to that.
*/
if (txn->base_snapshot != NULL && txn->ninvalidations > 0)
ReorderBufferImmediateInvalidation(rb, txn->ninvalidations,
@@ -3422,6 +3461,57 @@ ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid,
}
/*
+ * Add new invalidation messages to the reorder buffer queue.
+ */
+static void
+ReorderBufferQueueInvalidations(ReorderBuffer *rb, TransactionId xid,
+ XLogRecPtr lsn, Size nmsgs,
+ SharedInvalidationMessage *msgs)
+{
+ ReorderBufferChange *change;
+
+ change = ReorderBufferAllocChange(rb);
+ change->action = REORDER_BUFFER_CHANGE_INVALIDATION;
+ change->data.inval.ninvalidations = nmsgs;
+ change->data.inval.invalidations = (SharedInvalidationMessage *)
+ palloc(sizeof(SharedInvalidationMessage) * nmsgs);
+ memcpy(change->data.inval.invalidations, msgs,
+ sizeof(SharedInvalidationMessage) * nmsgs);
+
+ ReorderBufferQueueChange(rb, xid, lsn, change, false);
+}
+
+/*
+ * A helper function for ReorderBufferAddInvalidations() and
+ * ReorderBufferAddDistributedInvalidations() to accumulate the invalidation
+ * messages to the **invals_out.
+ */
+static void
+ReorderBufferAccumulateInvalidations(SharedInvalidationMessage **invals_out,
+ uint32 *ninvals_out,
+ SharedInvalidationMessage *msgs_new,
+ Size nmsgs_new)
+{
+ if (*ninvals_out == 0)
+ {
+ *ninvals_out = nmsgs_new;
+ *invals_out = (SharedInvalidationMessage *)
+ palloc(sizeof(SharedInvalidationMessage) * nmsgs_new);
+ memcpy(*invals_out, msgs_new, sizeof(SharedInvalidationMessage) * nmsgs_new);
+ }
+ else
+ {
+ /* Enlarge the array of inval messages */
+ *invals_out = (SharedInvalidationMessage *)
+ repalloc(*invals_out, sizeof(SharedInvalidationMessage) *
+ (*ninvals_out + nmsgs_new));
+ memcpy(*invals_out + *ninvals_out, msgs_new,
+ nmsgs_new * sizeof(SharedInvalidationMessage));
+ *ninvals_out += nmsgs_new;
+ }
+}
+
+/*
* Accumulate the invalidations for executing them later.
*
* This needs to be called for each XLOG_XACT_INVALIDATIONS message and
@@ -3441,7 +3531,6 @@ ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid,
{
ReorderBufferTXN *txn;
MemoryContext oldcontext;
- ReorderBufferChange *change;
txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
@@ -3456,35 +3545,76 @@ ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid,
Assert(nmsgs > 0);
- /* Accumulate invalidations. */
- if (txn->ninvalidations == 0)
- {
- txn->ninvalidations = nmsgs;
- txn->invalidations = (SharedInvalidationMessage *)
- palloc(sizeof(SharedInvalidationMessage) * nmsgs);
- memcpy(txn->invalidations, msgs,
- sizeof(SharedInvalidationMessage) * nmsgs);
- }
- else
+ ReorderBufferAccumulateInvalidations(&txn->invalidations,
+ &txn->ninvalidations,
+ msgs, nmsgs);
+
+ ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Accumulate the invalidations distributed by other committed transactions
+ * for executing them later.
+ *
+ * This function is similar to ReorderBufferAddInvalidations() but stores
+ * the given inval messages in txn->invalidations_distributed, with an
+ * overflow check.
+ *
+ * This needs to be called by committed transactions to distribute their
+ * inval messages to in-progress transactions.
+ */
+void
+ReorderBufferAddDistributedInvalidations(ReorderBuffer *rb, TransactionId xid,
+ XLogRecPtr lsn, Size nmsgs,
+ SharedInvalidationMessage *msgs)
+{
+ ReorderBufferTXN *txn;
+ MemoryContext oldcontext;
+
+ txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true);
+
+ oldcontext = MemoryContextSwitchTo(rb->context);
+
+ /*
+ * Collect all the invalidations under the top transaction, if available,
+ * so that we can execute them all together. See comments in
+ * ReorderBufferAddInvalidations().
+ */
+ txn = rbtxn_get_toptxn(txn);
+
+ Assert(nmsgs > 0);
+
+ if (!rbtxn_distr_inval_overflowed(txn))
{
- txn->invalidations = (SharedInvalidationMessage *)
- repalloc(txn->invalidations, sizeof(SharedInvalidationMessage) *
- (txn->ninvalidations + nmsgs));
+ /*
+ * Check whether the transaction has enough space for storing distributed
+ * invalidation messages.
+ */
+ if (txn->ninvalidations_distributed + nmsgs >= MAX_DISTR_INVAL_MSG_PER_TXN)
+ {
+ /*
+ * Mark the transaction's distributed inval messages as overflowed
+ * and free the messages accumulated so far.
+ */
+ txn->txn_flags |= RBTXN_DISTR_INVAL_OVERFLOWED;
- memcpy(txn->invalidations + txn->ninvalidations, msgs,
- nmsgs * sizeof(SharedInvalidationMessage));
- txn->ninvalidations += nmsgs;
+ if (txn->invalidations_distributed)
+ {
+ pfree(txn->invalidations_distributed);
+ txn->invalidations_distributed = NULL;
+ txn->ninvalidations_distributed = 0;
+ }
+ }
+ else
+ ReorderBufferAccumulateInvalidations(&txn->invalidations_distributed,
+ &txn->ninvalidations_distributed,
+ msgs, nmsgs);
}
- change = ReorderBufferAllocChange(rb);
- change->action = REORDER_BUFFER_CHANGE_INVALIDATION;
- change->data.inval.ninvalidations = nmsgs;
- change->data.inval.invalidations = (SharedInvalidationMessage *)
- palloc(sizeof(SharedInvalidationMessage) * nmsgs);
- memcpy(change->data.inval.invalidations, msgs,
- sizeof(SharedInvalidationMessage) * nmsgs);
-
- ReorderBufferQueueChange(rb, xid, lsn, change, false);
+ /* Queue the invalidation messages into the transaction */
+ ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs);
MemoryContextSwitchTo(oldcontext);
}
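/*
 * Editor's sketch, not part of the patch: the bounded-accumulation
 * pattern behind MAX_DISTR_INVAL_MSG_PER_TXN. Once the cap would be
 * crossed, the buffer is discarded and an "overflowed" flag later forces
 * InvalidateSystemCaches() instead of replaying individual messages.
 * SketchTxn and SKETCH_MAX_MSGS are illustrative stand-ins.
 */
#include "postgres.h"
#include "storage/sinval.h"

#define SKETCH_MAX_MSGS 1024    /* stand-in for MAX_DISTR_INVAL_MSG_PER_TXN */

typedef struct SketchTxn
{
    bool        overflowed;
    uint32      nmsgs;
    SharedInvalidationMessage *msgs;
} SketchTxn;

static void
sketch_add_distributed(SketchTxn *txn,
                       SharedInvalidationMessage *newmsgs, Size nnew)
{
    if (txn->overflowed)
        return;                 /* already degraded to full invalidation */

    if (txn->nmsgs + nnew >= SKETCH_MAX_MSGS)
    {
        /* Lost track of individual messages; remember only that fact. */
        txn->overflowed = true;
        if (txn->msgs)
        {
            pfree(txn->msgs);
            txn->msgs = NULL;
            txn->nmsgs = 0;
        }
        return;
    }

    if (txn->nmsgs == 0)
        txn->msgs = palloc(sizeof(SharedInvalidationMessage) * nnew);
    else
        txn->msgs = repalloc(txn->msgs,
                             sizeof(SharedInvalidationMessage) *
                             (txn->nmsgs + nnew));
    memcpy(txn->msgs + txn->nmsgs, newmsgs,
           sizeof(SharedInvalidationMessage) * nnew);
    txn->nmsgs += nnew;
}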
diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index 656e66e0ae0..3ec3abfa3da 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -211,9 +211,9 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid,
* impact the users, so we used DEBUG1 level to log the message.
*/
ereport(slot->data.persistency == RS_TEMPORARY ? LOG : DEBUG1,
- errmsg("could not synchronize replication slot \"%s\" because remote slot precedes local slot",
+ errmsg("could not synchronize replication slot \"%s\"",
remote_slot->name),
- errdetail("The remote slot has LSN %X/%X and catalog xmin %u, but the local slot has LSN %X/%X and catalog xmin %u.",
+ errdetail("Synchronization could lead to data loss, because the remote slot needs WAL at LSN %X/%X and catalog xmin %u, but the standby has LSN %X/%X and catalog xmin %u.",
LSN_FORMAT_ARGS(remote_slot->restart_lsn),
remote_slot->catalog_xmin,
LSN_FORMAT_ARGS(slot->data.restart_lsn),
@@ -593,7 +593,7 @@ update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
{
ereport(LOG,
errmsg("could not synchronize replication slot \"%s\"", remote_slot->name),
- errdetail("Logical decoding could not find consistent point from local slot's LSN %X/%X.",
+ errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%X.",
LSN_FORMAT_ARGS(slot->data.restart_lsn)));
return false;
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 0d7bddbe4ed..adf18c397db 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -794,6 +794,13 @@ SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, Transact
* contents built by the current transaction even after its decoding,
* which should have been invalidated due to concurrent catalog
* changing transaction.
+ *
+ * Distribute only the invalidation messages generated by the current
+ * committed transaction. Invalidation messages received from other
+ * transactions would have already been propagated to the relevant
+ * in-progress transactions. This transaction would have processed
+ * those invalidations, ensuring that subsequent transactions observe
+ * a consistent cache state.
*/
if (txn->xid != xid)
{
@@ -807,8 +814,9 @@ SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, Transact
{
Assert(msgs != NULL);
- ReorderBufferAddInvalidations(builder->reorder, txn->xid, lsn,
- ninvalidations, msgs);
+ ReorderBufferAddDistributedInvalidations(builder->reorder,
+ txn->xid, lsn,
+ ninvalidations, msgs);
}
}
}
diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c
index 8e1e8762f62..c90f23ee5b0 100644
--- a/src/backend/replication/logical/tablesync.c
+++ b/src/backend/replication/logical/tablesync.c
@@ -603,14 +603,19 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn)
TimestampDifferenceExceeds(hentry->last_start_time, now,
wal_retrieve_retry_interval))
{
- logicalrep_worker_launch(WORKERTYPE_TABLESYNC,
- MyLogicalRepWorker->dbid,
- MySubscription->oid,
- MySubscription->name,
- MyLogicalRepWorker->userid,
- rstate->relid,
- DSM_HANDLE_INVALID);
+ /*
+ * Set the last_start_time even if we fail to start
+ * the worker, so that we won't retry until
+ * wal_retrieve_retry_interval has elapsed.
+ */
hentry->last_start_time = now;
+ (void) logicalrep_worker_launch(WORKERTYPE_TABLESYNC,
+ MyLogicalRepWorker->dbid,
+ MySubscription->oid,
+ MySubscription->name,
+ MyLogicalRepWorker->userid,
+ rstate->relid,
+ DSM_HANDLE_INVALID);
}
}
}
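/*
 * Editor's sketch, not part of the patch: why last_start_time is stamped
 * before the launch attempt above. Stamping unconditionally makes a
 * failed launch wait out the retry interval too, rather than retrying on
 * every pass of the loop. try_launch() is a hypothetical stand-in for
 * logicalrep_worker_launch().
 */
#include "postgres.h"
#include "utils/timestamp.h"

static TimestampTz sketch_last_start_time = 0;

static void
sketch_maybe_launch(int retry_interval_ms, bool (*try_launch) (void))
{
    TimestampTz now = GetCurrentTimestamp();

    if (sketch_last_start_time == 0 ||
        TimestampDifferenceExceeds(sketch_last_start_time, now,
                                   retry_interval_ms))
    {
        /* Stamp first: a failed launch must not bypass the throttle. */
        sketch_last_start_time = now;
        (void) try_launch();
    }
}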
diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 4151a4b2a96..fd11805a44c 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -109,13 +109,6 @@
* If ever a user needs to be aware of the tri-state value, they can fetch it
* from the pg_subscription catalog (see column subtwophasestate).
*
- * We don't allow to toggle two_phase option of a subscription because it can
- * lead to an inconsistent replica. Consider, initially, it was on and we have
- * received some prepare then we turn it off, now at commit time the server
- * will send the entire transaction data along with the commit. With some more
- * analysis, we can allow changing this option from off to on but not sure if
- * that alone would be useful.
- *
* Finally, to avoid problems mentioned in previous paragraphs from any
* subsequent (not READY) tablesyncs (need to toggle two_phase option from 'on'
* to 'off' and then again back to 'on') there is a restriction for
@@ -4626,8 +4619,16 @@ run_apply_worker()
walrcv_startstreaming(LogRepWorkerWalRcvConn, &options);
StartTransactionCommand();
+
+ /*
+ * Updating pg_subscription might involve TOAST table access, so
+ * ensure we have a valid snapshot.
+ */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
UpdateTwoPhaseState(MySubscription->oid, LOGICALREP_TWOPHASE_STATE_ENABLED);
MySubscription->twophasestate = LOGICALREP_TWOPHASE_STATE_ENABLED;
+ PopActiveSnapshot();
CommitTransactionCommand();
}
else
@@ -4843,7 +4844,15 @@ DisableSubscriptionAndExit(void)
/* Disable the subscription */
StartTransactionCommand();
+
+ /*
+ * Updating pg_subscription might involve TOAST table access, so ensure we
+ * have a valid snapshot.
+ */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
DisableSubscription(MySubscription->oid);
+ PopActiveSnapshot();
CommitTransactionCommand();
/* Ensure we remove no-longer-useful entry for worker's start time */
@@ -4948,6 +4957,12 @@ clear_subscription_skip_lsn(XLogRecPtr finish_lsn)
}
/*
+ * Updating pg_subscription might involve TOAST table access, so ensure we
+ * have a valid snapshot.
+ */
+ PushActiveSnapshot(GetTransactionSnapshot());
+
+ /*
* Protect subskiplsn of pg_subscription from being concurrently updated
* while clearing it.
*/
@@ -5005,6 +5020,8 @@ clear_subscription_skip_lsn(XLogRecPtr finish_lsn)
heap_freetuple(tup);
table_close(rel, NoLock);
+ PopActiveSnapshot();
+
if (started_tx)
CommitTransactionCommand();
}
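/*
 * Editor's sketch, not part of the patch: the transaction scaffolding
 * the worker.c hunks add around catalog updates that may detoast.
 * update_catalog() is a hypothetical stand-in for calls like
 * UpdateTwoPhaseState() or DisableSubscription().
 */
#include "postgres.h"
#include "access/xact.h"
#include "utils/snapmgr.h"

static void
sketch_catalog_update(void (*update_catalog) (void))
{
    StartTransactionCommand();

    /* TOAST table access requires an active snapshot. */
    PushActiveSnapshot(GetTransactionSnapshot());

    update_catalog();

    PopActiveSnapshot();
    CommitTransactionCommand();
}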
diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c
index 693a766e6d7..082b4d9d327 100644
--- a/src/backend/replication/pgoutput/pgoutput.c
+++ b/src/backend/replication/pgoutput/pgoutput.c
@@ -1789,7 +1789,7 @@ LoadPublications(List *pubnames)
else
ereport(WARNING,
errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
- errmsg("skipped loading publication: %s", pubname),
+ errmsg("skipped loading publication \"%s\"", pubname),
errdetail("The publication does not exist at this point in the WAL."),
errhint("Create the publication if it does not exist."));
}
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index 600b87fa9cb..f9fec50ae88 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -424,6 +424,7 @@ ReplicationSlotCreate(const char *name, bool db_specific,
slot->candidate_restart_valid = InvalidXLogRecPtr;
slot->candidate_restart_lsn = InvalidXLogRecPtr;
slot->last_saved_confirmed_flush = InvalidXLogRecPtr;
+ slot->last_saved_restart_lsn = InvalidXLogRecPtr;
slot->inactive_since = 0;
/*
@@ -1165,20 +1166,41 @@ ReplicationSlotsComputeRequiredLSN(void)
{
ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
XLogRecPtr restart_lsn;
+ XLogRecPtr last_saved_restart_lsn;
bool invalidated;
+ ReplicationSlotPersistency persistency;
if (!s->in_use)
continue;
SpinLockAcquire(&s->mutex);
+ persistency = s->data.persistency;
restart_lsn = s->data.restart_lsn;
invalidated = s->data.invalidated != RS_INVAL_NONE;
+ last_saved_restart_lsn = s->last_saved_restart_lsn;
SpinLockRelease(&s->mutex);
/* invalidated slots need not apply */
if (invalidated)
continue;
+ /*
+ * For a persistent slot, use last_saved_restart_lsn to compute the
+ * oldest LSN for removal of WAL segments. The segments between
+ * last_saved_restart_lsn and restart_lsn might be needed by a
+ * persistent slot in the case of a database crash. Non-persistent
+ * slots can't survive a database crash, so we don't care about
+ * last_saved_restart_lsn for them.
+ */
+ if (persistency == RS_PERSISTENT)
+ {
+ if (last_saved_restart_lsn != InvalidXLogRecPtr &&
+ restart_lsn > last_saved_restart_lsn)
+ {
+ restart_lsn = last_saved_restart_lsn;
+ }
+ }
+
if (restart_lsn != InvalidXLogRecPtr &&
(min_required == InvalidXLogRecPtr ||
restart_lsn < min_required))
@@ -1216,7 +1238,9 @@ ReplicationSlotsComputeLogicalRestartLSN(void)
{
ReplicationSlot *s;
XLogRecPtr restart_lsn;
+ XLogRecPtr last_saved_restart_lsn;
bool invalidated;
+ ReplicationSlotPersistency persistency;
s = &ReplicationSlotCtl->replication_slots[i];
@@ -1230,14 +1254,33 @@ ReplicationSlotsComputeLogicalRestartLSN(void)
/* read once, it's ok if it increases while we're checking */
SpinLockAcquire(&s->mutex);
+ persistency = s->data.persistency;
restart_lsn = s->data.restart_lsn;
invalidated = s->data.invalidated != RS_INVAL_NONE;
+ last_saved_restart_lsn = s->last_saved_restart_lsn;
SpinLockRelease(&s->mutex);
/* invalidated slots need not apply */
if (invalidated)
continue;
+ /*
+ * For a persistent slot, use last_saved_restart_lsn to compute the
+ * oldest LSN for removal of WAL segments. The segments between
+ * last_saved_restart_lsn and restart_lsn might be needed by a
+ * persistent slot in the case of a database crash. Non-persistent
+ * slots can't survive a database crash, so we don't care about
+ * last_saved_restart_lsn for them.
+ */
+ if (persistency == RS_PERSISTENT)
+ {
+ if (last_saved_restart_lsn != InvalidXLogRecPtr &&
+ restart_lsn > last_saved_restart_lsn)
+ {
+ restart_lsn = last_saved_restart_lsn;
+ }
+ }
+
if (restart_lsn == InvalidXLogRecPtr)
continue;
@@ -1455,6 +1498,7 @@ ReplicationSlotReserveWal(void)
Assert(slot != NULL);
Assert(slot->data.restart_lsn == InvalidXLogRecPtr);
+ Assert(slot->last_saved_restart_lsn == InvalidXLogRecPtr);
/*
* The replication slot mechanism is used to prevent removal of required
@@ -1835,7 +1879,10 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes,
* just rely on .invalidated.
*/
if (invalidation_cause == RS_INVAL_WAL_REMOVED)
+ {
s->data.restart_lsn = InvalidXLogRecPtr;
+ s->last_saved_restart_lsn = InvalidXLogRecPtr;
+ }
/* Let caller know */
*invalidated = true;
@@ -2032,6 +2079,7 @@ void
CheckPointReplicationSlots(bool is_shutdown)
{
int i;
+ bool last_saved_restart_lsn_updated = false;
elog(DEBUG1, "performing replication slot checkpoint");
@@ -2076,9 +2124,23 @@ CheckPointReplicationSlots(bool is_shutdown)
SpinLockRelease(&s->mutex);
}
+ /*
+ * Track whether we're going to update the slot's last_saved_restart_lsn. We
+ * need this to know if we need to recompute the required LSN.
+ */
+ if (s->last_saved_restart_lsn != s->data.restart_lsn)
+ last_saved_restart_lsn_updated = true;
+
SaveSlotToPath(s, path, LOG);
}
LWLockRelease(ReplicationSlotAllocationLock);
+
+ /*
+ * Recompute the required LSN if SaveSlotToPath() updated
+ * last_saved_restart_lsn for any slot.
+ */
+ if (last_saved_restart_lsn_updated)
+ ReplicationSlotsComputeRequiredLSN();
}
/*
@@ -2354,6 +2416,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel)
if (!slot->just_dirtied)
slot->dirty = false;
slot->last_saved_confirmed_flush = cp.slotdata.confirmed_flush;
+ slot->last_saved_restart_lsn = cp.slotdata.restart_lsn;
SpinLockRelease(&slot->mutex);
LWLockRelease(&slot->io_in_progress_lock);
@@ -2569,6 +2632,7 @@ RestoreSlotFromDisk(const char *name)
slot->effective_xmin = cp.slotdata.xmin;
slot->effective_catalog_xmin = cp.slotdata.catalog_xmin;
slot->last_saved_confirmed_flush = cp.slotdata.confirmed_flush;
+ slot->last_saved_restart_lsn = cp.slotdata.restart_lsn;
slot->candidate_catalog_xmin = InvalidTransactionId;
slot->candidate_xmin_lsn = InvalidXLogRecPtr;
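/*
 * Editor's sketch, not part of the patch: how the required-LSN
 * computations above fold in last_saved_restart_lsn. A persistent slot
 * may need WAL back to what was last flushed to disk, not just to its
 * in-memory restart_lsn. SketchSlot is an illustrative stand-in.
 */
#include "postgres.h"
#include "access/xlogdefs.h"

typedef struct SketchSlot
{
    bool        persistent;
    XLogRecPtr  restart_lsn;
    XLogRecPtr  last_saved_restart_lsn;
} SketchSlot;

static XLogRecPtr
sketch_required_lsn(const SketchSlot *slots, int nslots)
{
    XLogRecPtr  min_required = InvalidXLogRecPtr;

    for (int i = 0; i < nslots; i++)
    {
        XLogRecPtr  restart_lsn = slots[i].restart_lsn;

        /* Persistent slots must keep WAL back to the last saved LSN. */
        if (slots[i].persistent &&
            slots[i].last_saved_restart_lsn != InvalidXLogRecPtr &&
            restart_lsn > slots[i].last_saved_restart_lsn)
            restart_lsn = slots[i].last_saved_restart_lsn;

        if (restart_lsn != InvalidXLogRecPtr &&
            (min_required == InvalidXLogRecPtr || restart_lsn < min_required))
            min_required = restart_lsn;
    }

    return min_required;
}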
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index 9fa8beb6103..f2c33250e8b 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -3449,8 +3449,16 @@ XLogSendLogical(void)
if (flushPtr == InvalidXLogRecPtr ||
logical_decoding_ctx->reader->EndRecPtr >= flushPtr)
{
+ /*
+ * For cascading logical WAL senders, we use the replay LSN instead of
+ * the flush LSN, since logical decoding on a standby only processes
+ * WAL that has been replayed. This distinction becomes particularly
+ * important during shutdown, as new WAL is no longer replayed and the
+ * last replayed LSN marks the furthest point up to which decoding can
+ * proceed.
+ */
if (am_cascading_walsender)
- flushPtr = GetStandbyFlushRecPtr(NULL);
+ flushPtr = GetXLogReplayRecPtr(NULL);
else
flushPtr = GetFlushRecPtr(NULL);
}
diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c
index f0bce5f9ed9..adc9e7600e1 100644
--- a/src/backend/rewrite/rewriteHandler.c
+++ b/src/backend/rewrite/rewriteHandler.c
@@ -923,8 +923,9 @@ rewriteTargetListIU(List *targetList,
apply_default = true;
/*
- * Can only insert DEFAULT into generated columns, regardless of
- * any OVERRIDING clauses.
+ * Can only insert DEFAULT into generated columns. (The
+ * OVERRIDING clause does not apply to generated columns, so we
+ * don't consider it here.)
*/
if (att_tup->attgenerated && !apply_default)
{
@@ -4544,7 +4545,7 @@ build_generation_expression(Relation rel, int attrno)
List *
QueryRewrite(Query *parsetree)
{
- uint64 input_query_id = parsetree->queryId;
+ int64 input_query_id = parsetree->queryId;
List *querylist;
List *results;
ListCell *l;
diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c
index ebb5a771bfd..3643f27ad6e 100644
--- a/src/backend/storage/aio/aio.c
+++ b/src/backend/storage/aio/aio.c
@@ -184,6 +184,8 @@ pgaio_io_acquire(struct ResourceOwnerData *resowner, PgAioReturn *ret)
PgAioHandle *
pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
{
+ PgAioHandle *ioh = NULL;
+
if (pgaio_my_backend->num_staged_ios >= PGAIO_SUBMIT_BATCH_SIZE)
{
Assert(pgaio_my_backend->num_staged_ios == PGAIO_SUBMIT_BATCH_SIZE);
@@ -193,10 +195,17 @@ pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
if (pgaio_my_backend->handed_out_io)
elog(ERROR, "API violation: Only one IO can be handed out");
+ /*
+ * Probably not strictly needed today, as interrupt processing should
+ * not act on an IO that has not been handed out yet, but hold
+ * interrupts anyway for safety.
+ */
+ HOLD_INTERRUPTS();
+
if (!dclist_is_empty(&pgaio_my_backend->idle_ios))
{
dlist_node *ion = dclist_pop_head_node(&pgaio_my_backend->idle_ios);
- PgAioHandle *ioh = dclist_container(PgAioHandle, node, ion);
+
+ ioh = dclist_container(PgAioHandle, node, ion);
Assert(ioh->state == PGAIO_HS_IDLE);
Assert(ioh->owner_procno == MyProcNumber);
@@ -212,11 +221,11 @@ pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
ioh->report_return = ret;
ret->result.status = PGAIO_RS_UNKNOWN;
}
-
- return ioh;
}
- return NULL;
+ RESUME_INTERRUPTS();
+
+ return ioh;
}
/*
@@ -233,6 +242,12 @@ pgaio_io_release(PgAioHandle *ioh)
Assert(ioh->resowner);
pgaio_my_backend->handed_out_io = NULL;
+
+ /*
+ * Note that no interrupts are processed between the handed_out_io
+ * check and the call to reclaim - that's important as otherwise an
+ * interrupt could have already reclaimed the handle.
+ */
pgaio_io_reclaim(ioh);
}
else
@@ -251,6 +266,12 @@ pgaio_io_release_resowner(dlist_node *ioh_node, bool on_error)
Assert(ioh->resowner);
+ /*
+ * Hold interrupts: otherwise an interrupt arriving in the middle of
+ * releasing the IO could end up trying to wait for it, leading to
+ * state confusion.
+ */
+ HOLD_INTERRUPTS();
+
ResourceOwnerForgetAioHandle(ioh->resowner, &ioh->resowner_node);
ioh->resowner = NULL;
@@ -291,6 +312,8 @@ pgaio_io_release_resowner(dlist_node *ioh_node, bool on_error)
*/
if (ioh->report_return)
ioh->report_return = NULL;
+
+ RESUME_INTERRUPTS();
}
/*
@@ -359,6 +382,13 @@ pgaio_io_get_wref(PgAioHandle *ioh, PgAioWaitRef *iow)
static inline void
pgaio_io_update_state(PgAioHandle *ioh, PgAioHandleState new_state)
{
+ /*
+ * All callers need to have held interrupts in some form; otherwise
+ * interrupt processing could wait for the IO to complete while it is
+ * in an intermediate state.
+ */
+ Assert(!INTERRUPTS_CAN_BE_PROCESSED());
+
pgaio_debug_io(DEBUG5, ioh,
"updating state to %s",
pgaio_io_state_get_name(new_state));
@@ -396,6 +426,13 @@ pgaio_io_stage(PgAioHandle *ioh, PgAioOp op)
Assert(pgaio_my_backend->handed_out_io == ioh);
Assert(pgaio_io_has_target(ioh));
+ /*
+ * Hold interrupts: otherwise an interrupt arriving in the middle of
+ * staging and possibly executing the IO could end up trying to wait
+ * for the IO, leading to state confusion.
+ */
+ HOLD_INTERRUPTS();
+
ioh->op = op;
ioh->result = 0;
@@ -435,6 +472,8 @@ pgaio_io_stage(PgAioHandle *ioh, PgAioOp op)
pgaio_io_prepare_submit(ioh);
pgaio_io_perform_synchronously(ioh);
}
+
+ RESUME_INTERRUPTS();
}
bool
@@ -517,6 +556,13 @@ bool
pgaio_io_was_recycled(PgAioHandle *ioh, uint64 ref_generation, PgAioHandleState *state)
{
*state = ioh->state;
+
+ /*
+ * Ensure that we don't see an earlier state of the handle than ioh->state
+ * due to compiler or CPU reordering. This protects both ->generation as
+ * directly used here, and other fields in the handle accessed in the
+ * caller if the handle was not reused.
+ */
pg_read_barrier();
return ioh->generation != ref_generation;
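/*
 * Editor's sketch, not part of the patch: the load ordering that
 * pgaio_io_was_recycled() depends on. The state must be read before the
 * generation; pg_read_barrier() keeps the two loads from being
 * reordered, so a stale state is never paired with a fresh generation.
 * SketchHandle is an illustrative stand-in.
 */
#include "postgres.h"
#include "port/atomics.h"

typedef struct SketchHandle
{
    int         state;
    uint64      generation;
} SketchHandle;

static bool
sketch_was_recycled(SketchHandle *h, uint64 ref_generation, int *state)
{
    *state = h->state;
    pg_read_barrier();          /* order the state load before generation */
    return h->generation != ref_generation;
}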
@@ -544,8 +590,8 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
&& state != PGAIO_HS_COMPLETED_SHARED
&& state != PGAIO_HS_COMPLETED_LOCAL)
{
- elog(PANIC, "waiting for own IO in wrong state: %d",
- state);
+ elog(PANIC, "waiting for own IO %d in wrong state: %s",
+ pgaio_io_get_id(ioh), pgaio_io_get_state_name(ioh));
}
}
@@ -599,7 +645,13 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
case PGAIO_HS_COMPLETED_SHARED:
case PGAIO_HS_COMPLETED_LOCAL:
- /* see above */
+
+ /*
+ * Note that no interrupts are processed between
+ * pgaio_io_was_recycled() and this check - that's important
+ * as otherwise an interrupt could have already reclaimed the
+ * handle.
+ */
if (am_owner)
pgaio_io_reclaim(ioh);
return;
@@ -610,6 +662,11 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
/*
* Make IO handle ready to be reused after IO has completed or after the
* handle has been released without being used.
+ *
+ * Note that callers need to be careful to call this only in the right
+ * state, and to ensure that no interrupts are processed between the
+ * state check and the call to pgaio_io_reclaim(). Otherwise interrupt
+ * processing could already have reclaimed the handle.
*/
static void
pgaio_io_reclaim(PgAioHandle *ioh)
@@ -618,6 +675,9 @@ pgaio_io_reclaim(PgAioHandle *ioh)
Assert(ioh->owner_procno == MyProcNumber);
Assert(ioh->state != PGAIO_HS_IDLE);
+ /* see comment in function header */
+ HOLD_INTERRUPTS();
+
/*
* It's a bit ugly, but right now the easiest place to put the execution
* of local completion callbacks is this function, as we need to execute
@@ -685,6 +745,8 @@ pgaio_io_reclaim(PgAioHandle *ioh)
* efficient in cases where only a few IOs are used.
*/
dclist_push_head(&pgaio_my_backend->idle_ios, &ioh->node);
+
+ RESUME_INTERRUPTS();
}
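/*
 * Editor's sketch, not part of the patch: the interrupt-holdoff bracket
 * the AIO hunks apply around multi-step handle state changes. While the
 * holdoff counter is nonzero, interrupt servicing is deferred, so
 * interrupt processing cannot observe or reclaim a handle that is in an
 * intermediate state. mutate_handle() is a hypothetical stand-in.
 */
#include "postgres.h"
#include "miscadmin.h"

static void
sketch_guarded_transition(void (*mutate_handle) (void))
{
    HOLD_INTERRUPTS();

    /* No interrupt processing can run in here. */
    mutate_handle();

    RESUME_INTERRUPTS();
}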
/*
@@ -697,10 +759,10 @@ pgaio_io_wait_for_free(void)
{
int reclaimed = 0;
- pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %d in-flight, %d idle IOs",
+ pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %u in-flight, %u idle IOs",
pgaio_my_backend->num_staged_ios,
dclist_count(&pgaio_my_backend->in_flight_ios),
- dclist_is_empty(&pgaio_my_backend->idle_ios));
+ dclist_count(&pgaio_my_backend->idle_ios));
/*
* First check if any of our IOs actually have completed - when using
@@ -714,6 +776,16 @@ pgaio_io_wait_for_free(void)
if (ioh->state == PGAIO_HS_COMPLETED_SHARED)
{
+ /*
+ * Note that no interrupts are processed between the state check
+ * and the call to reclaim - that's important as otherwise an
+ * interrupt could have already reclaimed the handle.
+ *
+ * We need to ensure that there's no reordering; in the more common
+ * paths, where we wait for the IO, that's done by
+ * pgaio_io_was_recycled().
+ */
+ pg_read_barrier();
pgaio_io_reclaim(ioh);
reclaimed++;
}
@@ -730,13 +802,17 @@ pgaio_io_wait_for_free(void)
if (pgaio_my_backend->num_staged_ios > 0)
pgaio_submit_staged();
+ /* possibly some IOs finished during submission */
+ if (!dclist_is_empty(&pgaio_my_backend->idle_ios))
+ return;
+
if (dclist_count(&pgaio_my_backend->in_flight_ios) == 0)
ereport(ERROR,
errmsg_internal("no free IOs despite no in-flight IOs"),
- errdetail_internal("%d pending, %d in-flight, %d idle IOs",
+ errdetail_internal("%d pending, %u in-flight, %u idle IOs",
pgaio_my_backend->num_staged_ios,
dclist_count(&pgaio_my_backend->in_flight_ios),
- dclist_is_empty(&pgaio_my_backend->idle_ios)));
+ dclist_count(&pgaio_my_backend->idle_ios)));
/*
* Wait for the oldest in-flight IO to complete.
@@ -747,6 +823,7 @@ pgaio_io_wait_for_free(void)
{
PgAioHandle *ioh = dclist_head_element(PgAioHandle, node,
&pgaio_my_backend->in_flight_ios);
+ uint64 generation = ioh->generation;
switch (ioh->state)
{
@@ -763,20 +840,36 @@ pgaio_io_wait_for_free(void)
case PGAIO_HS_COMPLETED_IO:
case PGAIO_HS_SUBMITTED:
pgaio_debug_io(DEBUG2, ioh,
- "waiting for free io with %d in flight",
+ "waiting for free io with %u in flight",
dclist_count(&pgaio_my_backend->in_flight_ios));
/*
* In a more general case this would be racy, because the
* generation could increase after we read ioh->state above.
* But we are only looking at IOs by the current backend and
- * the IO can only be recycled by this backend.
+ * the IO can only be recycled by this backend. Even this is
+ * only OK because we get the handle's generation before
+ * potentially processing interrupts, e.g. as part of
+ * pgaio_debug_io().
*/
- pgaio_io_wait(ioh, ioh->generation);
+ pgaio_io_wait(ioh, generation);
break;
case PGAIO_HS_COMPLETED_SHARED:
- /* it's possible that another backend just finished this IO */
+
+ /*
+ * It's possible that another backend just finished this IO.
+ *
+ * Note that no interrupts are processed between the state
+ * check and the call to reclaim - that's important as
+ * otherwise an interrupt could have already reclaimed the
+ * handle.
+ *
+ * We need to ensure that there's no reordering; in the more
+ * common paths, where we wait for the IO, that's done by
+ * pgaio_io_was_recycled().
+ */
+ pg_read_barrier();
pgaio_io_reclaim(ioh);
break;
}
@@ -926,6 +1019,11 @@ pgaio_wref_check_done(PgAioWaitRef *iow)
if (state == PGAIO_HS_COMPLETED_SHARED ||
state == PGAIO_HS_COMPLETED_LOCAL)
{
+ /*
+ * Note that no interrupts are processed between
+ * pgaio_io_was_recycled() and this check - that's important as
+ * otherwise an interrupt could have already reclaimed the handle.
+ */
if (am_owner)
pgaio_io_reclaim(ioh);
return true;
@@ -1153,11 +1251,14 @@ pgaio_closing_fd(int fd)
{
dlist_iter iter;
PgAioHandle *ioh = NULL;
+ uint64 generation;
dclist_foreach(iter, &pgaio_my_backend->in_flight_ios)
{
ioh = dclist_container(PgAioHandle, node, iter.cur);
+ generation = ioh->generation;
+
if (pgaio_io_uses_fd(ioh, fd))
break;
else
@@ -1168,11 +1269,11 @@ pgaio_closing_fd(int fd)
break;
pgaio_debug_io(DEBUG2, ioh,
- "waiting for IO before FD %d gets closed, %d in-flight IOs",
+ "waiting for IO before FD %d gets closed, %u in-flight IOs",
fd, dclist_count(&pgaio_my_backend->in_flight_ios));
/* see comment in pgaio_io_wait_for_free() about raciness */
- pgaio_io_wait(ioh, ioh->generation);
+ pgaio_io_wait(ioh, generation);
}
}
}
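/*
 * Editor's sketch, not part of the patch: why the hunks above copy
 * ioh->generation into a local before pgaio_debug_io(). Debug output can
 * process interrupts, which may reclaim and recycle the handle; the wait
 * must therefore use the generation observed before that window, or it
 * could target a reused handle. The sketch_* names are illustrative
 * stand-ins for the AIO internals.
 */
#include "postgres.h"

typedef struct SketchIoHandle
{
    uint64      generation;
} SketchIoHandle;

extern void sketch_debug_io(SketchIoHandle *ioh);   /* may run interrupts */
extern void sketch_io_wait(SketchIoHandle *ioh, uint64 ref_generation);

static void
sketch_wait_in_flight(SketchIoHandle *ioh)
{
    uint64      generation = ioh->generation;   /* capture first */

    sketch_debug_io(ioh);               /* interrupts may recycle ioh here */
    sketch_io_wait(ioh, generation);    /* not ioh->generation! */
}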
@@ -1201,13 +1302,14 @@ pgaio_shutdown(int code, Datum arg)
while (!dclist_is_empty(&pgaio_my_backend->in_flight_ios))
{
PgAioHandle *ioh = dclist_head_element(PgAioHandle, node, &pgaio_my_backend->in_flight_ios);
+ uint64 generation = ioh->generation;
pgaio_debug_io(DEBUG2, ioh,
- "waiting for IO to complete during shutdown, %d in-flight IOs",
+ "waiting for IO to complete during shutdown, %u in-flight IOs",
dclist_count(&pgaio_my_backend->in_flight_ios));
/* see comment in pgaio_io_wait_for_free() about raciness */
- pgaio_io_wait(ioh, ioh->generation);
+ pgaio_io_wait(ioh, generation);
}
pgaio_my_backend = NULL;
diff --git a/src/backend/storage/aio/aio_callback.c b/src/backend/storage/aio/aio_callback.c
index 0ad9795bb7e..03c9bba0802 100644
--- a/src/backend/storage/aio/aio_callback.c
+++ b/src/backend/storage/aio/aio_callback.c
@@ -256,6 +256,9 @@ pgaio_io_call_complete_shared(PgAioHandle *ioh)
pgaio_result_status_string(result.status),
result.id, result.error_data, result.result);
result = ce->cb->complete_shared(ioh, result, cb_data);
+
+ /* the callback should never transition to unknown */
+ Assert(result.status != PGAIO_RS_UNKNOWN);
}
ioh->distilled_result = result;
@@ -290,6 +293,7 @@ pgaio_io_call_complete_local(PgAioHandle *ioh)
/* start with distilled result from shared callback */
result = ioh->distilled_result;
+ Assert(result.status != PGAIO_RS_UNKNOWN);
for (int i = ioh->num_callbacks; i > 0; i--)
{
@@ -306,6 +310,9 @@ pgaio_io_call_complete_local(PgAioHandle *ioh)
pgaio_result_status_string(result.status),
result.id, result.error_data, result.result);
result = ce->cb->complete_local(ioh, result, cb_data);
+
+ /* the callback should never transition to unknown */
+ Assert(result.status != PGAIO_RS_UNKNOWN);
}
/*
diff --git a/src/backend/storage/aio/aio_io.c b/src/backend/storage/aio/aio_io.c
index 00e176135a6..520b5077df2 100644
--- a/src/backend/storage/aio/aio_io.c
+++ b/src/backend/storage/aio/aio_io.c
@@ -181,9 +181,9 @@ pgaio_io_get_op_name(PgAioHandle *ioh)
case PGAIO_OP_INVALID:
return "invalid";
case PGAIO_OP_READV:
- return "read";
+ return "readv";
case PGAIO_OP_WRITEV:
- return "write";
+ return "writev";
}
return NULL; /* silence compiler */
diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c
index c719ba2727a..b78048328e1 100644
--- a/src/backend/storage/aio/method_io_uring.c
+++ b/src/backend/storage/aio/method_io_uring.c
@@ -126,7 +126,7 @@ pgaio_uring_shmem_size(void)
static void
pgaio_uring_shmem_init(bool first_time)
{
- int TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS - MAX_IO_WORKERS;
+ int TotalProcs = pgaio_uring_procs();
bool found;
pgaio_uring_contexts = (PgAioUringContext *)
@@ -400,9 +400,9 @@ pgaio_uring_wait_one(PgAioHandle *ioh, uint64 ref_generation)
while (true)
{
pgaio_debug_io(DEBUG3, ioh,
- "wait_one io_gen: %llu, ref_gen: %llu, cycle %d",
- (long long unsigned) ioh->generation,
- (long long unsigned) ref_generation,
+ "wait_one io_gen: %" PRIu64 ", ref_gen: %" PRIu64 ", cycle %d",
+ ioh->generation,
+ ref_generation,
waited);
if (pgaio_io_was_recycled(ioh, ref_generation, &state) ||
diff --git a/src/backend/storage/aio/method_worker.c b/src/backend/storage/aio/method_worker.c
index 743cccc2acd..36be179678d 100644
--- a/src/backend/storage/aio/method_worker.c
+++ b/src/backend/storage/aio/method_worker.c
@@ -461,7 +461,12 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len)
int nwakeups = 0;
int worker;
- /* Try to get a job to do. */
+ /*
+ * Try to get a job to do.
+ *
+ * The lwlock acquisition also provides the necessary memory barrier
+ * to ensure that we don't see outdated data in the handle.
+ */
LWLockAcquire(AioWorkerSubmissionQueueLock, LW_EXCLUSIVE);
if ((io_index = pgaio_worker_submission_queue_consume()) == UINT32_MAX)
{
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f93131a645e..bd68d7e0ca9 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -4550,11 +4550,9 @@ DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
if (RelFileLocatorBackendIsTemp(rlocator))
{
if (rlocator.backend == MyProcNumber)
- {
- for (j = 0; j < nforks; j++)
- DropRelationLocalBuffers(rlocator.locator, forkNum[j],
- firstDelBlock[j]);
- }
+ DropRelationLocalBuffers(rlocator.locator, forkNum, nforks,
+ firstDelBlock);
+
return;
}
@@ -7320,7 +7318,7 @@ buffer_readv_report(PgAioResult result, const PgAioTargetData *td,
affected_count > 1 ?
errdetail("Block %u held first zeroed page.",
first + first_off) : 0,
- errhint("See server log for details about the other %u invalid block(s).",
+ errhint("See server log for details about the other %d invalid block(s).",
affected_count + checkfail_count - 1));
return;
}
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index 63101d56a07..3da9c41ee1d 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -629,7 +629,7 @@ InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced)
*/
if (check_unreferenced &&
(LocalRefCount[bufid] != 0 || BUF_STATE_GET_REFCOUNT(buf_state) != 0))
- elog(ERROR, "block %u of %s is still referenced (local %u)",
+ elog(ERROR, "block %u of %s is still referenced (local %d)",
bufHdr->tag.blockNum,
relpathbackend(BufTagGetRelFileLocator(&bufHdr->tag),
MyProcNumber,
@@ -660,10 +660,11 @@ InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced)
* See DropRelationBuffers in bufmgr.c for more notes.
*/
void
-DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum,
- BlockNumber firstDelBlock)
+DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber *forkNum,
+ int nforks, BlockNumber *firstDelBlock)
{
int i;
+ int j;
for (i = 0; i < NLocBuffer; i++)
{
@@ -672,12 +673,18 @@ DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum,
buf_state = pg_atomic_read_u32(&bufHdr->state);
- if ((buf_state & BM_TAG_VALID) &&
- BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) &&
- BufTagGetForkNum(&bufHdr->tag) == forkNum &&
- bufHdr->tag.blockNum >= firstDelBlock)
+ if (!(buf_state & BM_TAG_VALID) ||
+ !BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator))
+ continue;
+
+ for (j = 0; j < nforks; j++)
{
- InvalidateLocalBuffer(bufHdr, true);
+ if (BufTagGetForkNum(&bufHdr->tag) == forkNum[j] &&
+ bufHdr->tag.blockNum >= firstDelBlock[j])
+ {
+ InvalidateLocalBuffer(bufHdr, true);
+ break;
+ }
}
}
}
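The net effect of this hunk and the bufmgr.c one above: dropping buffers for several forks of a temp relation now makes a single pass over the local buffer array, testing each matching buffer against the nforks fork/block pairs, instead of one full NLocBuffer scan per fork.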
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 0e8299dd556..a4ec7959f31 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -400,25 +400,22 @@ pg_fsync(int fd)
* portable, even if it runs ok on the current system.
*
* We assert here that a descriptor for a file was opened with write
- * permissions (either O_RDWR or O_WRONLY) and for a directory without
- * write permissions (O_RDONLY).
+ * permissions (i.e., not O_RDONLY) and for a directory without write
+ * permissions (O_RDONLY). Notice that the assertion check is made even
+ * if fsync() is disabled.
*
- * Ignore any fstat errors and let the follow-up fsync() do its work.
- * Doing this sanity check here counts for the case where fsync() is
- * disabled.
+ * If fstat() fails, ignore it and let the follow-up fsync() complain.
*/
if (fstat(fd, &st) == 0)
{
int desc_flags = fcntl(fd, F_GETFL);
- /*
- * O_RDONLY is historically 0, so just make sure that for directories
- * no write flags are used.
- */
+ desc_flags &= O_ACCMODE;
+
if (S_ISDIR(st.st_mode))
- Assert((desc_flags & (O_RDWR | O_WRONLY)) == 0);
+ Assert(desc_flags == O_RDONLY);
else
- Assert((desc_flags & (O_RDWR | O_WRONLY)) != 0);
+ Assert(desc_flags != O_RDONLY);
}
errno = 0;
#endif
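A standalone sketch of the masking trick the new assertion relies on (hypothetical helper, plain C, not part of the patch): F_GETFL returns status flags such as O_APPEND or O_NONBLOCK mixed in with the access mode, so the access mode must be isolated with O_ACCMODE before comparing against O_RDONLY.

#include <assert.h>
#include <fcntl.h>

/* Assert that fd was opened writable, ignoring status flags. */
static void
assert_writable(int fd)
{
    int     flags = fcntl(fd, F_GETFL);

    assert(flags != -1);
    flags &= O_ACCMODE;     /* strips O_APPEND, O_NONBLOCK, etc. */
    assert(flags == O_WRONLY || flags == O_RDWR);
}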
diff --git a/src/backend/storage/ipc/dsm_registry.c b/src/backend/storage/ipc/dsm_registry.c
index 1d4fd31ffed..828c2ff0c7f 100644
--- a/src/backend/storage/ipc/dsm_registry.c
+++ b/src/backend/storage/ipc/dsm_registry.c
@@ -15,6 +15,20 @@
* current backend. This function guarantees that only one backend
* initializes the segment and that all other backends just attach it.
*
+ * A DSA can be created in or retrieved from the registry by calling
+ * GetNamedDSA(). As with GetNamedDSMSegment(), if a DSA with the provided
+ * name does not yet exist, it is created. Otherwise, GetNamedDSA()
+ * ensures the DSA is attached to the current backend. This function
+ * guarantees that only one backend initializes the DSA and that all other
+ * backends just attach it.
+ *
+ * A dshash table can be created in or retrieved from the registry by
+ * calling GetNamedDSHash(). As with GetNamedDSMSegment(), if a hash
+ * table with the provided name does not yet exist, it is created.
+ * Otherwise, GetNamedDSHash() ensures the hash table is attached to the
+ * current backend. This function guarantees that only one backend
+ * initializes the table and that all other backends just attach it.
+ *
* Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
@@ -32,6 +46,12 @@
#include "storage/shmem.h"
#include "utils/memutils.h"
+#define DSMR_NAME_LEN 128
+
+#define DSMR_DSA_TRANCHE_SUFFIX " DSA"
+#define DSMR_DSA_TRANCHE_SUFFIX_LEN (sizeof(DSMR_DSA_TRANCHE_SUFFIX) - 1)
+#define DSMR_DSA_TRANCHE_NAME_LEN (DSMR_NAME_LEN + DSMR_DSA_TRANCHE_SUFFIX_LEN)
+
typedef struct DSMRegistryCtxStruct
{
dsa_handle dsah;
@@ -40,15 +60,48 @@ typedef struct DSMRegistryCtxStruct
static DSMRegistryCtxStruct *DSMRegistryCtx;
-typedef struct DSMRegistryEntry
+typedef struct NamedDSMState
{
- char name[64];
dsm_handle handle;
size_t size;
+} NamedDSMState;
+
+typedef struct NamedDSAState
+{
+ dsa_handle handle;
+ int tranche;
+ char tranche_name[DSMR_DSA_TRANCHE_NAME_LEN];
+} NamedDSAState;
+
+typedef struct NamedDSHState
+{
+ NamedDSAState dsa;
+ dshash_table_handle handle;
+ int tranche;
+ char tranche_name[DSMR_NAME_LEN];
+} NamedDSHState;
+
+typedef enum DSMREntryType
+{
+ DSMR_ENTRY_TYPE_DSM,
+ DSMR_ENTRY_TYPE_DSA,
+ DSMR_ENTRY_TYPE_DSH,
+} DSMREntryType;
+
+typedef struct DSMRegistryEntry
+{
+ char name[DSMR_NAME_LEN];
+ DSMREntryType type;
+ union
+ {
+ NamedDSMState dsm;
+ NamedDSAState dsa;
+ NamedDSHState dsh;
+ } data;
} DSMRegistryEntry;
static const dshash_parameters dsh_params = {
- offsetof(DSMRegistryEntry, handle),
+ offsetof(DSMRegistryEntry, type),
sizeof(DSMRegistryEntry),
dshash_strcmp,
dshash_strhash,
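Note the layout dependency here: the entry's name remains the leading member and the dshash key, so the key size in dsh_params is now offsetof(DSMRegistryEntry, type) rather than offsetof(DSMRegistryEntry, handle), and the name-length checks below compare against that same offset.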
@@ -141,7 +194,7 @@ GetNamedDSMSegment(const char *name, size_t size,
ereport(ERROR,
(errmsg("DSM segment name cannot be empty")));
- if (strlen(name) >= offsetof(DSMRegistryEntry, handle))
+ if (strlen(name) >= offsetof(DSMRegistryEntry, type))
ereport(ERROR,
(errmsg("DSM segment name too long")));
@@ -158,32 +211,39 @@ GetNamedDSMSegment(const char *name, size_t size,
entry = dshash_find_or_insert(dsm_registry_table, name, found);
if (!(*found))
{
+ NamedDSMState *state = &entry->data.dsm;
+ dsm_segment *seg;
+
+ entry->type = DSMR_ENTRY_TYPE_DSM;
+
/* Initialize the segment. */
- dsm_segment *seg = dsm_create(size, 0);
+ seg = dsm_create(size, 0);
dsm_pin_segment(seg);
dsm_pin_mapping(seg);
- entry->handle = dsm_segment_handle(seg);
- entry->size = size;
+ state->handle = dsm_segment_handle(seg);
+ state->size = size;
ret = dsm_segment_address(seg);
if (init_callback)
(*init_callback) (ret);
}
- else if (entry->size != size)
- {
+ else if (entry->type != DSMR_ENTRY_TYPE_DSM)
ereport(ERROR,
- (errmsg("requested DSM segment size does not match size of "
- "existing segment")));
- }
+ (errmsg("requested DSM segment does not match type of existing entry")));
+ else if (entry->data.dsm.size != size)
+ ereport(ERROR,
+ (errmsg("requested DSM segment size does not match size of existing segment")));
else
{
- dsm_segment *seg = dsm_find_mapping(entry->handle);
+ NamedDSMState *state = &entry->data.dsm;
+ dsm_segment *seg;
/* If the existing segment is not already attached, attach it now. */
+ seg = dsm_find_mapping(state->handle);
if (seg == NULL)
{
- seg = dsm_attach(entry->handle);
+ seg = dsm_attach(state->handle);
if (seg == NULL)
elog(ERROR, "could not map dynamic shared memory segment");
@@ -198,3 +258,180 @@ GetNamedDSMSegment(const char *name, size_t size,
return ret;
}
+
+/*
+ * Initialize or attach a named DSA.
+ *
+ * This routine returns a pointer to the DSA. A new LWLock tranche ID will be
+ * generated if needed. Note that the lock tranche will be registered with the
+ * provided name. Also note that this should be called at most once for a
+ * given DSA in each backend.
+ */
+dsa_area *
+GetNamedDSA(const char *name, bool *found)
+{
+ DSMRegistryEntry *entry;
+ MemoryContext oldcontext;
+ dsa_area *ret;
+
+ Assert(found);
+
+ if (!name || *name == '\0')
+ ereport(ERROR,
+ (errmsg("DSA name cannot be empty")));
+
+ if (strlen(name) >= offsetof(DSMRegistryEntry, type))
+ ereport(ERROR,
+ (errmsg("DSA name too long")));
+
+ /* Be sure any local memory allocated by DSM/DSA routines is persistent. */
+ oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+
+ /* Connect to the registry. */
+ init_dsm_registry();
+
+ entry = dshash_find_or_insert(dsm_registry_table, name, found);
+ if (!(*found))
+ {
+ NamedDSAState *state = &entry->data.dsa;
+
+ entry->type = DSMR_ENTRY_TYPE_DSA;
+
+ /* Initialize the LWLock tranche for the DSA. */
+ state->tranche = LWLockNewTrancheId();
+ strcpy(state->tranche_name, name);
+ LWLockRegisterTranche(state->tranche, state->tranche_name);
+
+ /* Initialize the DSA. */
+ ret = dsa_create(state->tranche);
+ dsa_pin(ret);
+ dsa_pin_mapping(ret);
+
+ /* Store handle for other backends to use. */
+ state->handle = dsa_get_handle(ret);
+ }
+ else if (entry->type != DSMR_ENTRY_TYPE_DSA)
+ ereport(ERROR,
+ (errmsg("requested DSA does not match type of existing entry")));
+ else
+ {
+ NamedDSAState *state = &entry->data.dsa;
+
+ if (dsa_is_attached(state->handle))
+ ereport(ERROR,
+ (errmsg("requested DSA already attached to current process")));
+
+ /* Initialize existing LWLock tranche for the DSA. */
+ LWLockRegisterTranche(state->tranche, state->tranche_name);
+
+ /* Attach to existing DSA. */
+ ret = dsa_attach(state->handle);
+ dsa_pin_mapping(ret);
+ }
+
+ dshash_release_lock(dsm_registry_table, entry);
+ MemoryContextSwitchTo(oldcontext);
+
+ return ret;
+}
+
+/*
+ * Initialize or attach a named dshash table.
+ *
+ * This routine returns the address of the table. The tranche_id member of
+ * params is ignored; new tranche IDs will be generated if needed. Note that
+ * the DSA lock tranche will be registered with the provided name with " DSA"
+ * appended. The dshash lock tranche will be registered with the provided
+ * name. Also note that this should be called at most once for a given table
+ * in each backend.
+ */
+dshash_table *
+GetNamedDSHash(const char *name, const dshash_parameters *params, bool *found)
+{
+ DSMRegistryEntry *entry;
+ MemoryContext oldcontext;
+ dshash_table *ret;
+
+ Assert(params);
+ Assert(found);
+
+ if (!name || *name == '\0')
+ ereport(ERROR,
+ (errmsg("DSHash name cannot be empty")));
+
+ if (strlen(name) >= offsetof(DSMRegistryEntry, type))
+ ereport(ERROR,
+ (errmsg("DSHash name too long")));
+
+ /* Be sure any local memory allocated by DSM/DSA routines is persistent. */
+ oldcontext = MemoryContextSwitchTo(TopMemoryContext);
+
+ /* Connect to the registry. */
+ init_dsm_registry();
+
+ entry = dshash_find_or_insert(dsm_registry_table, name, found);
+ if (!(*found))
+ {
+ NamedDSAState *dsa_state = &entry->data.dsh.dsa;
+ NamedDSHState *dsh_state = &entry->data.dsh;
+ dshash_parameters params_copy;
+ dsa_area *dsa;
+
+ entry->type = DSMR_ENTRY_TYPE_DSH;
+
+ /* Initialize the LWLock tranche for the DSA. */
+ dsa_state->tranche = LWLockNewTrancheId();
+ sprintf(dsa_state->tranche_name, "%s%s", name, DSMR_DSA_TRANCHE_SUFFIX);
+ LWLockRegisterTranche(dsa_state->tranche, dsa_state->tranche_name);
+
+ /* Initialize the LWLock tranche for the dshash table. */
+ dsh_state->tranche = LWLockNewTrancheId();
+ strcpy(dsh_state->tranche_name, name);
+ LWLockRegisterTranche(dsh_state->tranche, dsh_state->tranche_name);
+
+ /* Initialize the DSA for the hash table. */
+ dsa = dsa_create(dsa_state->tranche);
+ dsa_pin(dsa);
+ dsa_pin_mapping(dsa);
+
+ /* Initialize the dshash table. */
+ memcpy(&params_copy, params, sizeof(dshash_parameters));
+ params_copy.tranche_id = dsh_state->tranche;
+ ret = dshash_create(dsa, &params_copy, NULL);
+
+ /* Store handles for other backends to use. */
+ dsa_state->handle = dsa_get_handle(dsa);
+ dsh_state->handle = dshash_get_hash_table_handle(ret);
+ }
+ else if (entry->type != DSMR_ENTRY_TYPE_DSH)
+ ereport(ERROR,
+ (errmsg("requested DSHash does not match type of existing entry")));
+ else
+ {
+ NamedDSAState *dsa_state = &entry->data.dsh.dsa;
+ NamedDSHState *dsh_state = &entry->data.dsh;
+ dsa_area *dsa;
+
+ /* XXX: Should we verify params match what the table was created with? */
+
+ if (dsa_is_attached(dsa_state->handle))
+ ereport(ERROR,
+ (errmsg("requested DSHash already attached to current process")));
+
+ /* Initialize existing LWLock tranches for the DSA and dshash table. */
+ LWLockRegisterTranche(dsa_state->tranche, dsa_state->tranche_name);
+ LWLockRegisterTranche(dsh_state->tranche, dsh_state->tranche_name);
+
+ /* Attach to existing DSA for the hash table. */
+ dsa = dsa_attach(dsa_state->handle);
+ dsa_pin_mapping(dsa);
+
+ /* Attach to existing dshash table. */
+ ret = dshash_attach(dsa, params, dsh_state->handle, NULL);
+ }
+
+ dshash_release_lock(dsm_registry_table, entry);
+ MemoryContextSwitchTo(oldcontext);
+
+ return ret;
+}
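For context, a sketch of how an extension might use the registry entry points (the extension name, struct, and helpers are hypothetical; GetNamedDSMSegment() and GetNamedDSA() are the real APIs):

#include "postgres.h"

#include "storage/dsm_registry.h"

/* Hypothetical extension state kept in a named DSM segment. */
typedef struct MyExtState
{
    int         counter;
} MyExtState;

static void
myext_init_state(void *ptr)
{
    ((MyExtState *) ptr)->counter = 0;
}

static MyExtState *
myext_attach(void)
{
    bool        found;

    /* The first caller creates and initializes; later callers attach. */
    return (MyExtState *) GetNamedDSMSegment("myext_state",
                                             sizeof(MyExtState),
                                             myext_init_state,
                                             &found);
}

A DSA or dshash table is obtained the same way, e.g. GetNamedDSA("myext_area", &found), with the caveat noted in the comments above that each should be requested at most once per backend.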
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 00c76d05356..2fa045e6b0f 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -51,7 +51,6 @@
#include "storage/sinvaladt.h"
#include "utils/guc.h"
#include "utils/injection_point.h"
-#include "utils/memutils.h"
/* GUCs */
int shared_memory_type = DEFAULT_SHARED_MEMORY_TYPE;
@@ -151,7 +150,6 @@ CalculateShmemSize(int *num_semaphores)
size = add_size(size, InjectionPointShmemSize());
size = add_size(size, SlotSyncShmemSize());
size = add_size(size, AioShmemSize());
- size = add_size(size, MemoryContextReportingShmemSize());
/* include additional requested shmem from preload libraries */
size = add_size(size, total_addin_request);
@@ -345,7 +343,6 @@ CreateOrAttachShmemStructs(void)
WaitEventCustomShmemInit();
InjectionPointShmemInit();
AioShmemInit();
- MemoryContextReportingShmemInit();
}
/*
diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c
index ce69e26d720..a9bb540b55a 100644
--- a/src/backend/storage/ipc/procsignal.c
+++ b/src/backend/storage/ipc/procsignal.c
@@ -691,9 +691,6 @@ procsignal_sigusr1_handler(SIGNAL_ARGS)
if (CheckProcSignal(PROCSIG_LOG_MEMORY_CONTEXT))
HandleLogMemoryContextInterrupt();
- if (CheckProcSignal(PROCSIG_GET_MEMORY_CONTEXT))
- HandleGetMemoryContextInterrupt();
-
if (CheckProcSignal(PROCSIG_PARALLEL_APPLY_MESSAGE))
HandleParallelApplyMessageInterrupt();
diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index c9ae3b45b76..ca3656fc76f 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -679,12 +679,10 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS)
*/
for (i = 0; i < shm_ent_page_count; i++)
{
- volatile uint64 touch pg_attribute_unused();
-
page_ptrs[i] = startptr + (i * os_page_size);
if (firstNumaTouch)
- pg_numa_touch_mem_if_required(touch, page_ptrs[i]);
+ pg_numa_touch_mem_if_required(page_ptrs[i]);
CHECK_FOR_INTERRUPTS();
}
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index f50962983c3..3f6bf70bd3c 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -717,7 +717,10 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid,
* through, to avoid slowing down the normal case.)
*/
if (!first)
+ {
+ CHECK_FOR_INTERRUPTS();
pg_usleep(1000L);
+ }
first = false;
xid = SubTransGetTopmostTransaction(xid);
}
@@ -757,7 +760,10 @@ ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure)
/* See XactLockTableWait about this case */
if (!first)
+ {
+ CHECK_FOR_INTERRUPTS();
pg_usleep(1000L);
+ }
first = false;
xid = SubTransGetTopmostTransaction(xid);
}
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 86b06b9223f..2776ceb295b 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -51,7 +51,7 @@
/* GUC variables */
int max_locks_per_xact; /* used to set the lock table size */
-bool log_lock_failure = false;
+bool log_lock_failures = false;
#define NLOCKENTS() \
mul_size(max_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 5148ef982e3..46f44bc4511 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -178,8 +178,6 @@ static const char *const BuiltinTrancheNames[] = {
[LWTRANCHE_XACT_SLRU] = "XactSLRU",
[LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA",
[LWTRANCHE_AIO_URING_COMPLETION] = "AioUringCompletion",
- [LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE] = "MemoryContextReportingState",
- [LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC] = "MemoryContextReportingPerProcess",
};
StaticAssertDecl(lengthof(BuiltinTrancheNames) ==
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index f194e6b3dcc..e9ef0fbfe32 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -50,7 +50,6 @@
#include "storage/procsignal.h"
#include "storage/spin.h"
#include "storage/standby.h"
-#include "utils/memutils.h"
#include "utils/timeout.h"
#include "utils/timestamp.h"
diff --git a/src/backend/tcop/backend_startup.c b/src/backend/tcop/backend_startup.c
index a7d1fec981f..ad0af5edc1f 100644
--- a/src/backend/tcop/backend_startup.c
+++ b/src/backend/tcop/backend_startup.c
@@ -881,7 +881,7 @@ ProcessCancelRequestPacket(Port *port, void *pkt, int pktlen)
{
ereport(COMMERROR,
(errcode(ERRCODE_PROTOCOL_VIOLATION),
- errmsg("invalid length of query cancel packet")));
+ errmsg("invalid length of cancel request packet")));
return;
}
len = pktlen - offsetof(CancelRequestPacket, cancelAuthCode);
@@ -889,7 +889,7 @@ ProcessCancelRequestPacket(Port *port, void *pkt, int pktlen)
{
ereport(COMMERROR,
(errcode(ERRCODE_PROTOCOL_VIOLATION),
- errmsg("invalid length of query cancel key")));
+ errmsg("invalid length of cancel key in cancel request packet")));
return;
}
@@ -1077,7 +1077,7 @@ check_log_connections(char **newval, void **extra, GucSource source)
if (!SplitIdentifierString(rawstring, ',', &elemlist))
{
- GUC_check_errdetail("Invalid list syntax in parameter \"log_connections\".");
+ GUC_check_errdetail("Invalid list syntax in parameter \"%s\".", "log_connections");
pfree(rawstring);
list_free(elemlist);
return false;
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 1ae51b1b391..2f8c3d5f918 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -1226,7 +1226,6 @@ exec_simple_query(const char *query_string)
query_string,
commandTag,
plantree_list,
- NULL,
NULL);
/*
@@ -1683,7 +1682,7 @@ exec_bind_message(StringInfo input_message)
{
Query *query = lfirst_node(Query, lc);
- if (query->queryId != UINT64CONST(0))
+ if (query->queryId != INT64CONST(0))
{
pgstat_report_query_id(query->queryId, false);
break;
@@ -2028,15 +2027,14 @@ exec_bind_message(StringInfo input_message)
query_string,
psrc->commandTag,
cplan->stmt_list,
- cplan,
- psrc);
+ cplan);
/* Portal is defined, set the plan ID based on its contents. */
foreach(lc, portal->stmts)
{
PlannedStmt *plan = lfirst_node(PlannedStmt, lc);
- if (plan->planId != UINT64CONST(0))
+ if (plan->planId != INT64CONST(0))
{
pgstat_report_plan_id(plan->planId, false);
break;
@@ -2176,7 +2174,7 @@ exec_execute_message(const char *portal_name, long max_rows)
{
PlannedStmt *stmt = lfirst_node(PlannedStmt, lc);
- if (stmt->queryId != UINT64CONST(0))
+ if (stmt->queryId != INT64CONST(0))
{
pgstat_report_query_id(stmt->queryId, false);
break;
@@ -2187,7 +2185,7 @@ exec_execute_message(const char *portal_name, long max_rows)
{
PlannedStmt *stmt = lfirst_node(PlannedStmt, lc);
- if (stmt->planId != UINT64CONST(0))
+ if (stmt->planId != INT64CONST(0))
{
pgstat_report_plan_id(stmt->planId, false);
break;
@@ -3535,9 +3533,6 @@ ProcessInterrupts(void)
if (LogMemoryContextPending)
ProcessLogMemoryContextInterrupt();
- if (PublishMemoryContextPending)
- ProcessGetMemoryContextInterrupt();
-
if (ParallelApplyMessagePending)
ProcessParallelApplyMessages();
}
@@ -3695,7 +3690,7 @@ set_debug_options(int debug_flag, GucContext context, GucSource source)
if (debug_flag >= 1 && context == PGC_POSTMASTER)
{
- SetConfigOption("log_connections", "true", context, source);
+ SetConfigOption("log_connections", "all", context, source);
SetConfigOption("log_disconnections", "true", context, source);
}
if (debug_flag >= 2)
diff --git a/src/backend/tcop/pquery.c b/src/backend/tcop/pquery.c
index 8164d0fbb4f..d1593f38b35 100644
--- a/src/backend/tcop/pquery.c
+++ b/src/backend/tcop/pquery.c
@@ -19,7 +19,6 @@
#include "access/xact.h"
#include "commands/prepare.h"
-#include "executor/execdesc.h"
#include "executor/executor.h"
#include "executor/tstoreReceiver.h"
#include "miscadmin.h"
@@ -38,9 +37,6 @@ Portal ActivePortal = NULL;
static void ProcessQuery(PlannedStmt *plan,
- CachedPlan *cplan,
- CachedPlanSource *plansource,
- int query_index,
const char *sourceText,
ParamListInfo params,
QueryEnvironment *queryEnv,
@@ -70,7 +66,6 @@ static void DoPortalRewind(Portal portal);
*/
QueryDesc *
CreateQueryDesc(PlannedStmt *plannedstmt,
- CachedPlan *cplan,
const char *sourceText,
Snapshot snapshot,
Snapshot crosscheck_snapshot,
@@ -83,7 +78,6 @@ CreateQueryDesc(PlannedStmt *plannedstmt,
qd->operation = plannedstmt->commandType; /* operation */
qd->plannedstmt = plannedstmt; /* plan */
- qd->cplan = cplan; /* CachedPlan supplying the plannedstmt */
qd->sourceText = sourceText; /* query text */
qd->snapshot = RegisterSnapshot(snapshot); /* snapshot */
/* RI check snapshot */
@@ -129,9 +123,6 @@ FreeQueryDesc(QueryDesc *qdesc)
* PORTAL_ONE_RETURNING, or PORTAL_ONE_MOD_WITH portal
*
* plan: the plan tree for the query
- * cplan: CachedPlan supplying the plan
- * plansource: CachedPlanSource supplying the cplan
- * query_index: index of the query in plansource->query_list
* sourceText: the source text of the query
* params: any parameters needed
* dest: where to send results
@@ -144,9 +135,6 @@ FreeQueryDesc(QueryDesc *qdesc)
*/
static void
ProcessQuery(PlannedStmt *plan,
- CachedPlan *cplan,
- CachedPlanSource *plansource,
- int query_index,
const char *sourceText,
ParamListInfo params,
QueryEnvironment *queryEnv,
@@ -158,23 +146,14 @@ ProcessQuery(PlannedStmt *plan,
/*
* Create the QueryDesc object
*/
- queryDesc = CreateQueryDesc(plan, cplan, sourceText,
+ queryDesc = CreateQueryDesc(plan, sourceText,
GetActiveSnapshot(), InvalidSnapshot,
dest, params, queryEnv, 0);
/*
- * Prepare the plan for execution
+ * Call ExecutorStart to prepare the plan for execution
*/
- if (queryDesc->cplan)
- {
- ExecutorStartCachedPlan(queryDesc, 0, plansource, query_index);
- Assert(queryDesc->planstate);
- }
- else
- {
- if (!ExecutorStart(queryDesc, 0))
- elog(ERROR, "ExecutorStart() failed unexpectedly");
- }
+ ExecutorStart(queryDesc, 0);
/*
* Run the plan to completion.
@@ -515,7 +494,6 @@ PortalStart(Portal portal, ParamListInfo params,
* the destination to DestNone.
*/
queryDesc = CreateQueryDesc(linitial_node(PlannedStmt, portal->stmts),
- portal->cplan,
portal->sourceText,
GetActiveSnapshot(),
InvalidSnapshot,
@@ -535,19 +513,9 @@ PortalStart(Portal portal, ParamListInfo params,
myeflags = eflags;
/*
- * Prepare the plan for execution.
+ * Call ExecutorStart to prepare the plan for execution
*/
- if (portal->cplan)
- {
- ExecutorStartCachedPlan(queryDesc, myeflags,
- portal->plansource, 0);
- Assert(queryDesc->planstate);
- }
- else
- {
- if (!ExecutorStart(queryDesc, myeflags))
- elog(ERROR, "ExecutorStart() failed unexpectedly");
- }
+ ExecutorStart(queryDesc, myeflags);
/*
* This tells PortalCleanup to shut down the executor
@@ -1221,7 +1189,6 @@ PortalRunMulti(Portal portal,
{
bool active_snapshot_set = false;
ListCell *stmtlist_item;
- int query_index = 0;
/*
* If the destination is DestRemoteExecute, change to DestNone. The
@@ -1303,9 +1270,6 @@ PortalRunMulti(Portal portal,
{
/* statement can set tag string */
ProcessQuery(pstmt,
- portal->cplan,
- portal->plansource,
- query_index,
portal->sourceText,
portal->portalParams,
portal->queryEnv,
@@ -1315,9 +1279,6 @@ PortalRunMulti(Portal portal,
{
/* stmt added by rewrite cannot set tag */
ProcessQuery(pstmt,
- portal->cplan,
- portal->plansource,
- query_index,
portal->sourceText,
portal->portalParams,
portal->queryEnv,
@@ -1382,8 +1343,6 @@ PortalRunMulti(Portal portal,
*/
if (lnext(portal->stmts, stmtlist_item) != NULL)
CommandCounterIncrement();
-
- query_index++;
}
/* Pop the snapshot if we pushed one. */
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 25fe3d58016..aff8510755f 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -1343,7 +1343,7 @@ ProcessUtilitySlow(ParseState *pstate,
*/
switch (stmt->subtype)
{
- case 'T': /* ALTER DOMAIN DEFAULT */
+ case AD_AlterDefault:
/*
* Recursively alter column default for table and,
@@ -1353,30 +1353,30 @@ ProcessUtilitySlow(ParseState *pstate,
AlterDomainDefault(stmt->typeName,
stmt->def);
break;
- case 'N': /* ALTER DOMAIN DROP NOT NULL */
+ case AD_DropNotNull:
address =
AlterDomainNotNull(stmt->typeName,
false);
break;
- case 'O': /* ALTER DOMAIN SET NOT NULL */
+ case AD_SetNotNull:
address =
AlterDomainNotNull(stmt->typeName,
true);
break;
- case 'C': /* ADD CONSTRAINT */
+ case AD_AddConstraint:
address =
AlterDomainAddConstraint(stmt->typeName,
stmt->def,
&secondaryObject);
break;
- case 'X': /* DROP CONSTRAINT */
+ case AD_DropConstraint:
address =
AlterDomainDropConstraint(stmt->typeName,
stmt->name,
stmt->behavior,
stmt->missing_ok);
break;
- case 'V': /* VALIDATE CONSTRAINT */
+ case AD_ValidateConstraint:
address =
AlterDomainValidateConstraint(stmt->typeName,
stmt->name);
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
index e1576e64b6d..a290cc4c975 100644
--- a/src/backend/utils/activity/backend_status.c
+++ b/src/backend/utils/activity/backend_status.c
@@ -320,8 +320,8 @@ pgstat_bestart_initial(void)
lbeentry.st_state = STATE_STARTING;
lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID;
lbeentry.st_progress_command_target = InvalidOid;
- lbeentry.st_query_id = UINT64CONST(0);
- lbeentry.st_plan_id = UINT64CONST(0);
+ lbeentry.st_query_id = INT64CONST(0);
+ lbeentry.st_plan_id = INT64CONST(0);
/*
* we don't zero st_progress_param here to save cycles; nobody should
@@ -599,8 +599,8 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
beentry->st_activity_start_timestamp = 0;
/* st_xact_start_timestamp and wait_event_info are also disabled */
beentry->st_xact_start_timestamp = 0;
- beentry->st_query_id = UINT64CONST(0);
- beentry->st_plan_id = UINT64CONST(0);
+ beentry->st_query_id = INT64CONST(0);
+ beentry->st_plan_id = INT64CONST(0);
proc->wait_event_info = 0;
PGSTAT_END_WRITE_ACTIVITY(beentry);
}
@@ -662,8 +662,8 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
*/
if (state == STATE_RUNNING)
{
- beentry->st_query_id = UINT64CONST(0);
- beentry->st_plan_id = UINT64CONST(0);
+ beentry->st_query_id = INT64CONST(0);
+ beentry->st_plan_id = INT64CONST(0);
}
if (cmd_str != NULL)
@@ -683,7 +683,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
* --------
*/
void
-pgstat_report_query_id(uint64 query_id, bool force)
+pgstat_report_query_id(int64 query_id, bool force)
{
volatile PgBackendStatus *beentry = MyBEEntry;
@@ -702,7 +702,7 @@ pgstat_report_query_id(uint64 query_id, bool force)
* command, so ignore the one provided unless it's an explicit call to
* reset the identifier.
*/
- if (beentry->st_query_id != 0 && !force)
+ if (beentry->st_query_id != INT64CONST(0) && !force)
return;
/*
@@ -722,7 +722,7 @@ pgstat_report_query_id(uint64 query_id, bool force)
* --------
*/
void
-pgstat_report_plan_id(uint64 plan_id, bool force)
+pgstat_report_plan_id(int64 plan_id, bool force)
{
volatile PgBackendStatus *beentry = MyBEEntry;
@@ -1134,7 +1134,7 @@ pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen)
*
* Return current backend's query identifier.
*/
-uint64
+int64
pgstat_get_my_query_id(void)
{
if (!MyBEEntry)
@@ -1154,7 +1154,7 @@ pgstat_get_my_query_id(void)
*
* Return current backend's plan identifier.
*/
-uint64
+int64
pgstat_get_my_plan_id(void)
{
if (!MyBEEntry)
diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c
index 2e33293b000..53e7d534270 100644
--- a/src/backend/utils/activity/pgstat_shmem.c
+++ b/src/backend/utils/activity/pgstat_shmem.c
@@ -183,7 +183,7 @@ StatsShmemInit(void)
p += MAXALIGN(pgstat_dsa_init_size());
dsa = dsa_create_in_place(ctl->raw_dsa_area,
pgstat_dsa_init_size(),
- LWTRANCHE_PGSTATS_DSA, 0);
+ LWTRANCHE_PGSTATS_DSA, NULL);
dsa_pin(dsa);
/*
@@ -255,7 +255,8 @@ pgstat_attach_shmem(void)
dsa_pin_mapping(pgStatLocal.dsa);
pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params,
- pgStatLocal.shmem->hash_handle, 0);
+ pgStatLocal.shmem->hash_handle,
+ NULL);
MemoryContextSwitchTo(oldcontext);
}
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 930321905f1..4da68312b5f 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -161,7 +161,6 @@ WAL_RECEIVER_EXIT "Waiting for the WAL receiver to exit."
WAL_RECEIVER_WAIT_START "Waiting for startup process to send initial data for streaming replication."
WAL_SUMMARY_READY "Waiting for a new WAL summary to be generated."
XACT_GROUP_UPDATE "Waiting for the group leader to update transaction status at transaction end."
-MEM_CXT_PUBLISH "Waiting for a process to publish memory information."
ABI_compatibility:
@@ -402,6 +401,7 @@ SerialSLRU "Waiting to access the serializable transaction conflict SLRU cache."
SubtransSLRU "Waiting to access the sub-transaction SLRU cache."
XactSLRU "Waiting to access the transaction status SLRU cache."
ParallelVacuumDSA "Waiting for parallel vacuum dynamic shared memory allocation."
+AioUringCompletion "Waiting for another process to complete IO via io_uring."
# No "ABI_compatibility" region here as WaitEventLWLock has its own C code.
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index 4a233b63c32..ffeacf2b819 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -23,6 +23,7 @@ OBJS = \
arrayutils.o \
ascii.o \
bool.o \
+ bytea.o \
cash.o \
char.o \
cryptohashfuncs.o \
diff --git a/src/backend/utils/adt/bytea.c b/src/backend/utils/adt/bytea.c
new file mode 100644
index 00000000000..2e539c2504e
--- /dev/null
+++ b/src/backend/utils/adt/bytea.c
@@ -0,0 +1,1143 @@
+/*-------------------------------------------------------------------------
+ *
+ * bytea.c
+ * Functions for the bytea type.
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/bytea.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/detoast.h"
+#include "catalog/pg_collation_d.h"
+#include "catalog/pg_type_d.h"
+#include "common/int.h"
+#include "fmgr.h"
+#include "libpq/pqformat.h"
+#include "port/pg_bitutils.h"
+#include "utils/builtins.h"
+#include "utils/bytea.h"
+#include "utils/fmgrprotos.h"
+#include "utils/memutils.h"
+#include "utils/sortsupport.h"
+#include "utils/varlena.h"
+#include "varatt.h"
+
+/* GUC variable */
+int bytea_output = BYTEA_OUTPUT_HEX;
+
+static bytea *bytea_catenate(bytea *t1, bytea *t2);
+static bytea *bytea_substring(Datum str, int S, int L,
+ bool length_not_specified);
+static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
+
+/*
+ * bytea_catenate
+ * Guts of byteacat(), broken out so it can be used by other functions
+ *
+ * Arguments can be in short-header form, but not compressed or out-of-line
+ */
+static bytea *
+bytea_catenate(bytea *t1, bytea *t2)
+{
+ bytea *result;
+ int len1,
+ len2,
+ len;
+ char *ptr;
+
+ len1 = VARSIZE_ANY_EXHDR(t1);
+ len2 = VARSIZE_ANY_EXHDR(t2);
+
+ /* paranoia ... probably should throw error instead? */
+ if (len1 < 0)
+ len1 = 0;
+ if (len2 < 0)
+ len2 = 0;
+
+ len = len1 + len2 + VARHDRSZ;
+ result = (bytea *) palloc(len);
+
+ /* Set size of result string... */
+ SET_VARSIZE(result, len);
+
+ /* Fill data field of result string... */
+ ptr = VARDATA(result);
+ if (len1 > 0)
+ memcpy(ptr, VARDATA_ANY(t1), len1);
+ if (len2 > 0)
+ memcpy(ptr + len1, VARDATA_ANY(t2), len2);
+
+ return result;
+}
+
+#define PG_STR_GET_BYTEA(str_) \
+ DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
+
+static bytea *
+bytea_substring(Datum str,
+ int S,
+ int L,
+ bool length_not_specified)
+{
+ int32 S1; /* adjusted start position */
+ int32 L1; /* adjusted substring length */
+ int32 E; /* end position */
+
+ /*
+ * The logic here should generally match text_substring().
+ */
+ S1 = Max(S, 1);
+
+ if (length_not_specified)
+ {
+ /*
+ * Not passed a length - DatumGetByteaPSlice() grabs everything to the
+ * end of the string if we pass it a negative value for length.
+ */
+ L1 = -1;
+ }
+ else if (L < 0)
+ {
+ /* SQL99 says to throw an error for E < S, i.e., negative length */
+ ereport(ERROR,
+ (errcode(ERRCODE_SUBSTRING_ERROR),
+ errmsg("negative substring length not allowed")));
+ L1 = -1; /* silence stupider compilers */
+ }
+ else if (pg_add_s32_overflow(S, L, &E))
+ {
+ /*
+ * L could be large enough for S + L to overflow, in which case the
+ * substring must run to end of string.
+ */
+ L1 = -1;
+ }
+ else
+ {
+ /*
+ * A zero or negative value for the end position can happen if the
+ * start was negative or one. SQL99 says to return a zero-length
+ * string.
+ */
+ if (E < 1)
+ return PG_STR_GET_BYTEA("");
+
+ L1 = E - S1;
+ }
+
+ /*
+ * If the start position is past the end of the string, SQL99 says to
+ * return a zero-length string -- DatumGetByteaPSlice() will do that for
+ * us. We need only convert S1 to zero-based starting position.
+ */
+ return DatumGetByteaPSlice(str, S1 - 1, L1);
+}
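A worked example of the boundary math above (not from the patch): a call with S = -2 and L = 5 gives S1 = Max(-2, 1) = 1, E = S + L = 3, and L1 = E - S1 = 2, so the negative start consumes part of the requested length and the first two bytes are returned, as SQL99 specifies. With S = 4 and no length, L1 = -1 and DatumGetByteaPSlice() runs to the end of the string.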
+
+static bytea *
+bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
+{
+ bytea *result;
+ bytea *s1;
+ bytea *s2;
+ int sp_pl_sl;
+
+ /*
+ * Check for possible integer-overflow cases. For negative sp, throw a
+ * "substring length" error because that's what should be expected
+ * according to the spec's definition of OVERLAY().
+ */
+ if (sp <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SUBSTRING_ERROR),
+ errmsg("negative substring length not allowed")));
+ if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+
+ s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
+ s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
+ result = bytea_catenate(s1, t2);
+ result = bytea_catenate(result, s2);
+
+ return result;
+}
+
+/*****************************************************************************
+ * USER I/O ROUTINES *
+ *****************************************************************************/
+
+#define VAL(CH) ((CH) - '0')
+#define DIG(VAL) ((VAL) + '0')
+
+/*
+ * byteain - converts from printable representation of byte array
+ *
+ * Non-printable characters must be passed as '\nnn' (octal) and are
+ * converted to internal form. '\' must be passed as '\\'.
+ * ereport(ERROR, ...) if bad form.
+ *
+ * BUGS:
+ * The input is scanned twice.
+ * The error checking of input is minimal.
+ */
+Datum
+byteain(PG_FUNCTION_ARGS)
+{
+ char *inputText = PG_GETARG_CSTRING(0);
+ Node *escontext = fcinfo->context;
+ char *tp;
+ char *rp;
+ int bc;
+ bytea *result;
+
+ /* Recognize hex input */
+ if (inputText[0] == '\\' && inputText[1] == 'x')
+ {
+ size_t len = strlen(inputText);
+
+ bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
+ result = palloc(bc);
+ bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
+ escontext);
+ SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
+
+ PG_RETURN_BYTEA_P(result);
+ }
+
+ /* Else, it's the traditional escaped style */
+ for (bc = 0, tp = inputText; *tp != '\0'; bc++)
+ {
+ if (tp[0] != '\\')
+ tp++;
+ else if ((tp[0] == '\\') &&
+ (tp[1] >= '0' && tp[1] <= '3') &&
+ (tp[2] >= '0' && tp[2] <= '7') &&
+ (tp[3] >= '0' && tp[3] <= '7'))
+ tp += 4;
+ else if ((tp[0] == '\\') &&
+ (tp[1] == '\\'))
+ tp += 2;
+ else
+ {
+ /*
+ * one backslash, not followed by another or ### valid octal
+ */
+ ereturn(escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "bytea")));
+ }
+ }
+
+ bc += VARHDRSZ;
+
+ result = (bytea *) palloc(bc);
+ SET_VARSIZE(result, bc);
+
+ tp = inputText;
+ rp = VARDATA(result);
+ while (*tp != '\0')
+ {
+ if (tp[0] != '\\')
+ *rp++ = *tp++;
+ else if ((tp[0] == '\\') &&
+ (tp[1] >= '0' && tp[1] <= '3') &&
+ (tp[2] >= '0' && tp[2] <= '7') &&
+ (tp[3] >= '0' && tp[3] <= '7'))
+ {
+ bc = VAL(tp[1]);
+ bc <<= 3;
+ bc += VAL(tp[2]);
+ bc <<= 3;
+ *rp++ = bc + VAL(tp[3]);
+
+ tp += 4;
+ }
+ else if ((tp[0] == '\\') &&
+ (tp[1] == '\\'))
+ {
+ *rp++ = '\\';
+ tp += 2;
+ }
+ else
+ {
+ /*
+ * We should never get here. The first pass should not allow it.
+ */
+ ereturn(escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "bytea")));
+ }
+ }
+
+ PG_RETURN_BYTEA_P(result);
+}
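For reference, the two accepted input forms handled above (worked examples, not from the patch): the hex input \x4142 decodes to the two bytes 0x41 0x42, while the escaped input A\101\\ scans as 'A', the octal escape \101 (0x41), and a literal backslash, three bytes in all. The first loop only measures the output length; the second fills the buffer, which is why a malformed escape in the second loop is unreachable.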
+
+/*
+ * byteaout - converts to printable representation of byte array
+ *
+ * In the traditional escaped format, non-printable characters are
+ * printed as '\nnn' (octal) and '\' as '\\'.
+ */
+Datum
+byteaout(PG_FUNCTION_ARGS)
+{
+ bytea *vlena = PG_GETARG_BYTEA_PP(0);
+ char *result;
+ char *rp;
+
+ if (bytea_output == BYTEA_OUTPUT_HEX)
+ {
+ /* Print hex format */
+ rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
+ *rp++ = '\\';
+ *rp++ = 'x';
+ rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
+ }
+ else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
+ {
+ /* Print traditional escaped format */
+ char *vp;
+ uint64 len;
+ int i;
+
+ len = 1; /* empty string has 1 char */
+ vp = VARDATA_ANY(vlena);
+ for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
+ {
+ if (*vp == '\\')
+ len += 2;
+ else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
+ len += 4;
+ else
+ len++;
+ }
+
+ /*
+ * In principle len can't overflow uint32 if the input fit in 1GB, but
+ * for safety let's check rather than relying on palloc's internal
+ * check.
+ */
+ if (len > MaxAllocSize)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg_internal("result of bytea output conversion is too large")));
+ rp = result = (char *) palloc(len);
+
+ vp = VARDATA_ANY(vlena);
+ for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
+ {
+ if (*vp == '\\')
+ {
+ *rp++ = '\\';
+ *rp++ = '\\';
+ }
+ else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
+ {
+ int val; /* holds unprintable chars */
+
+ val = *vp;
+ rp[0] = '\\';
+ rp[3] = DIG(val & 07);
+ val >>= 3;
+ rp[2] = DIG(val & 07);
+ val >>= 3;
+ rp[1] = DIG(val & 03);
+ rp += 4;
+ }
+ else
+ *rp++ = *vp;
+ }
+ }
+ else
+ {
+ elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
+ bytea_output);
+ rp = result = NULL; /* keep compiler quiet */
+ }
+ *rp = '\0';
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * bytearecv - converts external binary format to bytea
+ */
+Datum
+bytearecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ bytea *result;
+ int nbytes;
+
+ nbytes = buf->len - buf->cursor;
+ result = (bytea *) palloc(nbytes + VARHDRSZ);
+ SET_VARSIZE(result, nbytes + VARHDRSZ);
+ pq_copymsgbytes(buf, VARDATA(result), nbytes);
+ PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ * byteasend - converts bytea to binary format
+ *
+ * This is a special case: just copy the input...
+ */
+Datum
+byteasend(PG_FUNCTION_ARGS)
+{
+ bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
+
+ PG_RETURN_BYTEA_P(vlena);
+}
+
+Datum
+bytea_string_agg_transfn(PG_FUNCTION_ARGS)
+{
+ StringInfo state;
+
+ state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
+
+ /* Append the value unless null, preceding it with the delimiter. */
+ if (!PG_ARGISNULL(1))
+ {
+ bytea *value = PG_GETARG_BYTEA_PP(1);
+ bool isfirst = false;
+
+ /*
+ * You might think we can just throw away the first delimiter; however,
+ * we must keep it as we may be a parallel worker doing partial
+ * aggregation building a state to send to the main process. We need
+ * to keep the delimiter of every aggregation so that the combine
+ * function can properly join up the strings of two separately
+ * partially aggregated results. The first delimiter is only stripped
+ * off in the final function. To know how much to strip off the front
+ * of the string, we store the length of the first delimiter in the
+ * StringInfo's cursor field, which we don't otherwise need here.
+ */
+ if (state == NULL)
+ {
+ MemoryContext aggcontext;
+ MemoryContext oldcontext;
+
+ if (!AggCheckCallContext(fcinfo, &aggcontext))
+ {
+ /* cannot be called directly because of internal-type argument */
+ elog(ERROR, "bytea_string_agg_transfn called in non-aggregate context");
+ }
+
+ /*
+ * Create state in aggregate context. It'll stay there across
+ * subsequent calls.
+ */
+ oldcontext = MemoryContextSwitchTo(aggcontext);
+ state = makeStringInfo();
+ MemoryContextSwitchTo(oldcontext);
+
+ isfirst = true;
+ }
+
+ if (!PG_ARGISNULL(2))
+ {
+ bytea *delim = PG_GETARG_BYTEA_PP(2);
+
+ appendBinaryStringInfo(state, VARDATA_ANY(delim),
+ VARSIZE_ANY_EXHDR(delim));
+ if (isfirst)
+ state->cursor = VARSIZE_ANY_EXHDR(delim);
+ }
+
+ appendBinaryStringInfo(state, VARDATA_ANY(value),
+ VARSIZE_ANY_EXHDR(value));
+ }
+
+ /*
+ * The transition type for string_agg() is declared to be "internal",
+ * which is a pass-by-value type the same size as a pointer.
+ */
+ if (state)
+ PG_RETURN_POINTER(state);
+ PG_RETURN_NULL();
+}
+
+Datum
+bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
+{
+ StringInfo state;
+
+ /* cannot be called directly because of internal-type argument */
+ Assert(AggCheckCallContext(fcinfo, NULL));
+
+ state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
+
+ if (state != NULL)
+ {
+ /* As per comment in transfn, strip data before the cursor position */
+ bytea *result;
+ int strippedlen = state->len - state->cursor;
+
+ result = (bytea *) palloc(strippedlen + VARHDRSZ);
+ SET_VARSIZE(result, strippedlen + VARHDRSZ);
+ memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
+ PG_RETURN_BYTEA_P(result);
+ }
+ else
+ PG_RETURN_NULL();
+}
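A worked example of the cursor trick described in the transition function (illustrative; the combine function lives elsewhere): aggregating 'a' then 'b' with delimiter ',' in one worker builds the state ',a,b' with cursor = 1; a second worker's partial state ',c' can then be appended verbatim by the combine step, giving ',a,b,c', and the final function strips cursor bytes from the front to return 'a,b,c'.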
+
+/*-------------------------------------------------------------
+ * byteaoctetlen
+ *
+ * get the number of bytes contained in an instance of type 'bytea'
+ *-------------------------------------------------------------
+ */
+Datum
+byteaoctetlen(PG_FUNCTION_ARGS)
+{
+ Datum str = PG_GETARG_DATUM(0);
+
+ /* We need not detoast the input at all */
+ PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
+}
+
+/*
+ * byteacat -
+ * takes two bytea* and returns a bytea* that is the concatenation of
+ * the two.
+ *
+ * Cloned from textcat and modified as required.
+ */
+Datum
+byteacat(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+ bytea *t2 = PG_GETARG_BYTEA_PP(1);
+
+ PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
+}
+
+/*
+ * byteaoverlay
+ * Replace specified substring of first string with second
+ *
+ * The SQL standard defines OVERLAY() in terms of substring and concatenation.
+ * This code is a direct implementation of what the standard says.
+ */
+Datum
+byteaoverlay(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+ bytea *t2 = PG_GETARG_BYTEA_PP(1);
+ int sp = PG_GETARG_INT32(2); /* substring start position */
+ int sl = PG_GETARG_INT32(3); /* substring length */
+
+ PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
+}
+
+Datum
+byteaoverlay_no_len(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+ bytea *t2 = PG_GETARG_BYTEA_PP(1);
+ int sp = PG_GETARG_INT32(2); /* substring start position */
+ int sl;
+
+ sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
+ PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
+}
+
+/*
+ * bytea_substr()
+ * Return a substring starting at the specified position.
+ * Cloned from text_substr and modified as required.
+ *
+ * Input:
+ * - string
+ * - starting position (is one-based)
+ * - string length (optional)
+ *
+ * If the starting position is zero or less, then return from the start of the string
+ * adjusting the length to be consistent with the "negative start" per SQL.
+ * If the length is less than zero, an ERROR is thrown. If no third argument
+ * (length) is provided, the length to the end of the string is assumed.
+ */
+Datum
+bytea_substr(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
+ PG_GETARG_INT32(1),
+ PG_GETARG_INT32(2),
+ false));
+}
+
+/*
+ * bytea_substr_no_len -
+ * Wrapper to avoid opr_sanity failure due to
+ * one function accepting a different number of args.
+ */
+Datum
+bytea_substr_no_len(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
+ PG_GETARG_INT32(1),
+ -1,
+ true));
+}
+
+/*
+ * bit_count
+ */
+Datum
+bytea_bit_count(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+
+ PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
+}
+
+/*
+ * byteapos -
+ * Return the position of the specified substring.
+ * Implements the SQL POSITION() function.
+ * Cloned from textpos and modified as required.
+ */
+Datum
+byteapos(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+ bytea *t2 = PG_GETARG_BYTEA_PP(1);
+ int pos;
+ int px,
+ p;
+ int len1,
+ len2;
+ char *p1,
+ *p2;
+
+ len1 = VARSIZE_ANY_EXHDR(t1);
+ len2 = VARSIZE_ANY_EXHDR(t2);
+
+ if (len2 <= 0)
+ PG_RETURN_INT32(1); /* result for empty pattern */
+
+ p1 = VARDATA_ANY(t1);
+ p2 = VARDATA_ANY(t2);
+
+ pos = 0;
+ px = (len1 - len2);
+ for (p = 0; p <= px; p++)
+ {
+ if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
+ {
+ pos = p + 1;
+ break;
+ };
+ p1++;
+ };
+
+ PG_RETURN_INT32(pos);
+}
+
+/*-------------------------------------------------------------
+ * byteaGetByte
+ *
+ * this routine treats "bytea" as an array of bytes.
+ * It returns the Nth byte (a number between 0 and 255).
+ *-------------------------------------------------------------
+ */
+Datum
+byteaGetByte(PG_FUNCTION_ARGS)
+{
+ bytea *v = PG_GETARG_BYTEA_PP(0);
+ int32 n = PG_GETARG_INT32(1);
+ int len;
+ int byte;
+
+ len = VARSIZE_ANY_EXHDR(v);
+
+ if (n < 0 || n >= len)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("index %d out of valid range, 0..%d",
+ n, len - 1)));
+
+ byte = ((unsigned char *) VARDATA_ANY(v))[n];
+
+ PG_RETURN_INT32(byte);
+}
+
+/*-------------------------------------------------------------
+ * byteaGetBit
+ *
+ * This routine treats a "bytea" type like an array of bits.
+ * It returns the value of the Nth bit (0 or 1).
+ *
+ *-------------------------------------------------------------
+ */
+Datum
+byteaGetBit(PG_FUNCTION_ARGS)
+{
+ bytea *v = PG_GETARG_BYTEA_PP(0);
+ int64 n = PG_GETARG_INT64(1);
+ int byteNo,
+ bitNo;
+ int len;
+ int byte;
+
+ len = VARSIZE_ANY_EXHDR(v);
+
+ if (n < 0 || n >= (int64) len * 8)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
+ n, (int64) len * 8 - 1)));
+
+ /* n/8 is now known < len, so safe to cast to int */
+ byteNo = (int) (n / 8);
+ bitNo = (int) (n % 8);
+
+ byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
+
+ if (byte & (1 << bitNo))
+ PG_RETURN_INT32(1);
+ else
+ PG_RETURN_INT32(0);
+}
+
+/*-------------------------------------------------------------
+ * byteaSetByte
+ *
+ * Given an instance of type 'bytea' creates a new one with
+ * the Nth byte set to the given value.
+ *
+ *-------------------------------------------------------------
+ */
+Datum
+byteaSetByte(PG_FUNCTION_ARGS)
+{
+ bytea *res = PG_GETARG_BYTEA_P_COPY(0);
+ int32 n = PG_GETARG_INT32(1);
+ int32 newByte = PG_GETARG_INT32(2);
+ int len;
+
+ len = VARSIZE(res) - VARHDRSZ;
+
+ if (n < 0 || n >= len)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("index %d out of valid range, 0..%d",
+ n, len - 1)));
+
+ /*
+ * Now set the byte.
+ */
+ ((unsigned char *) VARDATA(res))[n] = newByte;
+
+ PG_RETURN_BYTEA_P(res);
+}
+
+/*-------------------------------------------------------------
+ * byteaSetBit
+ *
+ * Given an instance of type 'bytea' creates a new one with
+ * the Nth bit set to the given value.
+ *
+ *-------------------------------------------------------------
+ */
+Datum
+byteaSetBit(PG_FUNCTION_ARGS)
+{
+ bytea *res = PG_GETARG_BYTEA_P_COPY(0);
+ int64 n = PG_GETARG_INT64(1);
+ int32 newBit = PG_GETARG_INT32(2);
+ int len;
+ int oldByte,
+ newByte;
+ int byteNo,
+ bitNo;
+
+ len = VARSIZE(res) - VARHDRSZ;
+
+ if (n < 0 || n >= (int64) len * 8)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
+ n, (int64) len * 8 - 1)));
+
+ /* n/8 is now known < len, so safe to cast to int */
+ byteNo = (int) (n / 8);
+ bitNo = (int) (n % 8);
+
+ /*
+ * sanity check!
+ */
+ if (newBit != 0 && newBit != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("new bit must be 0 or 1")));
+
+ /*
+ * Update the byte.
+ */
+ oldByte = ((unsigned char *) VARDATA(res))[byteNo];
+
+ if (newBit == 0)
+ newByte = oldByte & (~(1 << bitNo));
+ else
+ newByte = oldByte | (1 << bitNo);
+
+ ((unsigned char *) VARDATA(res))[byteNo] = newByte;
+
+ PG_RETURN_BYTEA_P(res);
+}
+
+/*
+ * Return reversed bytea
+ */
+Datum
+bytea_reverse(PG_FUNCTION_ARGS)
+{
+ bytea *v = PG_GETARG_BYTEA_PP(0);
+ const char *p = VARDATA_ANY(v);
+ int len = VARSIZE_ANY_EXHDR(v);
+ const char *endp = p + len;
+ bytea *result = palloc(len + VARHDRSZ);
+ char *dst = (char *) VARDATA(result) + len;
+
+ SET_VARSIZE(result, len + VARHDRSZ);
+
+ while (p < endp)
+ *(--dst) = *p++;
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+
+/*****************************************************************************
+ * Comparison Functions used for bytea
+ *
+ * Note: btree indexes need these routines not to leak memory; therefore,
+ * be careful to free working copies of toasted datums. Most places don't
+ * need to be so careful.
+ *****************************************************************************/
+
+Datum
+byteaeq(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ bool result;
+ Size len1,
+ len2;
+
+ /*
+ * We can use a fast path for unequal lengths, which might save us from
+ * having to detoast one or both values.
+ */
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len1 != len2)
+ result = false;
+ else
+ {
+ bytea *barg1 = DatumGetByteaPP(arg1);
+ bytea *barg2 = DatumGetByteaPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
+ len1 - VARHDRSZ) == 0);
+
+ PG_FREE_IF_COPY(barg1, 0);
+ PG_FREE_IF_COPY(barg2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+byteane(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ bool result;
+ Size len1,
+ len2;
+
+ /*
+ * We can use a fast path for unequal lengths, which might save us from
+ * having to detoast one or both values.
+ */
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len1 != len2)
+ result = true;
+ else
+ {
+ bytea *barg1 = DatumGetByteaPP(arg1);
+ bytea *barg2 = DatumGetByteaPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
+ len1 - VARHDRSZ) != 0);
+
+ PG_FREE_IF_COPY(barg1, 0);
+ PG_FREE_IF_COPY(barg2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+bytealt(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
+}
+
+Datum
+byteale(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
+}
+
+Datum
+byteagt(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
+}
+
+Datum
+byteage(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
+}
+
+Datum
+byteacmp(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ if ((cmp == 0) && (len1 != len2))
+ cmp = (len1 < len2) ? -1 : 1;
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_INT32(cmp);
+}
+
+Datum
+bytea_larger(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+Datum
+bytea_smaller(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ bytea *result;
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+Datum
+bytea_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ /* Use generic string SortSupport, forcing "C" collation */
+ varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_VOID();
+}
+
+/* Cast bytea -> int2 */
+Datum
+bytea_int2(PG_FUNCTION_ARGS)
+{
+ bytea *v = PG_GETARG_BYTEA_PP(0);
+ int len = VARSIZE_ANY_EXHDR(v);
+ uint16 result;
+
+ /* Check that the byte array is not too long */
+ if (len > sizeof(result))
+ ereport(ERROR,
+ errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("smallint out of range"));
+
+ /* Convert it to an integer; most significant bytes come first */
+ result = 0;
+ for (int i = 0; i < len; i++)
+ {
+ result <<= BITS_PER_BYTE;
+ result |= ((unsigned char *) VARDATA_ANY(v))[i];
+ }
+
+ PG_RETURN_INT16(result);
+}
+
+/* Cast bytea -> int4 */
+Datum
+bytea_int4(PG_FUNCTION_ARGS)
+{
+ bytea *v = PG_GETARG_BYTEA_PP(0);
+ int len = VARSIZE_ANY_EXHDR(v);
+ uint32 result;
+
+ /* Check that the byte array is not too long */
+ if (len > sizeof(result))
+ ereport(ERROR,
+ errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range"));
+
+ /* Convert it to an integer; most significant bytes come first */
+ result = 0;
+ for (int i = 0; i < len; i++)
+ {
+ result <<= BITS_PER_BYTE;
+ result |= ((unsigned char *) VARDATA_ANY(v))[i];
+ }
+
+ PG_RETURN_INT32(result);
+}
+
+/* Cast bytea -> int8 */
+Datum
+bytea_int8(PG_FUNCTION_ARGS)
+{
+ bytea *v = PG_GETARG_BYTEA_PP(0);
+ int len = VARSIZE_ANY_EXHDR(v);
+ uint64 result;
+
+ /* Check that the byte array is not too long */
+ if (len > sizeof(result))
+ ereport(ERROR,
+ errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range"));
+
+ /* Convert it to an integer; most significant bytes come first */
+ result = 0;
+ for (int i = 0; i < len; i++)
+ {
+ result <<= BITS_PER_BYTE;
+ result |= ((unsigned char *) VARDATA_ANY(v))[i];
+ }
+
+ PG_RETURN_INT64(result);
+}
+
+/* Cast int2 -> bytea; can just use int2send() */
+Datum
+int2_bytea(PG_FUNCTION_ARGS)
+{
+ return int2send(fcinfo);
+}
+
+/* Cast int4 -> bytea; can just use int4send() */
+Datum
+int4_bytea(PG_FUNCTION_ARGS)
+{
+ return int4send(fcinfo);
+}
+
+/* Cast int8 -> bytea; can just use int8send() */
+Datum
+int8_bytea(PG_FUNCTION_ARGS)
+{
+ return int8send(fcinfo);
+}
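/*
 * Standalone sketch of the cast loop above (hypothetical names): the
 * bytes are interpreted big-endian, most significant first, so a short
 * input behaves as if left-padded with zero bytes.  "buf" stands in
 * for VARDATA_ANY(v), and the length check is assumed already done.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t
bytes_to_uint32_sketch(const unsigned char *buf, int len)
{
    uint32_t    result = 0;

    for (int i = 0; i < len; i++)
    {
        result <<= 8;           /* BITS_PER_BYTE */
        result |= buf[i];
    }
    return result;
}

int
main(void)
{
    const unsigned char buf[] = {0x01, 0x02};

    printf("%u\n", bytes_to_uint32_sketch(buf, 2));     /* prints 258 */
    return 0;
}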
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 4227ab1a72b..344f58b92f7 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -1363,10 +1363,35 @@ timestamp_date(PG_FUNCTION_ARGS)
{
Timestamp timestamp = PG_GETARG_TIMESTAMP(0);
DateADT result;
+
+ result = timestamp2date_opt_overflow(timestamp, NULL);
+ PG_RETURN_DATEADT(result);
+}
+
+/*
+ * Convert timestamp to date.
+ *
+ * On successful conversion, *overflow is set to zero if it's not NULL.
+ *
+ * If the timestamp is finite but out of the valid range for date, then:
+ * if overflow is NULL, we throw an out-of-range error.
+ * if overflow is not NULL, we store +1 or -1 there to indicate the sign
+ * of the overflow, and return the appropriate date infinity.
+ *
+ * Note: given the ranges of the types, overflow is only possible at
+ * the minimum end of the range, but we don't assume that in this code.
+ */
+DateADT
+timestamp2date_opt_overflow(Timestamp timestamp, int *overflow)
+{
+ DateADT result;
struct pg_tm tt,
*tm = &tt;
fsec_t fsec;
+ if (overflow)
+ *overflow = 0;
+
if (TIMESTAMP_IS_NOBEGIN(timestamp))
DATE_NOBEGIN(result);
else if (TIMESTAMP_IS_NOEND(timestamp))
@@ -1374,14 +1399,30 @@ timestamp_date(PG_FUNCTION_ARGS)
else
{
if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0)
+ {
+ if (overflow)
+ {
+ if (timestamp < 0)
+ {
+ *overflow = -1;
+ DATE_NOBEGIN(result);
+ }
+ else
+ {
+ *overflow = 1; /* not actually reachable */
+ DATE_NOEND(result);
+ }
+ return result;
+ }
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
+ }
result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE;
}
- PG_RETURN_DATEADT(result);
+ return result;
}
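/*
 * Hypothetical caller sketch, not part of the patch: the overflow
 * argument lets in-server code compare a Timestamp against a DateADT
 * at date granularity without risking an out-of-range error.  This is
 * a fragment that assumes the usual PostgreSQL headers;
 * cmp_timestamp_date_sketch is an invented name.
 */
static int32
cmp_timestamp_date_sketch(Timestamp ts, DateADT d)
{
    int         overflow;
    DateADT     tsdate = timestamp2date_opt_overflow(ts, &overflow);

    if (overflow < 0)
        return -1;              /* ts lies below the date range */
    if (overflow > 0)
        return 1;               /* above it; not reachable per above */
    return (tsdate < d) ? -1 : ((tsdate > d) ? 1 : 0);
}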
@@ -1408,11 +1449,36 @@ timestamptz_date(PG_FUNCTION_ARGS)
{
TimestampTz timestamp = PG_GETARG_TIMESTAMP(0);
DateADT result;
+
+ result = timestamptz2date_opt_overflow(timestamp, NULL);
+ PG_RETURN_DATEADT(result);
+}
+
+/*
+ * Convert timestamptz to date.
+ *
+ * On successful conversion, *overflow is set to zero if it's not NULL.
+ *
+ * If the timestamptz is finite but out of the valid range for date, then:
+ * if overflow is NULL, we throw an out-of-range error.
+ * if overflow is not NULL, we store +1 or -1 there to indicate the sign
+ * of the overflow, and return the appropriate date infinity.
+ *
+ * Note: given the ranges of the types, overflow is only possible at
+ * the minimum end of the range, but we don't assume that in this code.
+ */
+DateADT
+timestamptz2date_opt_overflow(TimestampTz timestamp, int *overflow)
+{
+ DateADT result;
struct pg_tm tt,
*tm = &tt;
fsec_t fsec;
int tz;
+ if (overflow)
+ *overflow = 0;
+
if (TIMESTAMP_IS_NOBEGIN(timestamp))
DATE_NOBEGIN(result);
else if (TIMESTAMP_IS_NOEND(timestamp))
@@ -1420,14 +1486,30 @@ timestamptz_date(PG_FUNCTION_ARGS)
else
{
if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0)
+ {
+ if (overflow)
+ {
+ if (timestamp < 0)
+ {
+ *overflow = -1;
+ DATE_NOBEGIN(result);
+ }
+ else
+ {
+ *overflow = 1; /* not actually reachable */
+ DATE_NOEND(result);
+ }
+ return result;
+ }
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
+ }
result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE;
}
- PG_RETURN_DATEADT(result);
+ return result;
}
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 793d8a9adcc..680fee2a844 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -702,9 +702,18 @@ ParseFraction(char *cp, double *frac)
}
else
{
+ /*
+ * On the other hand, let's reject anything that's not digits after
+ * the ".". strtod is happy with input like ".123e9", but that'd
+ * break callers' expectation that the result is in 0..1. (It's quite
+ * difficult to get here with such input, but not impossible.)
+ */
+ if (strspn(cp + 1, "0123456789") != strlen(cp + 1))
+ return DTERR_BAD_FORMAT;
+
errno = 0;
*frac = strtod(cp, &cp);
- /* check for parse failure */
+ /* check for parse failure (probably redundant given prior check) */
if (*cp != '\0' || errno != 0)
return DTERR_BAD_FORMAT;
}
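/*
 * Standalone check of the new guard (hypothetical names): exponent
 * notation that strtod() would happily accept is now rejected before
 * strtod() runs, keeping the result within 0..1.
 */
#include <stdio.h>
#include <string.h>

static int
fraction_is_all_digits(const char *cp)  /* cp points at the '.' */
{
    return strspn(cp + 1, "0123456789") == strlen(cp + 1);
}

int
main(void)
{
    printf("%d\n", fraction_is_all_digits(".123"));     /* 1: accepted */
    printf("%d\n", fraction_is_all_digits(".123e9"));   /* 0: rejected */
    return 0;
}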
@@ -2959,30 +2968,27 @@ DecodeNumberField(int len, char *str, int fmask,
char *cp;
/*
+ * This function was originally meant to cope only with DTK_NUMBER fields,
+ * but we now sometimes abuse it to parse (parts of) DTK_DATE fields,
+ * which can contain letters and other punctuation. Reject if it's not a
+ * valid DTK_NUMBER, that is digits and decimal point(s). (ParseFraction
+ * will reject if there's more than one decimal point.)
+ */
+ if (strspn(str, "0123456789.") != len)
+ return DTERR_BAD_FORMAT;
+
+ /*
* Have a decimal point? Then this is a date or something with a seconds
* field...
*/
if ((cp = strchr(str, '.')) != NULL)
{
- /*
- * Can we use ParseFractionalSecond here? Not clear whether trailing
- * junk should be rejected ...
- */
- if (cp[1] == '\0')
- {
- /* avoid assuming that strtod will accept "." */
- *fsec = 0;
- }
- else
- {
- double frac;
+ int dterr;
- errno = 0;
- frac = strtod(cp, NULL);
- if (errno != 0)
- return DTERR_BAD_FORMAT;
- *fsec = rint(frac * 1000000);
- }
+ /* Convert the fraction and store at *fsec */
+ dterr = ParseFractionalSecond(cp, fsec);
+ if (dterr)
+ return dterr;
/* Now truncate off the fraction for further processing */
*cp = '\0';
len = strlen(str);
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
index 6d20ae07ae7..7b97d2be6ca 100644
--- a/src/backend/utils/adt/float.c
+++ b/src/backend/utils/adt/float.c
@@ -4065,10 +4065,11 @@ float84ge(PG_FUNCTION_ARGS)
* in the histogram. width_bucket() returns an integer indicating the
* bucket number that 'operand' belongs to in an equiwidth histogram
* with the specified characteristics. An operand smaller than the
- * lower bound is assigned to bucket 0. An operand greater than the
- * upper bound is assigned to an additional bucket (with number
- * count+1). We don't allow "NaN" for any of the float8 inputs, and we
- * don't allow either of the histogram bounds to be +/- infinity.
+ * lower bound is assigned to bucket 0. An operand greater than or equal
+ * to the upper bound is assigned to an additional bucket (with number
+ * count+1). We don't allow the histogram bounds to be NaN or +/- infinity,
+ * but we do allow those values for the operand (taking NaN to be larger
+ * than any other value, as we do in comparisons).
*/
Datum
width_bucket_float8(PG_FUNCTION_ARGS)
@@ -4084,12 +4085,11 @@ width_bucket_float8(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
errmsg("count must be greater than zero")));
- if (isnan(operand) || isnan(bound1) || isnan(bound2))
+ if (isnan(bound1) || isnan(bound2))
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
- errmsg("operand, lower bound, and upper bound cannot be NaN")));
+ errmsg("lower and upper bounds cannot be NaN")));
- /* Note that we allow "operand" to be infinite */
if (isinf(bound1) || isinf(bound2))
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
@@ -4097,15 +4097,15 @@ width_bucket_float8(PG_FUNCTION_ARGS)
if (bound1 < bound2)
{
- if (operand < bound1)
- result = 0;
- else if (operand >= bound2)
+ if (isnan(operand) || operand >= bound2)
{
if (pg_add_s32_overflow(count, 1, &result))
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("integer out of range")));
}
+ else if (operand < bound1)
+ result = 0;
else
{
if (!isinf(bound2 - bound1))
@@ -4135,7 +4135,7 @@ width_bucket_float8(PG_FUNCTION_ARGS)
}
else if (bound1 > bound2)
{
- if (operand > bound1)
+ if (isnan(operand) || operand > bound1)
result = 0;
else if (operand <= bound2)
{
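/*
 * Standalone sketch of the revised ascending-bounds branch order
 * (hypothetical names): NaN operands, now treated as larger than any
 * other value, land in bucket count+1 together with operands at or
 * above the upper bound, while operands below the lower bound get
 * bucket 0.  The real function's infinite-width and integer-overflow
 * handling is omitted here.
 */
#include <math.h>
#include <stdio.h>

static int
bucket_ascending_sketch(double operand, double bound1, double bound2,
                        int count)
{
    if (isnan(operand) || operand >= bound2)
        return count + 1;
    else if (operand < bound1)
        return 0;
    else
        return (int) (count * (operand - bound1) / (bound2 - bound1)) + 1;
}

int
main(void)
{
    printf("%d\n", bucket_ascending_sketch(NAN, 0.0, 10.0, 5));     /* 6 */
    printf("%d\n", bucket_ascending_sketch(-1.0, 0.0, 10.0, 5));    /* 0 */
    printf("%d\n", bucket_ascending_sketch(3.0, 0.0, 10.0, 5));     /* 2 */
    return 0;
}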
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
index 5bd1e01f7e4..1d05481181d 100644
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -3590,14 +3590,15 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
if (matched < 2)
ereturn(escontext,,
(errcode(ERRCODE_INVALID_DATETIME_FORMAT),
- errmsg("invalid input string for \"Y,YYY\"")));
+ errmsg("invalid value \"%s\" for \"%s\"",
+ s, "Y,YYY")));
/* years += (millennia * 1000); */
if (pg_mul_s32_overflow(millennia, 1000, &millennia) ||
pg_add_s32_overflow(years, millennia, &years))
ereturn(escontext,,
(errcode(ERRCODE_DATETIME_FIELD_OVERFLOW),
- errmsg("value for \"Y,YYY\" in source string is out of range")));
+ errmsg("value for \"%s\" in source string is out of range", "Y,YYY")));
if (!from_char_set_int(&out->year, years, n, escontext))
return;
diff --git a/src/backend/utils/adt/inet_net_pton.c b/src/backend/utils/adt/inet_net_pton.c
index ef2236d9f04..3b0db2a3799 100644
--- a/src/backend/utils/adt/inet_net_pton.c
+++ b/src/backend/utils/adt/inet_net_pton.c
@@ -115,8 +115,7 @@ inet_cidr_pton_ipv4(const char *src, u_char *dst, size_t size)
src++; /* skip x or X. */
while ((ch = *src++) != '\0' && isxdigit((unsigned char) ch))
{
- if (isupper((unsigned char) ch))
- ch = tolower((unsigned char) ch);
+ ch = pg_ascii_tolower((unsigned char) ch);
n = strchr(xdigits, ch) - xdigits;
assert(n >= 0 && n <= 15);
if (dirty == 0)
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
index 7f4cf614585..4216ac17f43 100644
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -98,7 +98,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale)
else if (locale->is_default)
return pg_tolower(c);
else
- return tolower_l(c, locale->info.lt);
+ return char_tolower(c, locale);
}
@@ -209,7 +209,17 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
* way.
*/
- if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU))
+ if (locale->ctype_is_c ||
+ (char_tolower_enabled(locale) &&
+ pg_database_encoding_max_length() == 1))
+ {
+ p = VARDATA_ANY(pat);
+ plen = VARSIZE_ANY_EXHDR(pat);
+ s = VARDATA_ANY(str);
+ slen = VARSIZE_ANY_EXHDR(str);
+ return SB_IMatchText(s, slen, p, plen, locale);
+ }
+ else
{
pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
PointerGetDatum(pat)));
@@ -224,14 +234,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
else
return MB_MatchText(s, slen, p, plen, 0);
}
- else
- {
- p = VARDATA_ANY(pat);
- plen = VARSIZE_ANY_EXHDR(pat);
- s = VARDATA_ANY(str);
- slen = VARSIZE_ANY_EXHDR(str);
- return SB_IMatchText(s, slen, p, plen, locale);
- }
}
/*
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
index 8fdc677371f..999f23f86d5 100644
--- a/src/backend/utils/adt/like_support.c
+++ b/src/backend/utils/adt/like_support.c
@@ -1495,13 +1495,8 @@ pattern_char_isalpha(char c, bool is_multibyte,
{
if (locale->ctype_is_c)
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
- else if (is_multibyte && IS_HIGHBIT_SET(c))
- return true;
- else if (locale->provider != COLLPROVIDER_LIBC)
- return IS_HIGHBIT_SET(c) ||
- (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
else
- return isalpha_l((unsigned char) c, locale->info.lt);
+ return char_is_cased(c, locale);
}
diff --git a/src/backend/utils/adt/mcxtfuncs.c b/src/backend/utils/adt/mcxtfuncs.c
index 7ec2c225016..fe6dce9cba3 100644
--- a/src/backend/utils/adt/mcxtfuncs.c
+++ b/src/backend/utils/adt/mcxtfuncs.c
@@ -15,27 +15,30 @@
#include "postgres.h"
-#include "access/twophase.h"
-#include "catalog/pg_authid_d.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
-#include "miscadmin.h"
#include "storage/proc.h"
#include "storage/procarray.h"
-#include "utils/acl.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/hsearch.h"
-#include "utils/memutils.h"
-#include "utils/wait_event_types.h"
/* ----------
* The max bytes for showing identifiers of MemoryContext.
* ----------
*/
#define MEMORY_CONTEXT_IDENT_DISPLAY_SIZE 1024
-struct MemoryStatsBackendState *memCxtState = NULL;
-struct MemoryStatsCtl *memCxtArea = NULL;
+
+/*
+ * MemoryContextId
+ * Used for storage of transient identifiers for
+ * pg_get_backend_memory_contexts.
+ */
+typedef struct MemoryContextId
+{
+ MemoryContext context;
+ int context_id;
+} MemoryContextId;
/*
* int_list_to_array
@@ -86,7 +89,7 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore,
*/
for (MemoryContext cur = context; cur != NULL; cur = cur->parent)
{
- MemoryStatsContextId *entry;
+ MemoryContextId *entry;
bool found;
entry = hash_search(context_id_lookup, &cur, HASH_FIND, &found);
@@ -140,51 +143,36 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore,
else
nulls[1] = true;
- type = ContextTypeToString(context->type);
-
- values[2] = CStringGetTextDatum(type);
- values[3] = Int32GetDatum(list_length(path)); /* level */
- values[4] = int_list_to_array(path);
- values[5] = Int64GetDatum(stat.totalspace);
- values[6] = Int64GetDatum(stat.nblocks);
- values[7] = Int64GetDatum(stat.freespace);
- values[8] = Int64GetDatum(stat.freechunks);
- values[9] = Int64GetDatum(stat.totalspace - stat.freespace);
-
- tuplestore_putvalues(tupstore, tupdesc, values, nulls);
- list_free(path);
-}
-
-/*
- * ContextTypeToString
- * Returns a textual representation of a context type
- *
- * This should cover the same types as MemoryContextIsValid.
- */
-const char *
-ContextTypeToString(NodeTag type)
-{
- const char *context_type;
-
- switch (type)
+ switch (context->type)
{
case T_AllocSetContext:
- context_type = "AllocSet";
+ type = "AllocSet";
break;
case T_GenerationContext:
- context_type = "Generation";
+ type = "Generation";
break;
case T_SlabContext:
- context_type = "Slab";
+ type = "Slab";
break;
case T_BumpContext:
- context_type = "Bump";
+ type = "Bump";
break;
default:
- context_type = "???";
+ type = "???";
break;
}
- return context_type;
+
+ values[2] = CStringGetTextDatum(type);
+ values[3] = Int32GetDatum(list_length(path)); /* level */
+ values[4] = int_list_to_array(path);
+ values[5] = Int64GetDatum(stat.totalspace);
+ values[6] = Int64GetDatum(stat.nblocks);
+ values[7] = Int64GetDatum(stat.freespace);
+ values[8] = Int64GetDatum(stat.freechunks);
+ values[9] = Int64GetDatum(stat.totalspace - stat.freespace);
+
+ tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+ list_free(path);
}
/*
@@ -201,7 +189,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS)
HTAB *context_id_lookup;
ctl.keysize = sizeof(MemoryContext);
- ctl.entrysize = sizeof(MemoryStatsContextId);
+ ctl.entrysize = sizeof(MemoryContextId);
ctl.hcxt = CurrentMemoryContext;
context_id_lookup = hash_create("pg_get_backend_memory_contexts",
@@ -228,7 +216,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS)
foreach_ptr(MemoryContextData, cur, contexts)
{
- MemoryStatsContextId *entry;
+ MemoryContextId *entry;
bool found;
/*
@@ -236,8 +224,8 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS)
* PutMemoryContextsStatsTupleStore needs this to populate the "path"
* column with the parent context_ids.
*/
- entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur,
- HASH_ENTER, &found);
+ entry = (MemoryContextId *) hash_search(context_id_lookup, &cur,
+ HASH_ENTER, &found);
entry->context_id = context_id++;
Assert(!found);
@@ -317,349 +305,3 @@ pg_log_backend_memory_contexts(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(true);
}
-
-/*
- * pg_get_process_memory_contexts
- * Signal a backend or an auxiliary process to send its memory contexts,
- * wait for the results and display them.
- *
- * By default, only superusers or users with ROLE_PG_READ_ALL_STATS are allowed
- * to signal a process to return the memory contexts. This is because allowing
- * any users to issue this request at an unbounded rate would cause lots of
- * requests to be sent, which can lead to denial of service. Additional roles
- * can be permitted with GRANT.
- *
- * On receipt of this signal, a backend or an auxiliary process sets the flag
- * in the signal handler, which causes the next CHECK_FOR_INTERRUPTS()
- * or process-specific interrupt handler to copy the memory context details
- * to a dynamic shared memory space.
- *
- * We have defined a limit on DSA memory that could be allocated per process -
- * if the process has more memory contexts than what can fit in the allocated
- * size, the excess contexts are summarized and represented as cumulative total
- * at the end of the buffer.
- *
- * After sending the signal, wait on a condition variable. The publishing
- * backend, after copying the data to shared memory, sends signal on that
- * condition variable. There is one condition variable per publishing backend.
- * Once the condition variable is signalled, check if the latest memory context
- * information is available and display.
- *
- * If the publishing backend does not respond before the condition variable
- * times out, which is set to MEMSTATS_WAIT_TIMEOUT, retry given that there is
- * time left within the timeout specified by the user, before giving up and
- * returning previously published statistics, if any. If no previous statistics
- * exist, return NULL.
- */
-#define MEMSTATS_WAIT_TIMEOUT 100
-Datum
-pg_get_process_memory_contexts(PG_FUNCTION_ARGS)
-{
- int pid = PG_GETARG_INT32(0);
- bool summary = PG_GETARG_BOOL(1);
- double timeout = PG_GETARG_FLOAT8(2);
- PGPROC *proc;
- ProcNumber procNumber = INVALID_PROC_NUMBER;
- bool proc_is_aux = false;
- ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
- MemoryStatsEntry *memcxt_info;
- TimestampTz start_timestamp;
-
- /*
- * See if the process with given pid is a backend or an auxiliary process
- * and remember the type for when we requery the process later.
- */
- proc = BackendPidGetProc(pid);
- if (proc == NULL)
- {
- proc = AuxiliaryPidGetProc(pid);
- proc_is_aux = true;
- }
-
- /*
- * BackendPidGetProc() and AuxiliaryPidGetProc() return NULL if the pid
- * isn't valid; however, this is not a problem, and we just leave with a
- * WARNING.
- * See comment in pg_log_backend_memory_contexts for a discussion on this.
- */
- if (proc == NULL)
- {
- /*
- * This is just a warning so a loop-through-resultset will not abort
- * if one backend terminated on its own during the run.
- */
- ereport(WARNING,
- errmsg("PID %d is not a PostgreSQL server process", pid));
- PG_RETURN_NULL();
- }
-
- InitMaterializedSRF(fcinfo, 0);
-
- procNumber = GetNumberFromPGProc(proc);
-
- LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
- memCxtState[procNumber].summary = summary;
- LWLockRelease(&memCxtState[procNumber].lw_lock);
-
- start_timestamp = GetCurrentTimestamp();
-
- /*
- * Send a signal to a PostgreSQL process, informing it we want it to
- * produce information about its memory contexts.
- */
- if (SendProcSignal(pid, PROCSIG_GET_MEMORY_CONTEXT, procNumber) < 0)
- {
- ereport(WARNING,
- errmsg("could not send signal to process %d: %m", pid));
- PG_RETURN_NULL();
- }
-
- /*
- * Even if the proc has published statistics, they may not be due to the
- * current request, but previously published stats. Check if the stats
- * are updated by comparing the timestamp, if the stats are newer than our
- * previously recorded timestamp from before sending the procsignal, they
- * must by definition be updated. Wait for the timeout specified by the
- * user, following which display old statistics if available or return
- * NULL.
- */
- while (1)
- {
- long msecs;
-
- /*
- * We expect to come out of sleep when the requested process has
- * finished publishing the statistics, verified using the valid DSA
- * pointer.
- *
- * Make sure that the information belongs to the pid we requested
- * information for. Otherwise loop back and wait for the server
- * process to finish publishing statistics.
- */
- LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
-
- /*
- * Note in procnumber.h file says that a procNumber can be re-used for
- * a different backend immediately after a backend exits. In case an
- * old process' data was there and not updated by the current process
- * in the slot identified by the procNumber, the pid of the requested
- * process and the proc_id might not match.
- */
- if (memCxtState[procNumber].proc_id == pid)
- {
- /*
- * Break if the latest stats have been read, indicated by
- * statistics timestamp being newer than the current request
- * timestamp.
- */
- msecs = TimestampDifferenceMilliseconds(start_timestamp,
- memCxtState[procNumber].stats_timestamp);
-
- if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer)
- && msecs > 0)
- break;
- }
- LWLockRelease(&memCxtState[procNumber].lw_lock);
-
- /*
- * Recheck the state of the backend before sleeping on the condition
- * variable to ensure the process is still alive. Only check the
- * relevant process type based on the earlier PID check.
- */
- if (proc_is_aux)
- proc = AuxiliaryPidGetProc(pid);
- else
- proc = BackendPidGetProc(pid);
-
- /*
- * The process ending during memory context processing is not an
- * error.
- */
- if (proc == NULL)
- {
- ereport(WARNING,
- errmsg("PID %d is no longer a PostgreSQL server process",
- pid));
- PG_RETURN_NULL();
- }
-
- msecs = TimestampDifferenceMilliseconds(start_timestamp, GetCurrentTimestamp());
-
- /*
- * If we haven't already exceeded the timeout value, sleep for the
- * remainder of the timeout on the condition variable.
- */
- if (msecs > 0 && msecs < (timeout * 1000))
- {
- /*
- * Wait for the timeout as defined by the user. If no updated
- * statistics are available within the allowed time then display
- * previously published statistics if there are any. If no
- * previous statistics are available then return NULL. The timer
- * is defined in milliseconds since that's what the condition
- * variable sleep uses.
- */
- if (ConditionVariableTimedSleep(&memCxtState[procNumber].memcxt_cv,
- ((timeout * 1000) - msecs), WAIT_EVENT_MEM_CXT_PUBLISH))
- {
- LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
- /* Displaying previously published statistics if available */
- if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer))
- break;
- else
- {
- LWLockRelease(&memCxtState[procNumber].lw_lock);
- PG_RETURN_NULL();
- }
- }
- }
- else
- {
- LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
- /* Displaying previously published statistics if available */
- if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer))
- break;
- else
- {
- LWLockRelease(&memCxtState[procNumber].lw_lock);
- PG_RETURN_NULL();
- }
- }
- }
-
- /*
- * We should only reach here with a valid DSA handle, either containing
- * updated statistics or previously published statistics (identified by
- * the timestamp).
- */
- Assert(memCxtArea->memstats_dsa_handle != DSA_HANDLE_INVALID);
- /* Attach to the dsa area if we have not already done so */
- if (MemoryStatsDsaArea == NULL)
- {
- MemoryContext oldcontext = CurrentMemoryContext;
-
- MemoryContextSwitchTo(TopMemoryContext);
- MemoryStatsDsaArea = dsa_attach(memCxtArea->memstats_dsa_handle);
- MemoryContextSwitchTo(oldcontext);
- dsa_pin_mapping(MemoryStatsDsaArea);
- }
-
- /*
- * Backend has finished publishing the stats, project them.
- */
- memcxt_info = (MemoryStatsEntry *)
- dsa_get_address(MemoryStatsDsaArea, memCxtState[procNumber].memstats_dsa_pointer);
-
-#define PG_GET_PROCESS_MEMORY_CONTEXTS_COLS 12
- for (int i = 0; i < memCxtState[procNumber].total_stats; i++)
- {
- ArrayType *path_array;
- int path_length;
- Datum values[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS];
- bool nulls[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS];
- char *name;
- char *ident;
- Datum *path_datum = NULL;
- int *path_int = NULL;
-
- memset(values, 0, sizeof(values));
- memset(nulls, 0, sizeof(nulls));
-
- if (DsaPointerIsValid(memcxt_info[i].name))
- {
- name = (char *) dsa_get_address(MemoryStatsDsaArea, memcxt_info[i].name);
- values[0] = CStringGetTextDatum(name);
- }
- else
- nulls[0] = true;
-
- if (DsaPointerIsValid(memcxt_info[i].ident))
- {
- ident = (char *) dsa_get_address(MemoryStatsDsaArea, memcxt_info[i].ident);
- values[1] = CStringGetTextDatum(ident);
- }
- else
- nulls[1] = true;
-
- values[2] = CStringGetTextDatum(ContextTypeToString(memcxt_info[i].type));
-
- path_length = memcxt_info[i].path_length;
- path_datum = (Datum *) palloc(path_length * sizeof(Datum));
- if (DsaPointerIsValid(memcxt_info[i].path))
- {
- path_int = (int *) dsa_get_address(MemoryStatsDsaArea, memcxt_info[i].path);
- for (int j = 0; j < path_length; j++)
- path_datum[j] = Int32GetDatum(path_int[j]);
- path_array = construct_array_builtin(path_datum, path_length, INT4OID);
- values[3] = PointerGetDatum(path_array);
- }
- else
- nulls[3] = true;
-
- values[4] = Int32GetDatum(memcxt_info[i].levels);
- values[5] = Int64GetDatum(memcxt_info[i].totalspace);
- values[6] = Int64GetDatum(memcxt_info[i].nblocks);
- values[7] = Int64GetDatum(memcxt_info[i].freespace);
- values[8] = Int64GetDatum(memcxt_info[i].freechunks);
- values[9] = Int64GetDatum(memcxt_info[i].totalspace -
- memcxt_info[i].freespace);
- values[10] = Int32GetDatum(memcxt_info[i].num_agg_stats);
- values[11] = TimestampTzGetDatum(memCxtState[procNumber].stats_timestamp);
-
- tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
- values, nulls);
- }
- LWLockRelease(&memCxtState[procNumber].lw_lock);
-
- ConditionVariableCancelSleep();
-
- PG_RETURN_NULL();
-}
-
-Size
-MemoryContextReportingShmemSize(void)
-{
- Size sz = 0;
- Size TotalProcs = 0;
-
- TotalProcs = add_size(TotalProcs, NUM_AUXILIARY_PROCS);
- TotalProcs = add_size(TotalProcs, MaxBackends);
- sz = add_size(sz, mul_size(TotalProcs, sizeof(MemoryStatsBackendState)));
-
- sz = add_size(sz, sizeof(MemoryStatsCtl));
-
- return sz;
-}
-
-/*
- * Initialize shared memory for displaying memory context statistics
- */
-void
-MemoryContextReportingShmemInit(void)
-{
- bool found;
-
- memCxtArea = (MemoryStatsCtl *)
- ShmemInitStruct("MemoryStatsCtl",
- sizeof(MemoryStatsCtl), &found);
-
- if (!found)
- {
- LWLockInitialize(&memCxtArea->lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE);
- memCxtArea->memstats_dsa_handle = DSA_HANDLE_INVALID;
- }
-
- memCxtState = (MemoryStatsBackendState *)
- ShmemInitStruct("MemoryStatsBackendState",
- ((MaxBackends + NUM_AUXILIARY_PROCS) * sizeof(MemoryStatsBackendState)),
- &found);
-
- if (found)
- return;
-
- for (int i = 0; i < (MaxBackends + NUM_AUXILIARY_PROCS); i++)
- {
- ConditionVariableInit(&memCxtState[i].memcxt_cv);
- LWLockInitialize(&memCxtState[i].lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC);
- memCxtState[i].memstats_dsa_pointer = InvalidDsaPointer;
- }
-}
diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build
index 244f48f4fd7..ed9bbd7b926 100644
--- a/src/backend/utils/adt/meson.build
+++ b/src/backend/utils/adt/meson.build
@@ -12,6 +12,7 @@ backend_sources += files(
'arrayutils.c',
'ascii.c',
'bool.c',
+ 'bytea.c',
'cash.c',
'char.c',
'cryptohashfuncs.c',
diff --git a/src/backend/utils/adt/network.c b/src/backend/utils/adt/network.c
index f03fcc1147b..9fd211b2d45 100644
--- a/src/backend/utils/adt/network.c
+++ b/src/backend/utils/adt/network.c
@@ -12,8 +12,6 @@
#include <netinet/in.h>
#include <arpa/inet.h>
-#include "access/stratnum.h"
-#include "catalog/pg_opfamily.h"
#include "catalog/pg_type.h"
#include "common/hashfn.h"
#include "common/ip.h"
diff --git a/src/backend/utils/adt/network_spgist.c b/src/backend/utils/adt/network_spgist.c
index a84747d9275..602276a35c3 100644
--- a/src/backend/utils/adt/network_spgist.c
+++ b/src/backend/utils/adt/network_spgist.c
@@ -37,7 +37,6 @@
#include "catalog/pg_type.h"
#include "utils/fmgrprotos.h"
#include "utils/inet.h"
-#include "varatt.h"
static int inet_spg_node_number(const inet *val, int commonbits);
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 40dcbc7b671..c9233565d57 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -1958,9 +1958,11 @@ generate_series_numeric_support(PG_FUNCTION_ARGS)
* in the histogram. width_bucket() returns an integer indicating the
* bucket number that 'operand' belongs to in an equiwidth histogram
* with the specified characteristics. An operand smaller than the
- * lower bound is assigned to bucket 0. An operand greater than the
- * upper bound is assigned to an additional bucket (with number
- * count+1). We don't allow "NaN" for any of the numeric arguments.
+ * lower bound is assigned to bucket 0. An operand greater than or equal
+ * to the upper bound is assigned to an additional bucket (with number
+ * count+1). We don't allow the histogram bounds to be NaN or +/- infinity,
+ * but we do allow those values for the operand (taking NaN to be larger
+ * than any other value, as we do in comparisons).
*/
Datum
width_bucket_numeric(PG_FUNCTION_ARGS)
@@ -1978,17 +1980,13 @@ width_bucket_numeric(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
errmsg("count must be greater than zero")));
- if (NUMERIC_IS_SPECIAL(operand) ||
- NUMERIC_IS_SPECIAL(bound1) ||
- NUMERIC_IS_SPECIAL(bound2))
+ if (NUMERIC_IS_SPECIAL(bound1) || NUMERIC_IS_SPECIAL(bound2))
{
- if (NUMERIC_IS_NAN(operand) ||
- NUMERIC_IS_NAN(bound1) ||
- NUMERIC_IS_NAN(bound2))
+ if (NUMERIC_IS_NAN(bound1) || NUMERIC_IS_NAN(bound2))
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
- errmsg("operand, lower bound, and upper bound cannot be NaN")));
- /* We allow "operand" to be infinite; cmp_numerics will cope */
+ errmsg("lower and upper bounds cannot be NaN")));
+
if (NUMERIC_IS_INF(bound1) || NUMERIC_IS_INF(bound2))
ereport(ERROR,
(errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
index a858f27cadc..97c2ac1faf9 100644
--- a/src/backend/utils/adt/pg_locale.c
+++ b/src/backend/utils/adt/pg_locale.c
@@ -41,11 +41,11 @@
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
-#include "utils/formatting.h"
#include "utils/guc_hooks.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_locale.h"
+#include "utils/relcache.h"
#include "utils/syscache.h"
#ifdef WIN32
@@ -79,31 +79,6 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
extern char *get_collation_actual_version_libc(const char *collcollate);
-extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
-extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_icu(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
-extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
/* GUC settings */
char *locale_messages;
char *locale_monetary;
@@ -1092,6 +1067,9 @@ create_pg_locale(Oid collid, MemoryContext context)
Assert((result->collate_is_c && result->collate == NULL) ||
(!result->collate_is_c && result->collate != NULL));
+ Assert((result->ctype_is_c && result->ctype == NULL) ||
+ (!result->ctype_is_c && result->ctype != NULL));
+
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
&isnull);
if (!isnull)
@@ -1256,77 +1234,31 @@ size_t
pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strlower_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strlower_icu(dst, dstsize, src, srclen, locale);
-#endif
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strlower_libc(dst, dstsize, src, srclen, locale);
- else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
}
size_t
pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strtitle_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strtitle_icu(dst, dstsize, src, srclen, locale);
-#endif
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strtitle_libc(dst, dstsize, src, srclen, locale);
- else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
}
size_t
pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strupper_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strupper_icu(dst, dstsize, src, srclen, locale);
-#endif
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strupper_libc(dst, dstsize, src, srclen, locale);
- else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
}
size_t
pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
- if (locale->provider == COLLPROVIDER_BUILTIN)
- return strfold_builtin(dst, dstsize, src, srclen, locale);
-#ifdef USE_ICU
- else if (locale->provider == COLLPROVIDER_ICU)
- return strfold_icu(dst, dstsize, src, srclen, locale);
-#endif
- /* for libc, just use strlower */
- else if (locale->provider == COLLPROVIDER_LIBC)
- return strlower_libc(dst, dstsize, src, srclen, locale);
+ if (locale->ctype->strfold)
+ return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
else
- /* shouldn't happen */
- PGLOCALE_SUPPORT_ERROR(locale->provider);
-
- return 0; /* keep compiler quiet */
+ return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
}
/*
@@ -1464,6 +1396,41 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
}
/*
+ * char_is_cased()
+ *
+ * Fuzzy test of whether the given char is case-varying or not. The argument
+ * is a single byte, so in a multibyte encoding, just assume any non-ASCII
+ * char is case-varying.
+ */
+bool
+char_is_cased(char ch, pg_locale_t locale)
+{
+ return locale->ctype->char_is_cased(ch, locale);
+}
+
+/*
+ * char_tolower_enabled()
+ *
+ * Does the provider support char_tolower()?
+ */
+bool
+char_tolower_enabled(pg_locale_t locale)
+{
+ return (locale->ctype->char_tolower != NULL);
+}
+
+/*
+ * char_tolower()
+ *
+ * Convert char (single-byte encoding) to lowercase.
+ */
+char
+char_tolower(unsigned char ch, pg_locale_t locale)
+{
+ return locale->ctype->char_tolower(ch, locale);
+}
+
+/*
* Return required encoding ID for the given locale, or -1 if any encoding is
* valid for the locale.
*/
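/*
 * Standalone sketch of the dispatch style the pg_str*() wrappers now
 * use (all names here are illustrative, not the real structs): each
 * provider fills one table of function pointers, and optional members
 * such as strfold fall back to a required one.  Adding a provider then
 * means filling in a table instead of touching every wrapper.
 */
#include <stddef.h>
#include <stdio.h>

struct ctype_methods_sketch
{
    size_t      (*strlower) (char *dst, size_t dstsize, const char *src);
    size_t      (*strfold) (char *dst, size_t dstsize, const char *src); /* optional */
};

static size_t
ascii_strlower(char *dst, size_t dstsize, const char *src)
{
    size_t      n = 0;

    while (src[n] != '\0' && n + 1 < dstsize)
    {
        char        c = src[n];

        dst[n] = (c >= 'A' && c <= 'Z') ? c - 'A' + 'a' : c;
        n++;
    }
    dst[n] = '\0';
    return n;
}

static const struct ctype_methods_sketch demo_methods = {
    .strlower = ascii_strlower,
    /* .strfold left NULL: the dispatcher falls back to strlower */
};

static size_t
sketch_strfold(const struct ctype_methods_sketch *m,
               char *dst, size_t dstsize, const char *src)
{
    if (m->strfold)
        return m->strfold(dst, dstsize, src);
    return m->strlower(dst, dstsize, src);
}

int
main(void)
{
    char        buf[16];

    sketch_strfold(&demo_methods, buf, sizeof(buf), "MiXeD");
    puts(buf);                  /* prints "mixed" */
    return 0;
}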
diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c
index f51768830cd..0c9fbdb40f2 100644
--- a/src/backend/utils/adt/pg_locale_builtin.c
+++ b/src/backend/utils/adt/pg_locale_builtin.c
@@ -18,22 +18,12 @@
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
-#include "utils/memutils.h"
#include "utils/pg_locale.h"
#include "utils/syscache.h"
extern pg_locale_t create_pg_locale_builtin(Oid collid,
MemoryContext context);
extern char *get_collation_actual_version_builtin(const char *collcollate);
-extern size_t strlower_builtin(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_builtin(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_builtin(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
struct WordBoundaryState
{
@@ -77,7 +67,7 @@ initcap_wbnext(void *state)
return wbstate->len;
}
-size_t
+static size_t
strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -85,7 +75,7 @@ strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
locale->info.builtin.casemap_full);
}
-size_t
+static size_t
strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -103,7 +93,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
initcap_wbnext, &wbstate);
}
-size_t
+static size_t
strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -111,7 +101,7 @@ strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
locale->info.builtin.casemap_full);
}
-size_t
+static size_t
strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -119,6 +109,98 @@ strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
locale->info.builtin.casemap_full);
}
+static bool
+wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isdigit(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isalpha(wc);
+}
+
+static bool
+wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isalnum(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isupper(wc);
+}
+
+static bool
+wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_islower(wc);
+}
+
+static bool
+wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isgraph(wc);
+}
+
+static bool
+wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isprint(wc);
+}
+
+static bool
+wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_ispunct(wc, !locale->info.builtin.casemap_full);
+}
+
+static bool
+wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return pg_u_isspace(wc);
+}
+
+static bool
+char_is_cased_builtin(char ch, pg_locale_t locale)
+{
+ return IS_HIGHBIT_SET(ch) ||
+ (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+}
+
+static pg_wchar
+wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return unicode_uppercase_simple(wc);
+}
+
+static pg_wchar
+wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
+{
+ return unicode_lowercase_simple(wc);
+}
+
+static const struct ctype_methods ctype_methods_builtin = {
+ .strlower = strlower_builtin,
+ .strtitle = strtitle_builtin,
+ .strupper = strupper_builtin,
+ .strfold = strfold_builtin,
+ .wc_isdigit = wc_isdigit_builtin,
+ .wc_isalpha = wc_isalpha_builtin,
+ .wc_isalnum = wc_isalnum_builtin,
+ .wc_isupper = wc_isupper_builtin,
+ .wc_islower = wc_islower_builtin,
+ .wc_isgraph = wc_isgraph_builtin,
+ .wc_isprint = wc_isprint_builtin,
+ .wc_ispunct = wc_ispunct_builtin,
+ .wc_isspace = wc_isspace_builtin,
+ .char_is_cased = char_is_cased_builtin,
+ .wc_tolower = wc_tolower_builtin,
+ .wc_toupper = wc_toupper_builtin,
+};
+
pg_locale_t
create_pg_locale_builtin(Oid collid, MemoryContext context)
{
@@ -158,10 +240,11 @@ create_pg_locale_builtin(Oid collid, MemoryContext context)
result->info.builtin.locale = MemoryContextStrdup(context, locstr);
result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0);
- result->provider = COLLPROVIDER_BUILTIN;
result->deterministic = true;
result->collate_is_c = true;
result->ctype_is_c = (strcmp(locstr, "C") == 0);
+ if (!result->ctype_is_c)
+ result->ctype = &ctype_methods_builtin;
return result;
}
diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index a32c32a0744..96741e08269 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -48,19 +48,22 @@
#define TEXTBUFLEN 1024
extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
-extern size_t strlower_icu(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_icu(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_icu(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strfold_icu(char *dest, size_t destsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
#ifdef USE_ICU
extern UCollator *pg_ucol_open(const char *loc_str);
+static size_t strlower_icu(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static size_t strtitle_icu(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static size_t strupper_icu(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static size_t strfold_icu(char *dest, size_t destsize, const char *src,
+ ssize_t srclen, pg_locale_t locale);
+static int strncoll_icu(const char *arg1, ssize_t len1,
+ const char *arg2, ssize_t len2,
+ pg_locale_t locale);
static size_t strnxfrm_icu(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
@@ -118,6 +121,25 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
const char *locale,
UErrorCode *pErrorCode);
+static bool
+char_is_cased_icu(char ch, pg_locale_t locale)
+{
+ return IS_HIGHBIT_SET(ch) ||
+ (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
+}
+
+static pg_wchar
+toupper_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_toupper(wc);
+}
+
+static pg_wchar
+tolower_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_tolower(wc);
+}
+
static const struct collate_methods collate_methods_icu = {
.strncoll = strncoll_icu,
.strnxfrm = strnxfrm_icu,
@@ -136,6 +158,78 @@ static const struct collate_methods collate_methods_icu_utf8 = {
.strxfrm_is_safe = true,
};
+static bool
+wc_isdigit_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isdigit(wc);
+}
+
+static bool
+wc_isalpha_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isalpha(wc);
+}
+
+static bool
+wc_isalnum_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isalnum(wc);
+}
+
+static bool
+wc_isupper_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isupper(wc);
+}
+
+static bool
+wc_islower_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_islower(wc);
+}
+
+static bool
+wc_isgraph_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isgraph(wc);
+}
+
+static bool
+wc_isprint_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isprint(wc);
+}
+
+static bool
+wc_ispunct_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_ispunct(wc);
+}
+
+static bool
+wc_isspace_icu(pg_wchar wc, pg_locale_t locale)
+{
+ return u_isspace(wc);
+}
+
+static const struct ctype_methods ctype_methods_icu = {
+ .strlower = strlower_icu,
+ .strtitle = strtitle_icu,
+ .strupper = strupper_icu,
+ .strfold = strfold_icu,
+ .wc_isdigit = wc_isdigit_icu,
+ .wc_isalpha = wc_isalpha_icu,
+ .wc_isalnum = wc_isalnum_icu,
+ .wc_isupper = wc_isupper_icu,
+ .wc_islower = wc_islower_icu,
+ .wc_isgraph = wc_isgraph_icu,
+ .wc_isprint = wc_isprint_icu,
+ .wc_ispunct = wc_ispunct_icu,
+ .wc_isspace = wc_isspace_icu,
+ .char_is_cased = char_is_cased_icu,
+ .wc_toupper = toupper_icu,
+ .wc_tolower = tolower_icu,
+};
#endif
pg_locale_t
@@ -198,7 +292,6 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
result->info.icu.locale = MemoryContextStrdup(context, iculocstr);
result->info.icu.ucol = collator;
- result->provider = COLLPROVIDER_ICU;
result->deterministic = deterministic;
result->collate_is_c = false;
result->ctype_is_c = false;
@@ -206,6 +299,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
result->collate = &collate_methods_icu_utf8;
else
result->collate = &collate_methods_icu;
+ result->ctype = &ctype_methods_icu;
return result;
#else
@@ -379,7 +473,7 @@ make_icu_collator(const char *iculocstr, const char *icurules)
}
}
-size_t
+static size_t
strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -399,7 +493,7 @@ strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
return result_len;
}
-size_t
+static size_t
strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -419,7 +513,7 @@ strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
return result_len;
}
-size_t
+static size_t
strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -439,7 +533,7 @@ strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
return result_len;
}
-size_t
+static size_t
strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
{
@@ -474,8 +568,6 @@ strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2
int result;
UErrorCode status;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
Assert(GetDatabaseEncoding() == PG_UTF8);
status = U_ZERO_ERROR;
@@ -503,8 +595,6 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
size_t uchar_bsize;
Size result_bsize;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
init_icu_converter();
ulen = uchar_length(icu_converter, src, srclen);
@@ -549,8 +639,6 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
uint32_t state[2];
UErrorCode status;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
Assert(GetDatabaseEncoding() == PG_UTF8);
uiter_setUTF8(&iter, src, srclen);
@@ -749,8 +837,6 @@ strncoll_icu(const char *arg1, ssize_t len1,
*uchar2;
int result;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
/* if encoding is UTF8, use more efficient strncoll_icu_utf8 */
#ifdef HAVE_UCOL_STRCOLLUTF8
Assert(GetDatabaseEncoding() != PG_UTF8);
@@ -799,8 +885,6 @@ strnxfrm_prefix_icu(char *dest, size_t destsize,
size_t uchar_bsize;
Size result_bsize;
- Assert(locale->provider == COLLPROVIDER_ICU);
-
/* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */
Assert(GetDatabaseEncoding() != PG_UTF8);
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 199857e22db..e9f9fc1e369 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -34,6 +34,46 @@
#endif
/*
+ * For the libc provider, to provide as much functionality as possible on a
+ * variety of platforms without going so far as to implement everything from
+ * scratch, we use several implementation strategies depending on the
+ * situation:
+ *
+ * 1. In C/POSIX collations, we use hard-wired code. We can't depend on
+ * the <ctype.h> functions since those will obey LC_CTYPE. Note that these
+ * collations don't give a fig about multibyte characters.
+ *
+ * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
+ * This assumes that every platform uses Unicode codepoints directly
+ * as the wchar_t representation of Unicode. (XXX: ICU makes this assumption
+ * even for non-UTF8 encodings, which may be a problem.) On some platforms
+ * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
+ *
+ * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
+ * values up to 255, and punt for values above that. This is 100% correct
+ * only in single-byte encodings such as LATINn. However, non-Unicode
+ * multibyte encodings are mostly Far Eastern character sets for which the
+ * properties being tested here aren't very relevant for higher code values
+ * anyway. The difficulty with using the <wctype.h> functions with
+ * non-Unicode multibyte encodings is that we can have no certainty that
+ * the platform's wchar_t representation matches what we do in pg_wchar
+ * conversions.
+ *
+ * As a special case, in the "default" collation, (2) and (3) force ASCII
+ * letters to follow ASCII upcase/downcase rules, while in a non-default
+ * collation we just let the library functions do what they will. The case
+ * where this matters is treatment of I/i in Turkish, and the behavior is
+ * meant to match the upper()/lower() SQL functions.
+ *
+ * We store the active collation setting in static variables. In principle
+ * it could be passed down to here via the regex library's "struct vars" data
+ * structure; but that would require somewhat invasive changes in the regex
+ * library, and right now there's no real benefit to be gained from that.
+ *
+ * NB: the coding here assumes pg_wchar is an unsigned type.
+ */
+
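/*
 * Standalone sketch of strategy 2's "punt" rule from the comment above
 * (hypothetical names): with a 16-bit wchar_t, classification is only
 * attempted for codepoints that fit in it, and anything larger is
 * passed through unclassified.
 */
#include <stdio.h>
#include <wchar.h>
#include <wctype.h>

typedef unsigned int pg_wchar_sketch;

static int
wc_isalpha_sketch(pg_wchar_sketch wc)
{
    if (sizeof(wchar_t) >= 4 || wc <= 0xFFFF)
        return iswalpha((wint_t) wc) != 0;
    return 0;                   /* punt: doesn't fit in a 16-bit wchar_t */
}

int
main(void)
{
    printf("%d\n", wc_isalpha_sketch(L'A'));    /* 1 in any locale */

    /*
     * U+1D400 punts on 16-bit-wchar_t platforms; elsewhere the answer
     * depends on the active locale's classification tables.
     */
    printf("%d\n", wc_isalpha_sketch(0x1D400));
    return 0;
}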
+/*
* Size of stack buffer to use for string transformations, used to avoid heap
* allocations in typical cases. This should be large enough that most strings
* will fit, but small enough that we feel comfortable putting it on the
@@ -43,13 +83,6 @@
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
-extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale);
-
static int strncoll_libc(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2,
pg_locale_t locale);
@@ -85,6 +118,251 @@ static size_t strupper_libc_mb(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
+static bool
+wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isdigit_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isalpha_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isalnum_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isupper_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return islower_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isgraph_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isprint_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return ispunct_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ return isspace_l((unsigned char) wc, locale->info.lt);
+}
+
+static bool
+wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswdigit_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswalpha_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswalnum_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswupper_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswlower_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswgraph_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswprint_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswpunct_l((wint_t) wc, locale->info.lt);
+}
+
+static bool
+wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ return iswspace_l((wint_t) wc, locale->info.lt);
+}
+
+static char
+char_tolower_libc(unsigned char ch, pg_locale_t locale)
+{
+ Assert(pg_database_encoding_max_length() == 1);
+ return tolower_l(ch, locale->info.lt);
+}
+
+static bool
+char_is_cased_libc(char ch, pg_locale_t locale)
+{
+ bool is_multibyte = pg_database_encoding_max_length() > 1;
+
+ if (is_multibyte && IS_HIGHBIT_SET(ch))
+ return true;
+ else
+ return isalpha_l((unsigned char) ch, locale->info.lt);
+}
+
+static pg_wchar
+toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() != PG_UTF8);
+
+ /* force C behavior for ASCII characters, per comments above */
+ if (locale->is_default && wc <= (pg_wchar) 127)
+ return pg_ascii_toupper((unsigned char) wc);
+ if (wc <= (pg_wchar) UCHAR_MAX)
+ return toupper_l((unsigned char) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static pg_wchar
+toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() == PG_UTF8);
+
+ /* force C behavior for ASCII characters, per comments above */
+ if (locale->is_default && wc <= (pg_wchar) 127)
+ return pg_ascii_toupper((unsigned char) wc);
+ if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
+ return towupper_l((wint_t) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static pg_wchar
+tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() != PG_UTF8);
+
+ /* force C behavior for ASCII characters, per comments above */
+ if (locale->is_default && wc <= (pg_wchar) 127)
+ return pg_ascii_tolower((unsigned char) wc);
+ if (wc <= (pg_wchar) UCHAR_MAX)
+ return tolower_l((unsigned char) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static pg_wchar
+tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
+{
+ Assert(GetDatabaseEncoding() == PG_UTF8);
+
+ /* force C behavior for ASCII characters, per comments above */
+ if (locale->is_default && wc <= (pg_wchar) 127)
+ return pg_ascii_tolower((unsigned char) wc);
+ if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
+ return towlower_l((wint_t) wc, locale->info.lt);
+ else
+ return wc;
+}
+
+static const struct ctype_methods ctype_methods_libc_sb = {
+ .strlower = strlower_libc_sb,
+ .strtitle = strtitle_libc_sb,
+ .strupper = strupper_libc_sb,
+ .wc_isdigit = wc_isdigit_libc_sb,
+ .wc_isalpha = wc_isalpha_libc_sb,
+ .wc_isalnum = wc_isalnum_libc_sb,
+ .wc_isupper = wc_isupper_libc_sb,
+ .wc_islower = wc_islower_libc_sb,
+ .wc_isgraph = wc_isgraph_libc_sb,
+ .wc_isprint = wc_isprint_libc_sb,
+ .wc_ispunct = wc_ispunct_libc_sb,
+ .wc_isspace = wc_isspace_libc_sb,
+ .char_is_cased = char_is_cased_libc,
+ .char_tolower = char_tolower_libc,
+ .wc_toupper = toupper_libc_sb,
+ .wc_tolower = tolower_libc_sb,
+ .max_chr = UCHAR_MAX,
+};
+
+/*
+ * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
+ * single-byte semantics for pattern matching.
+ */
+static const struct ctype_methods ctype_methods_libc_other_mb = {
+ .strlower = strlower_libc_mb,
+ .strtitle = strtitle_libc_mb,
+ .strupper = strupper_libc_mb,
+ .wc_isdigit = wc_isdigit_libc_sb,
+ .wc_isalpha = wc_isalpha_libc_sb,
+ .wc_isalnum = wc_isalnum_libc_sb,
+ .wc_isupper = wc_isupper_libc_sb,
+ .wc_islower = wc_islower_libc_sb,
+ .wc_isgraph = wc_isgraph_libc_sb,
+ .wc_isprint = wc_isprint_libc_sb,
+ .wc_ispunct = wc_ispunct_libc_sb,
+ .wc_isspace = wc_isspace_libc_sb,
+ .char_is_cased = char_is_cased_libc,
+ .char_tolower = char_tolower_libc,
+ .wc_toupper = toupper_libc_sb,
+ .wc_tolower = tolower_libc_sb,
+ .max_chr = UCHAR_MAX,
+};
+
+static const struct ctype_methods ctype_methods_libc_utf8 = {
+ .strlower = strlower_libc_mb,
+ .strtitle = strtitle_libc_mb,
+ .strupper = strupper_libc_mb,
+ .wc_isdigit = wc_isdigit_libc_mb,
+ .wc_isalpha = wc_isalpha_libc_mb,
+ .wc_isalnum = wc_isalnum_libc_mb,
+ .wc_isupper = wc_isupper_libc_mb,
+ .wc_islower = wc_islower_libc_mb,
+ .wc_isgraph = wc_isgraph_libc_mb,
+ .wc_isprint = wc_isprint_libc_mb,
+ .wc_ispunct = wc_ispunct_libc_mb,
+ .wc_isspace = wc_isspace_libc_mb,
+ .char_is_cased = char_is_cased_libc,
+ .char_tolower = char_tolower_libc,
+ .wc_toupper = toupper_libc_mb,
+ .wc_tolower = tolower_libc_mb,
+};
+
static const struct collate_methods collate_methods_libc = {
.strncoll = strncoll_libc,
.strnxfrm = strnxfrm_libc,
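One subtlety in toupper_libc_mb()/tolower_libc_mb() above: towupper_l() and
towlower_l() take a wint_t, and on platforms where wchar_t is only 16 bits
(notably Windows) a code point beyond the BMP cannot be represented, so it is
passed through unchanged. A standalone sketch of that guard, assuming
POSIX.1-2008 locale functions (glibc wants _GNU_SOURCE) and PostgreSQL's
32-bit pg_wchar:

    #include <locale.h>
    #include <wchar.h>
    #include <wctype.h>

    typedef unsigned int pg_wchar;  /* assumption: 32 bits, as in PostgreSQL */

    static pg_wchar
    sketch_towupper(pg_wchar wc, locale_t loc)
    {
        /* Where wchar_t is 16 bits, non-BMP code points don't fit in a
         * wint_t, so leave them unchanged rather than truncating. */
        if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
            return towupper_l((wint_t) wc, loc);
        return wc;
    }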
@@ -119,36 +397,6 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = {
};
#endif
-size_t
-strlower_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale)
-{
- if (pg_database_encoding_max_length() > 1)
- return strlower_libc_mb(dst, dstsize, src, srclen, locale);
- else
- return strlower_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
-size_t
-strtitle_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale)
-{
- if (pg_database_encoding_max_length() > 1)
- return strtitle_libc_mb(dst, dstsize, src, srclen, locale);
- else
- return strtitle_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
-size_t
-strupper_libc(char *dst, size_t dstsize, const char *src,
- ssize_t srclen, pg_locale_t locale)
-{
- if (pg_database_encoding_max_length() > 1)
- return strupper_libc_mb(dst, dstsize, src, srclen, locale);
- else
- return strupper_libc_sb(dst, dstsize, src, srclen, locale);
-}
-
static size_t
strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
pg_locale_t locale)
@@ -465,7 +713,6 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
loc = make_libc_collator(collate, ctype);
result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
- result->provider = COLLPROVIDER_LIBC;
result->deterministic = true;
result->collate_is_c = (strcmp(collate, "C") == 0) ||
(strcmp(collate, "POSIX") == 0);
@@ -481,6 +728,15 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
#endif
result->collate = &collate_methods_libc;
}
+ if (!result->ctype_is_c)
+ {
+ if (GetDatabaseEncoding() == PG_UTF8)
+ result->ctype = &ctype_methods_libc_utf8;
+ else if (pg_database_encoding_max_length() > 1)
+ result->ctype = &ctype_methods_libc_other_mb;
+ else
+ result->ctype = &ctype_methods_libc_sb;
+ }
return result;
}
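With the method tables above installed by create_pg_locale_libc(), character
classification and case mapping dispatch through the locale instead of
re-testing the database encoding at every call site. A hypothetical
caller-side sketch (the wrapper name and the C-locale fallback are
illustrative, not part of this patch):

    static bool
    sketch_wc_isdigit(pg_wchar wc, pg_locale_t locale)
    {
        /* C-semantics locales get no method table; use ASCII rules */
        if (locale->ctype == NULL)
            return wc >= '0' && wc <= '9';
        return locale->ctype->wc_isdigit(wc, locale);
    }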
@@ -576,8 +832,6 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
const char *arg2n;
int result;
- Assert(locale->provider == COLLPROVIDER_LIBC);
-
if (bufsize1 + bufsize2 > TEXTBUFLEN)
buf = palloc(bufsize1 + bufsize2);
@@ -632,8 +886,6 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
size_t bufsize = srclen + 1;
size_t result;
- Assert(locale->provider == COLLPROVIDER_LIBC);
-
if (srclen == -1)
return strxfrm_l(dest, src, destsize, locale->info.lt);
@@ -742,7 +994,6 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
int r;
int result;
- Assert(locale->provider == COLLPROVIDER_LIBC);
Assert(GetDatabaseEncoding() == PG_UTF8);
if (len1 == -1)
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 97af7c6554f..1c12ddbae49 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -640,10 +640,10 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
values[28] = BoolGetDatum(false); /* GSS credentials not
* delegated */
}
- if (beentry->st_query_id == 0)
+ if (beentry->st_query_id == INT64CONST(0))
nulls[30] = true;
else
- values[30] = UInt64GetDatum(beentry->st_query_id);
+ values[30] = Int64GetDatum(beentry->st_query_id);
}
else
{
@@ -1510,7 +1510,7 @@ pg_stat_io_build_tuples(ReturnSetInfo *rsinfo,
bktype_stats->bytes[io_obj][io_context][io_op];
/* Convert to numeric */
- snprintf(buf, sizeof buf, UINT64_FORMAT, byte);
+ snprintf(buf, sizeof buf, INT64_FORMAT, byte);
values[byte_idx] = DirectFunctionCall3(numeric_in,
CStringGetDatum(buf),
ObjectIdGetDatum(0),
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index edee1f7880b..6e2864cbbda 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -773,8 +773,11 @@ similar_escape_internal(text *pat_text, text *esc_text)
int plen,
elen;
bool afterescape = false;
- bool incharclass = false;
int nquotes = 0;
+ int charclass_depth = 0; /* Nesting level of character classes,
+ * encompassed by square brackets */
+ int charclass_start = 0; /* State of the character class start,
+ * for carets */
p = VARDATA_ANY(pat_text);
plen = VARSIZE_ANY_EXHDR(pat_text);
@@ -904,7 +907,7 @@ similar_escape_internal(text *pat_text, text *esc_text)
/* fast path */
if (afterescape)
{
- if (pchar == '"' && !incharclass) /* escape-double-quote? */
+ if (pchar == '"' && charclass_depth < 1) /* escape-double-quote? */
{
/* emit appropriate part separator, per notes above */
if (nquotes == 0)
@@ -953,18 +956,41 @@ similar_escape_internal(text *pat_text, text *esc_text)
/* SQL escape character; do not send to output */
afterescape = true;
}
- else if (incharclass)
+ else if (charclass_depth > 0)
{
if (pchar == '\\')
*r++ = '\\';
*r++ = pchar;
- if (pchar == ']')
- incharclass = false;
+
+ /*
+ * Ignore a closing bracket at the start of a character class.
+ * Such a bracket is taken literally rather than closing the
+ * class. "charclass_start" is 1 right at the beginning of a
+ * class and 2 after an initial caret.
+ */
+ if (pchar == ']' && charclass_start > 2)
+ charclass_depth--;
+ else if (pchar == '[')
+ charclass_depth++;
+
+ /*
+ * If there is a caret right after the opening bracket, it negates
+ * the character class, but a following closing bracket should
+ * still be treated as a normal character. That holds only for
+ * the first caret, so only the values 1 and 2 mean that a
+ * following closing bracket should be taken literally.
+ */
+ if (pchar == '^')
+ charclass_start++;
+ else
+ charclass_start = 3; /* definitely past the start */
}
else if (pchar == '[')
{
+ /* start of a character class */
*r++ = pchar;
- incharclass = true;
+ charclass_depth++;
+ charclass_start = 1;
}
else if (pchar == '%')
{
diff --git a/src/backend/utils/adt/regproc.c b/src/backend/utils/adt/regproc.c
index 5ee608a2b39..b8bbe95e82e 100644
--- a/src/backend/utils/adt/regproc.c
+++ b/src/backend/utils/adt/regproc.c
@@ -30,6 +30,7 @@
#include "catalog/pg_ts_config.h"
#include "catalog/pg_ts_dict.h"
#include "catalog/pg_type.h"
+#include "commands/dbcommands.h"
#include "lib/stringinfo.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
@@ -1764,6 +1765,123 @@ regnamespacesend(PG_FUNCTION_ARGS)
}
/*
+ * regdatabasein - converts database name to database OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '-' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_database entry.
+ */
+Datum
+regdatabasein(PG_FUNCTION_ARGS)
+{
+ char *db_name_or_oid = PG_GETARG_CSTRING(0);
+ Node *escontext = fcinfo->context;
+ Oid result;
+ List *names;
+
+ /* Handle "-" or numeric OID */
+ if (parseDashOrOid(db_name_or_oid, &result, escontext))
+ PG_RETURN_OID(result);
+
+ /* The rest of this wouldn't work in bootstrap mode */
+ if (IsBootstrapProcessingMode())
+ elog(ERROR, "regdatabase values must be OIDs in bootstrap mode");
+
+ /* Normal case: see if the name matches any pg_database entry. */
+ names = stringToQualifiedNameList(db_name_or_oid, escontext);
+ if (names == NIL)
+ PG_RETURN_NULL();
+
+ if (list_length(names) != 1)
+ ereturn(escontext, (Datum) 0,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("invalid name syntax")));
+
+ result = get_database_oid(strVal(linitial(names)), true);
+
+ if (!OidIsValid(result))
+ ereturn(escontext, (Datum) 0,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("database \"%s\" does not exist",
+ strVal(linitial(names)))));
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * to_regdatabase - converts database name to database OID
+ *
+ * If the name is not found, we return NULL.
+ */
+Datum
+to_regdatabase(PG_FUNCTION_ARGS)
+{
+ char *db_name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ Datum result;
+ ErrorSaveContext escontext = {T_ErrorSaveContext};
+
+ if (!DirectInputFunctionCallSafe(regdatabasein, db_name,
+ InvalidOid, -1,
+ (Node *) &escontext,
+ &result))
+ PG_RETURN_NULL();
+ PG_RETURN_DATUM(result);
+}
+
+/*
+ * regdatabaseout - converts database OID to database name
+ */
+Datum
+regdatabaseout(PG_FUNCTION_ARGS)
+{
+ Oid dboid = PG_GETARG_OID(0);
+ char *result;
+
+ if (dboid == InvalidOid)
+ {
+ result = pstrdup("-");
+ PG_RETURN_CSTRING(result);
+ }
+
+ result = get_database_name(dboid);
+
+ if (result)
+ {
+ /* pstrdup is not really necessary, but it avoids a compiler warning */
+ result = pstrdup(quote_identifier(result));
+ }
+ else
+ {
+ /* If OID doesn't match any database, return it numerically */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", dboid);
+ }
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regdatabaserecv - converts external binary format to regdatabase
+ */
+Datum
+regdatabaserecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regdatabasesend - converts regdatabase to binary format
+ */
+Datum
+regdatabasesend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+/*
* text_regclass: convert text to regclass
*
* This could be replaced by CoerceViaIO, except that we need to treat
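A hypothetical C-level caller for the new input function (DirectFunctionCall1
throws on bad input; soft-failure callers would instead use
DirectInputFunctionCallSafe with an ErrorSaveContext, as to_regdatabase()
does above):

    #include "postgres.h"
    #include "fmgr.h"
    #include "utils/fmgrprotos.h"   /* assumed home of the generated decl */

    /* Resolve a database name (or "-", or a numeric OID string) to an OID. */
    static Oid
    sketch_lookup_db_oid(const char *name)
    {
        Datum   d = DirectFunctionCall1(regdatabasein,
                                        CStringGetDatum(name));

        return DatumGetObjectId(d);
    }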
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index 6239900fa28..059fc5ebf60 100644
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -30,7 +30,6 @@
#include "access/xact.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_constraint.h"
-#include "catalog/pg_proc.h"
#include "commands/trigger.h"
#include "executor/executor.h"
#include "executor/spi.h"
@@ -46,7 +45,6 @@
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
-#include "utils/rangetypes.h"
#include "utils/rel.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 467b08198b8..3d6e6bdbfd2 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -5956,9 +5956,19 @@ get_select_query_def(Query *query, deparse_context *context)
{
if (query->limitOption == LIMIT_OPTION_WITH_TIES)
{
+ /*
+ * The limitCount arg is a c_expr, so it needs parens. Simple
+ * literals and function expressions would not need parens, but
+ * unfortunately it's hard to tell if the expression will be
+ * printed as a simple literal like 123 or as a typecast
+ * expression, like '-123'::int4. The grammar accepts the former
+ * without quoting, but not the latter.
+ */
appendContextKeyword(context, " FETCH FIRST ",
-PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
+ appendStringInfoChar(buf, '(');
get_rule_expr(query->limitCount, context, false);
+ appendStringInfoChar(buf, ')');
appendStringInfoString(buf, " ROWS WITH TIES");
}
else
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index a96b1b9c0bc..ce6a626eba2 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -103,7 +103,6 @@
#include "access/table.h"
#include "access/tableam.h"
#include "access/visibilitymap.h"
-#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic.h"
@@ -4620,6 +4619,7 @@ convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue,
case REGDICTIONARYOID:
case REGROLEOID:
case REGNAMESPACEOID:
+ case REGDATABASEOID:
*scaledvalue = convert_numeric_to_scalar(value, valuetypid,
&failure);
*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
@@ -4752,6 +4752,7 @@ convert_numeric_to_scalar(Datum value, Oid typid, bool *failure)
case REGDICTIONARYOID:
case REGROLEOID:
case REGNAMESPACEOID:
+ case REGDATABASEOID:
/* we can treat OIDs as integers... */
return (double) DatumGetObjectId(value);
}
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 347089b7626..0a5848a4ab2 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -6477,7 +6477,7 @@ timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow)
if (TIMESTAMP_NOT_FINITE(timestamp))
return timestamp;
- /* We don't expect this to fail, but check it pro forma */
+ /* timestamp2tm should not fail on valid timestamps, but cope */
if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) == 0)
{
tz = DetermineTimeZoneOffset(tm, session_timezone);
@@ -6485,23 +6485,22 @@ timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow)
result = dt2local(timestamp, -tz);
if (IS_VALID_TIMESTAMP(result))
- {
return result;
+ }
+
+ if (overflow)
+ {
+ if (timestamp < 0)
+ {
+ *overflow = -1;
+ TIMESTAMP_NOBEGIN(result);
}
- else if (overflow)
+ else
{
- if (result < MIN_TIMESTAMP)
- {
- *overflow = -1;
- TIMESTAMP_NOBEGIN(result);
- }
- else
- {
- *overflow = 1;
- TIMESTAMP_NOEND(result);
- }
- return result;
+ *overflow = 1;
+ TIMESTAMP_NOEND(result);
}
+ return result;
}
ereport(ERROR,
@@ -6531,27 +6530,81 @@ timestamptz_timestamp(PG_FUNCTION_ARGS)
PG_RETURN_TIMESTAMP(timestamptz2timestamp(timestamp));
}
+/*
+ * Convert timestamptz to timestamp, throwing error for overflow.
+ */
static Timestamp
timestamptz2timestamp(TimestampTz timestamp)
{
+ return timestamptz2timestamp_opt_overflow(timestamp, NULL);
+}
+
+/*
+ * Convert timestamp with time zone to timestamp.
+ *
+ * On successful conversion, *overflow is set to zero if it's not NULL.
+ *
+ * If the timestamptz is finite but out of the valid range for timestamp, then:
+ * if overflow is NULL, we throw an out-of-range error.
+ * if overflow is not NULL, we store +1 or -1 there to indicate the sign
+ * of the overflow, and return the appropriate timestamp infinity.
+ */
+Timestamp
+timestamptz2timestamp_opt_overflow(TimestampTz timestamp, int *overflow)
+{
Timestamp result;
struct pg_tm tt,
*tm = &tt;
fsec_t fsec;
int tz;
+ if (overflow)
+ *overflow = 0;
+
if (TIMESTAMP_NOT_FINITE(timestamp))
result = timestamp;
else
{
if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0)
+ {
+ if (overflow)
+ {
+ if (timestamp < 0)
+ {
+ *overflow = -1;
+ TIMESTAMP_NOBEGIN(result);
+ }
+ else
+ {
+ *overflow = 1;
+ TIMESTAMP_NOEND(result);
+ }
+ return result;
+ }
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
+ }
if (tm2timestamp(tm, fsec, NULL, &result) != 0)
+ {
+ if (overflow)
+ {
+ if (timestamp < 0)
+ {
+ *overflow = -1;
+ TIMESTAMP_NOBEGIN(result);
+ }
+ else
+ {
+ *overflow = 1;
+ TIMESTAMP_NOEND(result);
+ }
+ return result;
+ }
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
+ }
}
return result;
}
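A sketch of the intended soft-failure calling pattern, assuming the new
function's declaration is exported through utils/timestamp.h (names here are
hypothetical):

    #include "postgres.h"
    #include "utils/timestamp.h"

    /* Convert without risking an ereport() on out-of-range input. */
    static Timestamp
    sketch_convert(TimestampTz tstz)
    {
        int         overflow;
        Timestamp   ts;

        ts = timestamptz2timestamp_opt_overflow(tstz, &overflow);
        if (overflow != 0)
        {
            /* Out of range: ts is -infinity (overflow == -1) or +infinity
             * (overflow == +1).  Passing NULL would have thrown instead. */
        }
        return ts;
    }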
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 3e4d5568bde..ffae8c23abf 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -35,7 +35,6 @@
#include "port/pg_bswap.h"
#include "regex/regex.h"
#include "utils/builtins.h"
-#include "utils/bytea.h"
#include "utils/guc.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
@@ -43,10 +42,6 @@
#include "utils/sortsupport.h"
#include "utils/varlena.h"
-
-/* GUC variable */
-int bytea_output = BYTEA_OUTPUT_HEX;
-
typedef struct varlena VarString;
/*
@@ -148,12 +143,6 @@ static int text_position_get_match_pos(TextPositionState *state);
static void text_position_cleanup(TextPositionState *state);
static void check_collation_set(Oid collid);
static int text_cmp(text *arg1, text *arg2, Oid collid);
-static bytea *bytea_catenate(bytea *t1, bytea *t2);
-static bytea *bytea_substring(Datum str,
- int S,
- int L,
- bool length_not_specified);
-static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
static void appendStringInfoText(StringInfo str, const text *t);
static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate);
static void split_text_accum_result(SplitTextOutputData *tstate,
@@ -279,307 +268,6 @@ text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
* USER I/O ROUTINES *
*****************************************************************************/
-
-#define VAL(CH) ((CH) - '0')
-#define DIG(VAL) ((VAL) + '0')
-
-/*
- * byteain - converts from printable representation of byte array
- *
- * Non-printable characters must be passed as '\nnn' (octal) and are
- * converted to internal form. '\' must be passed as '\\'.
- * ereport(ERROR, ...) if bad form.
- *
- * BUGS:
- * The input is scanned twice.
- * The error checking of input is minimal.
- */
-Datum
-byteain(PG_FUNCTION_ARGS)
-{
- char *inputText = PG_GETARG_CSTRING(0);
- Node *escontext = fcinfo->context;
- char *tp;
- char *rp;
- int bc;
- bytea *result;
-
- /* Recognize hex input */
- if (inputText[0] == '\\' && inputText[1] == 'x')
- {
- size_t len = strlen(inputText);
-
- bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
- result = palloc(bc);
- bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
- escontext);
- SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
-
- PG_RETURN_BYTEA_P(result);
- }
-
- /* Else, it's the traditional escaped style */
- for (bc = 0, tp = inputText; *tp != '\0'; bc++)
- {
- if (tp[0] != '\\')
- tp++;
- else if ((tp[0] == '\\') &&
- (tp[1] >= '0' && tp[1] <= '3') &&
- (tp[2] >= '0' && tp[2] <= '7') &&
- (tp[3] >= '0' && tp[3] <= '7'))
- tp += 4;
- else if ((tp[0] == '\\') &&
- (tp[1] == '\\'))
- tp += 2;
- else
- {
- /*
- * one backslash, not followed by another or ### valid octal
- */
- ereturn(escontext, (Datum) 0,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type %s", "bytea")));
- }
- }
-
- bc += VARHDRSZ;
-
- result = (bytea *) palloc(bc);
- SET_VARSIZE(result, bc);
-
- tp = inputText;
- rp = VARDATA(result);
- while (*tp != '\0')
- {
- if (tp[0] != '\\')
- *rp++ = *tp++;
- else if ((tp[0] == '\\') &&
- (tp[1] >= '0' && tp[1] <= '3') &&
- (tp[2] >= '0' && tp[2] <= '7') &&
- (tp[3] >= '0' && tp[3] <= '7'))
- {
- bc = VAL(tp[1]);
- bc <<= 3;
- bc += VAL(tp[2]);
- bc <<= 3;
- *rp++ = bc + VAL(tp[3]);
-
- tp += 4;
- }
- else if ((tp[0] == '\\') &&
- (tp[1] == '\\'))
- {
- *rp++ = '\\';
- tp += 2;
- }
- else
- {
- /*
- * We should never get here. The first pass should not allow it.
- */
- ereturn(escontext, (Datum) 0,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type %s", "bytea")));
- }
- }
-
- PG_RETURN_BYTEA_P(result);
-}
-
-/*
- * byteaout - converts to printable representation of byte array
- *
- * In the traditional escaped format, non-printable characters are
- * printed as '\nnn' (octal) and '\' as '\\'.
- */
-Datum
-byteaout(PG_FUNCTION_ARGS)
-{
- bytea *vlena = PG_GETARG_BYTEA_PP(0);
- char *result;
- char *rp;
-
- if (bytea_output == BYTEA_OUTPUT_HEX)
- {
- /* Print hex format */
- rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
- *rp++ = '\\';
- *rp++ = 'x';
- rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
- }
- else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
- {
- /* Print traditional escaped format */
- char *vp;
- uint64 len;
- int i;
-
- len = 1; /* empty string has 1 char */
- vp = VARDATA_ANY(vlena);
- for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
- {
- if (*vp == '\\')
- len += 2;
- else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
- len += 4;
- else
- len++;
- }
-
- /*
- * In principle len can't overflow uint32 if the input fit in 1GB, but
- * for safety let's check rather than relying on palloc's internal
- * check.
- */
- if (len > MaxAllocSize)
- ereport(ERROR,
- (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
- errmsg_internal("result of bytea output conversion is too large")));
- rp = result = (char *) palloc(len);
-
- vp = VARDATA_ANY(vlena);
- for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
- {
- if (*vp == '\\')
- {
- *rp++ = '\\';
- *rp++ = '\\';
- }
- else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
- {
- int val; /* holds unprintable chars */
-
- val = *vp;
- rp[0] = '\\';
- rp[3] = DIG(val & 07);
- val >>= 3;
- rp[2] = DIG(val & 07);
- val >>= 3;
- rp[1] = DIG(val & 03);
- rp += 4;
- }
- else
- *rp++ = *vp;
- }
- }
- else
- {
- elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
- bytea_output);
- rp = result = NULL; /* keep compiler quiet */
- }
- *rp = '\0';
- PG_RETURN_CSTRING(result);
-}
-
-/*
- * bytearecv - converts external binary format to bytea
- */
-Datum
-bytearecv(PG_FUNCTION_ARGS)
-{
- StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
- bytea *result;
- int nbytes;
-
- nbytes = buf->len - buf->cursor;
- result = (bytea *) palloc(nbytes + VARHDRSZ);
- SET_VARSIZE(result, nbytes + VARHDRSZ);
- pq_copymsgbytes(buf, VARDATA(result), nbytes);
- PG_RETURN_BYTEA_P(result);
-}
-
-/*
- * byteasend - converts bytea to binary format
- *
- * This is a special case: just copy the input...
- */
-Datum
-byteasend(PG_FUNCTION_ARGS)
-{
- bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
-
- PG_RETURN_BYTEA_P(vlena);
-}
-
-Datum
-bytea_string_agg_transfn(PG_FUNCTION_ARGS)
-{
- StringInfo state;
-
- state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
-
- /* Append the value unless null, preceding it with the delimiter. */
- if (!PG_ARGISNULL(1))
- {
- bytea *value = PG_GETARG_BYTEA_PP(1);
- bool isfirst = false;
-
- /*
- * You might think we can just throw away the first delimiter, however
- * we must keep it as we may be a parallel worker doing partial
- * aggregation building a state to send to the main process. We need
- * to keep the delimiter of every aggregation so that the combine
- * function can properly join up the strings of two separately
- * partially aggregated results. The first delimiter is only stripped
- * off in the final function. To know how much to strip off the front
- * of the string, we store the length of the first delimiter in the
- * StringInfo's cursor field, which we don't otherwise need here.
- */
- if (state == NULL)
- {
- state = makeStringAggState(fcinfo);
- isfirst = true;
- }
-
- if (!PG_ARGISNULL(2))
- {
- bytea *delim = PG_GETARG_BYTEA_PP(2);
-
- appendBinaryStringInfo(state, VARDATA_ANY(delim),
- VARSIZE_ANY_EXHDR(delim));
- if (isfirst)
- state->cursor = VARSIZE_ANY_EXHDR(delim);
- }
-
- appendBinaryStringInfo(state, VARDATA_ANY(value),
- VARSIZE_ANY_EXHDR(value));
- }
-
- /*
- * The transition type for string_agg() is declared to be "internal",
- * which is a pass-by-value type the same size as a pointer.
- */
- if (state)
- PG_RETURN_POINTER(state);
- PG_RETURN_NULL();
-}
-
-Datum
-bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
-{
- StringInfo state;
-
- /* cannot be called directly because of internal-type argument */
- Assert(AggCheckCallContext(fcinfo, NULL));
-
- state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
-
- if (state != NULL)
- {
- /* As per comment in transfn, strip data before the cursor position */
- bytea *result;
- int strippedlen = state->len - state->cursor;
-
- result = (bytea *) palloc(strippedlen + VARHDRSZ);
- SET_VARSIZE(result, strippedlen + VARHDRSZ);
- memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
- PG_RETURN_BYTEA_P(result);
- }
- else
- PG_RETURN_NULL();
-}
-
/*
* textin - converts cstring to internal representation
*/
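The delimiter-keeping trick in bytea_string_agg_transfn(), removed from
varlena.c here (relocated elsewhere in the tree rather than dropped), is easy
to misread, so here is a worked trace of the state it builds, using C strings
in place of bytea for brevity:

    #include "postgres.h"
    #include "lib/stringinfo.h"

    static void
    sketch_string_agg_state(void)
    {
        StringInfo  state = makeStringInfo();

        appendStringInfoString(state, "|");  /* delim before 'a', retained */
        state->cursor = 1;                   /* length of first delimiter */
        appendStringInfoString(state, "a");
        appendStringInfoString(state, "|");  /* delimiter before 'b' */
        appendStringInfoString(state, "b");

        /* state->data is now "|a|b".  Two such partial states combine by
         * plain concatenation, since each carries its leading delimiter;
         * the final function returns state->data + state->cursor ("a|b"). */
    }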
@@ -2959,467 +2647,6 @@ bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
}
-/*-------------------------------------------------------------
- * byteaoctetlen
- *
- * get the number of bytes contained in an instance of type 'bytea'
- *-------------------------------------------------------------
- */
-Datum
-byteaoctetlen(PG_FUNCTION_ARGS)
-{
- Datum str = PG_GETARG_DATUM(0);
-
- /* We need not detoast the input at all */
- PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
-}
-
-/*
- * byteacat -
- * takes two bytea* and returns a bytea* that is the concatenation of
- * the two.
- *
- * Cloned from textcat and modified as required.
- */
-Datum
-byteacat(PG_FUNCTION_ARGS)
-{
- bytea *t1 = PG_GETARG_BYTEA_PP(0);
- bytea *t2 = PG_GETARG_BYTEA_PP(1);
-
- PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
-}
-
-/*
- * bytea_catenate
- * Guts of byteacat(), broken out so it can be used by other functions
- *
- * Arguments can be in short-header form, but not compressed or out-of-line
- */
-static bytea *
-bytea_catenate(bytea *t1, bytea *t2)
-{
- bytea *result;
- int len1,
- len2,
- len;
- char *ptr;
-
- len1 = VARSIZE_ANY_EXHDR(t1);
- len2 = VARSIZE_ANY_EXHDR(t2);
-
- /* paranoia ... probably should throw error instead? */
- if (len1 < 0)
- len1 = 0;
- if (len2 < 0)
- len2 = 0;
-
- len = len1 + len2 + VARHDRSZ;
- result = (bytea *) palloc(len);
-
- /* Set size of result string... */
- SET_VARSIZE(result, len);
-
- /* Fill data field of result string... */
- ptr = VARDATA(result);
- if (len1 > 0)
- memcpy(ptr, VARDATA_ANY(t1), len1);
- if (len2 > 0)
- memcpy(ptr + len1, VARDATA_ANY(t2), len2);
-
- return result;
-}
-
-#define PG_STR_GET_BYTEA(str_) \
- DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
-
-/*
- * bytea_substr()
- * Return a substring starting at the specified position.
- * Cloned from text_substr and modified as required.
- *
- * Input:
- * - string
- * - starting position (is one-based)
- * - string length (optional)
- *
- * If the starting position is zero or less, then return from the start of the string
- * adjusting the length to be consistent with the "negative start" per SQL.
- * If the length is less than zero, an ERROR is thrown. If no third argument
- * (length) is provided, the length to the end of the string is assumed.
- */
-Datum
-bytea_substr(PG_FUNCTION_ARGS)
-{
- PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
- PG_GETARG_INT32(1),
- PG_GETARG_INT32(2),
- false));
-}
-
-/*
- * bytea_substr_no_len -
- * Wrapper to avoid opr_sanity failure due to
- * one function accepting a different number of args.
- */
-Datum
-bytea_substr_no_len(PG_FUNCTION_ARGS)
-{
- PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
- PG_GETARG_INT32(1),
- -1,
- true));
-}
-
-static bytea *
-bytea_substring(Datum str,
- int S,
- int L,
- bool length_not_specified)
-{
- int32 S1; /* adjusted start position */
- int32 L1; /* adjusted substring length */
- int32 E; /* end position */
-
- /*
- * The logic here should generally match text_substring().
- */
- S1 = Max(S, 1);
-
- if (length_not_specified)
- {
- /*
- * Not passed a length - DatumGetByteaPSlice() grabs everything to the
- * end of the string if we pass it a negative value for length.
- */
- L1 = -1;
- }
- else if (L < 0)
- {
- /* SQL99 says to throw an error for E < S, i.e., negative length */
- ereport(ERROR,
- (errcode(ERRCODE_SUBSTRING_ERROR),
- errmsg("negative substring length not allowed")));
- L1 = -1; /* silence stupider compilers */
- }
- else if (pg_add_s32_overflow(S, L, &E))
- {
- /*
- * L could be large enough for S + L to overflow, in which case the
- * substring must run to end of string.
- */
- L1 = -1;
- }
- else
- {
- /*
- * A zero or negative value for the end position can happen if the
- * start was negative or one. SQL99 says to return a zero-length
- * string.
- */
- if (E < 1)
- return PG_STR_GET_BYTEA("");
-
- L1 = E - S1;
- }
-
- /*
- * If the start position is past the end of the string, SQL99 says to
- * return a zero-length string -- DatumGetByteaPSlice() will do that for
- * us. We need only convert S1 to zero-based starting position.
- */
- return DatumGetByteaPSlice(str, S1 - 1, L1);
-}
-
-/*
- * byteaoverlay
- * Replace specified substring of first string with second
- *
- * The SQL standard defines OVERLAY() in terms of substring and concatenation.
- * This code is a direct implementation of what the standard says.
- */
-Datum
-byteaoverlay(PG_FUNCTION_ARGS)
-{
- bytea *t1 = PG_GETARG_BYTEA_PP(0);
- bytea *t2 = PG_GETARG_BYTEA_PP(1);
- int sp = PG_GETARG_INT32(2); /* substring start position */
- int sl = PG_GETARG_INT32(3); /* substring length */
-
- PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
-}
-
-Datum
-byteaoverlay_no_len(PG_FUNCTION_ARGS)
-{
- bytea *t1 = PG_GETARG_BYTEA_PP(0);
- bytea *t2 = PG_GETARG_BYTEA_PP(1);
- int sp = PG_GETARG_INT32(2); /* substring start position */
- int sl;
-
- sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
- PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
-}
-
-static bytea *
-bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
-{
- bytea *result;
- bytea *s1;
- bytea *s2;
- int sp_pl_sl;
-
- /*
- * Check for possible integer-overflow cases. For negative sp, throw a
- * "substring length" error because that's what should be expected
- * according to the spec's definition of OVERLAY().
- */
- if (sp <= 0)
- ereport(ERROR,
- (errcode(ERRCODE_SUBSTRING_ERROR),
- errmsg("negative substring length not allowed")));
- if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
- ereport(ERROR,
- (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("integer out of range")));
-
- s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
- s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
- result = bytea_catenate(s1, t2);
- result = bytea_catenate(result, s2);
-
- return result;
-}
-
-/*
- * bit_count
- */
-Datum
-bytea_bit_count(PG_FUNCTION_ARGS)
-{
- bytea *t1 = PG_GETARG_BYTEA_PP(0);
-
- PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
-}
-
-/*
- * byteapos -
- * Return the position of the specified substring.
- * Implements the SQL POSITION() function.
- * Cloned from textpos and modified as required.
- */
-Datum
-byteapos(PG_FUNCTION_ARGS)
-{
- bytea *t1 = PG_GETARG_BYTEA_PP(0);
- bytea *t2 = PG_GETARG_BYTEA_PP(1);
- int pos;
- int px,
- p;
- int len1,
- len2;
- char *p1,
- *p2;
-
- len1 = VARSIZE_ANY_EXHDR(t1);
- len2 = VARSIZE_ANY_EXHDR(t2);
-
- if (len2 <= 0)
- PG_RETURN_INT32(1); /* result for empty pattern */
-
- p1 = VARDATA_ANY(t1);
- p2 = VARDATA_ANY(t2);
-
- pos = 0;
- px = (len1 - len2);
- for (p = 0; p <= px; p++)
- {
- if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
- {
- pos = p + 1;
- break;
- };
- p1++;
- };
-
- PG_RETURN_INT32(pos);
-}
-
-/*-------------------------------------------------------------
- * byteaGetByte
- *
- * this routine treats "bytea" as an array of bytes.
- * It returns the Nth byte (a number between 0 and 255).
- *-------------------------------------------------------------
- */
-Datum
-byteaGetByte(PG_FUNCTION_ARGS)
-{
- bytea *v = PG_GETARG_BYTEA_PP(0);
- int32 n = PG_GETARG_INT32(1);
- int len;
- int byte;
-
- len = VARSIZE_ANY_EXHDR(v);
-
- if (n < 0 || n >= len)
- ereport(ERROR,
- (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
- errmsg("index %d out of valid range, 0..%d",
- n, len - 1)));
-
- byte = ((unsigned char *) VARDATA_ANY(v))[n];
-
- PG_RETURN_INT32(byte);
-}
-
-/*-------------------------------------------------------------
- * byteaGetBit
- *
- * This routine treats a "bytea" type like an array of bits.
- * It returns the value of the Nth bit (0 or 1).
- *
- *-------------------------------------------------------------
- */
-Datum
-byteaGetBit(PG_FUNCTION_ARGS)
-{
- bytea *v = PG_GETARG_BYTEA_PP(0);
- int64 n = PG_GETARG_INT64(1);
- int byteNo,
- bitNo;
- int len;
- int byte;
-
- len = VARSIZE_ANY_EXHDR(v);
-
- if (n < 0 || n >= (int64) len * 8)
- ereport(ERROR,
- (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
- errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
- n, (int64) len * 8 - 1)));
-
- /* n/8 is now known < len, so safe to cast to int */
- byteNo = (int) (n / 8);
- bitNo = (int) (n % 8);
-
- byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
-
- if (byte & (1 << bitNo))
- PG_RETURN_INT32(1);
- else
- PG_RETURN_INT32(0);
-}
-
-/*-------------------------------------------------------------
- * byteaSetByte
- *
- * Given an instance of type 'bytea' creates a new one with
- * the Nth byte set to the given value.
- *
- *-------------------------------------------------------------
- */
-Datum
-byteaSetByte(PG_FUNCTION_ARGS)
-{
- bytea *res = PG_GETARG_BYTEA_P_COPY(0);
- int32 n = PG_GETARG_INT32(1);
- int32 newByte = PG_GETARG_INT32(2);
- int len;
-
- len = VARSIZE(res) - VARHDRSZ;
-
- if (n < 0 || n >= len)
- ereport(ERROR,
- (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
- errmsg("index %d out of valid range, 0..%d",
- n, len - 1)));
-
- /*
- * Now set the byte.
- */
- ((unsigned char *) VARDATA(res))[n] = newByte;
-
- PG_RETURN_BYTEA_P(res);
-}
-
-/*-------------------------------------------------------------
- * byteaSetBit
- *
- * Given an instance of type 'bytea' creates a new one with
- * the Nth bit set to the given value.
- *
- *-------------------------------------------------------------
- */
-Datum
-byteaSetBit(PG_FUNCTION_ARGS)
-{
- bytea *res = PG_GETARG_BYTEA_P_COPY(0);
- int64 n = PG_GETARG_INT64(1);
- int32 newBit = PG_GETARG_INT32(2);
- int len;
- int oldByte,
- newByte;
- int byteNo,
- bitNo;
-
- len = VARSIZE(res) - VARHDRSZ;
-
- if (n < 0 || n >= (int64) len * 8)
- ereport(ERROR,
- (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
- errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
- n, (int64) len * 8 - 1)));
-
- /* n/8 is now known < len, so safe to cast to int */
- byteNo = (int) (n / 8);
- bitNo = (int) (n % 8);
-
- /*
- * sanity check!
- */
- if (newBit != 0 && newBit != 1)
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("new bit must be 0 or 1")));
-
- /*
- * Update the byte.
- */
- oldByte = ((unsigned char *) VARDATA(res))[byteNo];
-
- if (newBit == 0)
- newByte = oldByte & (~(1 << bitNo));
- else
- newByte = oldByte | (1 << bitNo);
-
- ((unsigned char *) VARDATA(res))[byteNo] = newByte;
-
- PG_RETURN_BYTEA_P(res);
-}
-
-/*
- * Return reversed bytea
- */
-Datum
-bytea_reverse(PG_FUNCTION_ARGS)
-{
- bytea *v = PG_GETARG_BYTEA_PP(0);
- const char *p = VARDATA_ANY(v);
- int len = VARSIZE_ANY_EXHDR(v);
- const char *endp = p + len;
- bytea *result = palloc(len + VARHDRSZ);
- char *dst = (char *) VARDATA(result) + len;
-
- SET_VARSIZE(result, len + VARHDRSZ);
-
- while (p < endp)
- *(--dst) = *p++;
-
- PG_RETURN_BYTEA_P(result);
-}
-
-
/* text_name()
* Converts a text type to a Name type.
*/
@@ -3849,331 +3076,6 @@ SplitGUCList(char *rawstring, char separator,
return true;
}
-
-/*****************************************************************************
- * Comparison Functions used for bytea
- *
- * Note: btree indexes need these routines not to leak memory; therefore,
- * be careful to free working copies of toasted datums. Most places don't
- * need to be so careful.
- *****************************************************************************/
-
-Datum
-byteaeq(PG_FUNCTION_ARGS)
-{
- Datum arg1 = PG_GETARG_DATUM(0);
- Datum arg2 = PG_GETARG_DATUM(1);
- bool result;
- Size len1,
- len2;
-
- /*
- * We can use a fast path for unequal lengths, which might save us from
- * having to detoast one or both values.
- */
- len1 = toast_raw_datum_size(arg1);
- len2 = toast_raw_datum_size(arg2);
- if (len1 != len2)
- result = false;
- else
- {
- bytea *barg1 = DatumGetByteaPP(arg1);
- bytea *barg2 = DatumGetByteaPP(arg2);
-
- result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
- len1 - VARHDRSZ) == 0);
-
- PG_FREE_IF_COPY(barg1, 0);
- PG_FREE_IF_COPY(barg2, 1);
- }
-
- PG_RETURN_BOOL(result);
-}
-
-Datum
-byteane(PG_FUNCTION_ARGS)
-{
- Datum arg1 = PG_GETARG_DATUM(0);
- Datum arg2 = PG_GETARG_DATUM(1);
- bool result;
- Size len1,
- len2;
-
- /*
- * We can use a fast path for unequal lengths, which might save us from
- * having to detoast one or both values.
- */
- len1 = toast_raw_datum_size(arg1);
- len2 = toast_raw_datum_size(arg2);
- if (len1 != len2)
- result = true;
- else
- {
- bytea *barg1 = DatumGetByteaPP(arg1);
- bytea *barg2 = DatumGetByteaPP(arg2);
-
- result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
- len1 - VARHDRSZ) != 0);
-
- PG_FREE_IF_COPY(barg1, 0);
- PG_FREE_IF_COPY(barg2, 1);
- }
-
- PG_RETURN_BOOL(result);
-}
-
-Datum
-bytealt(PG_FUNCTION_ARGS)
-{
- bytea *arg1 = PG_GETARG_BYTEA_PP(0);
- bytea *arg2 = PG_GETARG_BYTEA_PP(1);
- int len1,
- len2;
- int cmp;
-
- len1 = VARSIZE_ANY_EXHDR(arg1);
- len2 = VARSIZE_ANY_EXHDR(arg2);
-
- cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-
- PG_FREE_IF_COPY(arg1, 0);
- PG_FREE_IF_COPY(arg2, 1);
-
- PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
-}
-
-Datum
-byteale(PG_FUNCTION_ARGS)
-{
- bytea *arg1 = PG_GETARG_BYTEA_PP(0);
- bytea *arg2 = PG_GETARG_BYTEA_PP(1);
- int len1,
- len2;
- int cmp;
-
- len1 = VARSIZE_ANY_EXHDR(arg1);
- len2 = VARSIZE_ANY_EXHDR(arg2);
-
- cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-
- PG_FREE_IF_COPY(arg1, 0);
- PG_FREE_IF_COPY(arg2, 1);
-
- PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
-}
-
-Datum
-byteagt(PG_FUNCTION_ARGS)
-{
- bytea *arg1 = PG_GETARG_BYTEA_PP(0);
- bytea *arg2 = PG_GETARG_BYTEA_PP(1);
- int len1,
- len2;
- int cmp;
-
- len1 = VARSIZE_ANY_EXHDR(arg1);
- len2 = VARSIZE_ANY_EXHDR(arg2);
-
- cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-
- PG_FREE_IF_COPY(arg1, 0);
- PG_FREE_IF_COPY(arg2, 1);
-
- PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
-}
-
-Datum
-byteage(PG_FUNCTION_ARGS)
-{
- bytea *arg1 = PG_GETARG_BYTEA_PP(0);
- bytea *arg2 = PG_GETARG_BYTEA_PP(1);
- int len1,
- len2;
- int cmp;
-
- len1 = VARSIZE_ANY_EXHDR(arg1);
- len2 = VARSIZE_ANY_EXHDR(arg2);
-
- cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
-
- PG_FREE_IF_COPY(arg1, 0);
- PG_FREE_IF_COPY(arg2, 1);
-
- PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
-}
-
-Datum
-byteacmp(PG_FUNCTION_ARGS)
-{
- bytea *arg1 = PG_GETARG_BYTEA_PP(0);
- bytea *arg2 = PG_GETARG_BYTEA_PP(1);
- int len1,
- len2;
- int cmp;
-
- len1 = VARSIZE_ANY_EXHDR(arg1);
- len2 = VARSIZE_ANY_EXHDR(arg2);
-
- cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
- if ((cmp == 0) && (len1 != len2))
- cmp = (len1 < len2) ? -1 : 1;
-
- PG_FREE_IF_COPY(arg1, 0);
- PG_FREE_IF_COPY(arg2, 1);
-
- PG_RETURN_INT32(cmp);
-}
-
-Datum
-bytea_larger(PG_FUNCTION_ARGS)
-{
- bytea *arg1 = PG_GETARG_BYTEA_PP(0);
- bytea *arg2 = PG_GETARG_BYTEA_PP(1);
- bytea *result;
- int len1,
- len2;
- int cmp;
-
- len1 = VARSIZE_ANY_EXHDR(arg1);
- len2 = VARSIZE_ANY_EXHDR(arg2);
-
- cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
- result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2);
-
- PG_RETURN_BYTEA_P(result);
-}
-
-Datum
-bytea_smaller(PG_FUNCTION_ARGS)
-{
- bytea *arg1 = PG_GETARG_BYTEA_PP(0);
- bytea *arg2 = PG_GETARG_BYTEA_PP(1);
- bytea *result;
- int len1,
- len2;
- int cmp;
-
- len1 = VARSIZE_ANY_EXHDR(arg1);
- len2 = VARSIZE_ANY_EXHDR(arg2);
-
- cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
- result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2);
-
- PG_RETURN_BYTEA_P(result);
-}
-
-Datum
-bytea_sortsupport(PG_FUNCTION_ARGS)
-{
- SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
- MemoryContext oldcontext;
-
- oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
-
- /* Use generic string SortSupport, forcing "C" collation */
- varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
-
- MemoryContextSwitchTo(oldcontext);
-
- PG_RETURN_VOID();
-}
-
-/* Cast bytea -> int2 */
-Datum
-bytea_int2(PG_FUNCTION_ARGS)
-{
- bytea *v = PG_GETARG_BYTEA_PP(0);
- int len = VARSIZE_ANY_EXHDR(v);
- uint16 result;
-
- /* Check that the byte array is not too long */
- if (len > sizeof(result))
- ereport(ERROR,
- errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("smallint out of range"));
-
- /* Convert it to an integer; most significant bytes come first */
- result = 0;
- for (int i = 0; i < len; i++)
- {
- result <<= BITS_PER_BYTE;
- result |= ((unsigned char *) VARDATA_ANY(v))[i];
- }
-
- PG_RETURN_INT16(result);
-}
-
-/* Cast bytea -> int4 */
-Datum
-bytea_int4(PG_FUNCTION_ARGS)
-{
- bytea *v = PG_GETARG_BYTEA_PP(0);
- int len = VARSIZE_ANY_EXHDR(v);
- uint32 result;
-
- /* Check that the byte array is not too long */
- if (len > sizeof(result))
- ereport(ERROR,
- errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("integer out of range"));
-
- /* Convert it to an integer; most significant bytes come first */
- result = 0;
- for (int i = 0; i < len; i++)
- {
- result <<= BITS_PER_BYTE;
- result |= ((unsigned char *) VARDATA_ANY(v))[i];
- }
-
- PG_RETURN_INT32(result);
-}
-
-/* Cast bytea -> int8 */
-Datum
-bytea_int8(PG_FUNCTION_ARGS)
-{
- bytea *v = PG_GETARG_BYTEA_PP(0);
- int len = VARSIZE_ANY_EXHDR(v);
- uint64 result;
-
- /* Check that the byte array is not too long */
- if (len > sizeof(result))
- ereport(ERROR,
- errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("bigint out of range"));
-
- /* Convert it to an integer; most significant bytes come first */
- result = 0;
- for (int i = 0; i < len; i++)
- {
- result <<= BITS_PER_BYTE;
- result |= ((unsigned char *) VARDATA_ANY(v))[i];
- }
-
- PG_RETURN_INT64(result);
-}
-
-/* Cast int2 -> bytea; can just use int2send() */
-Datum
-int2_bytea(PG_FUNCTION_ARGS)
-{
- return int2send(fcinfo);
-}
-
-/* Cast int4 -> bytea; can just use int4send() */
-Datum
-int4_bytea(PG_FUNCTION_ARGS)
-{
- return int4send(fcinfo);
-}
-
-/* Cast int8 -> bytea; can just use int8send() */
-Datum
-int8_bytea(PG_FUNCTION_ARGS)
-{
- return int8send(fcinfo);
-}
-
/*
* appendStringInfoText
*
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index db8d0d6a7e8..2bd39b6ac4b 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -529,14 +529,36 @@ xmltext(PG_FUNCTION_ARGS)
#ifdef USE_LIBXML
text *arg = PG_GETARG_TEXT_PP(0);
text *result;
- xmlChar *xmlbuf = NULL;
+ volatile xmlChar *xmlbuf = NULL;
+ PgXmlErrorContext *xmlerrcxt;
+
+ /* We gotta spin up some error handling before calling into libxml2. */
+ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
+ PG_TRY();
+ {
+ xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
+
+ if (xmlbuf == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlChar");
- xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
+ result = cstring_to_text_with_len((const char *) xmlbuf,
+ xmlStrlen((const xmlChar *) xmlbuf));
+ }
+ PG_CATCH();
+ {
+ if (xmlbuf)
+ xmlFree((xmlChar *) xmlbuf);
- Assert(xmlbuf);
+ pg_xml_done(xmlerrcxt, true);
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ xmlFree((xmlChar *) xmlbuf);
+ pg_xml_done(xmlerrcxt, false);
- result = cstring_to_text_with_len((const char *) xmlbuf, xmlStrlen(xmlbuf));
- xmlFree(xmlbuf);
PG_RETURN_XML_P(result);
#else
NO_XML_SUPPORT();
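The xmltext() rewrite above follows a general idiom worth spelling out: a
pointer assigned inside PG_TRY() and consulted in PG_CATCH() must be declared
volatile, or the longjmp taken on error may clobber the register holding it.
A distilled sketch (the allocation shown is illustrative):

    #include "postgres.h"
    #include <libxml/xmlstring.h>

    static void
    sketch_guarded_alloc(void)
    {
        volatile xmlChar *buf = NULL;

        PG_TRY();
        {
            buf = xmlStrdup((const xmlChar *) "payload"); /* any libxml2 alloc */
            if (buf == NULL)
                ereport(ERROR,
                        (errcode(ERRCODE_OUT_OF_MEMORY),
                         errmsg("out of memory")));
            /* ... further work that may also ereport() ... */
        }
        PG_CATCH();
        {
            if (buf)
                xmlFree((xmlChar *) buf);   /* error path: free, re-throw */
            PG_RE_THROW();
        }
        PG_END_TRY();

        xmlFree((xmlChar *) buf);           /* normal-path cleanup */
    }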
@@ -754,6 +776,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
* content nodes, and then iterate over the nodes.
*/
xmlNodePtr root;
+ xmlNodePtr oldroot;
xmlNodePtr newline;
root = xmlNewNode(NULL, (const xmlChar *) "content-root");
@@ -761,9 +784,18 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate xml node");
- /* This attaches root to doc, so we need not free it separately. */
- xmlDocSetRootElement(doc, root);
- xmlAddChildList(root, content_nodes);
+ /*
+ * This attaches root to doc, so we need not free it separately...
+ * but instead, we have to free the old root if there was one.
+ */
+ oldroot = xmlDocSetRootElement(doc, root);
+ if (oldroot != NULL)
+ xmlFreeNode(oldroot);
+
+ if (xmlAddChildList(root, content_nodes) == NULL ||
+ xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not append xml node list");
/*
* We use this node to insert newlines in the dump. Note: in at
@@ -924,7 +956,10 @@ xmlelement(XmlExpr *xexpr,
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate xmlTextWriter");
- xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
+ if (xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name) < 0 ||
+ xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not start xml element");
forboth(arg, named_arg_strings, narg, xexpr->arg_names)
{
@@ -932,19 +967,30 @@ xmlelement(XmlExpr *xexpr,
char *argname = strVal(lfirst(narg));
if (str)
- xmlTextWriterWriteAttribute(writer,
- (xmlChar *) argname,
- (xmlChar *) str);
+ {
+ if (xmlTextWriterWriteAttribute(writer,
+ (xmlChar *) argname,
+ (xmlChar *) str) < 0 ||
+ xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not write xml attribute");
+ }
}
foreach(arg, arg_strings)
{
char *str = (char *) lfirst(arg);
- xmlTextWriterWriteRaw(writer, (xmlChar *) str);
+ if (xmlTextWriterWriteRaw(writer, (xmlChar *) str) < 0 ||
+ xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not write raw xml text");
}
- xmlTextWriterEndElement(writer);
+ if (xmlTextWriterEndElement(writer) < 0 ||
+ xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not end xml element");
/* we MUST do this now to flush data out to the buffer ... */
xmlFreeTextWriter(writer);
@@ -1850,6 +1896,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
else
{
xmlNodePtr root;
+ xmlNodePtr oldroot PG_USED_FOR_ASSERTS_ONLY;
/* set up document with empty root node to be the context node */
doc = xmlNewDoc(version);
@@ -1868,8 +1915,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg,
if (root == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate xml node");
- /* This attaches root to doc, so we need not free it separately. */
- xmlDocSetRootElement(doc, root);
+
+ /*
+ * This attaches root to doc, so we need not free it separately;
+ * and there can't yet be any old root to free.
+ */
+ oldroot = xmlDocSetRootElement(doc, root);
+ Assert(oldroot == NULL);
/* allow empty content */
if (*(utf8string + count))
@@ -4207,20 +4259,27 @@ xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
}
else
{
- xmlChar *str;
+ volatile xmlChar *str = NULL;
- str = xmlXPathCastNodeToString(cur);
PG_TRY();
{
+ char *escaped;
+
+ str = xmlXPathCastNodeToString(cur);
+ if (str == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlChar");
+
/* Here we rely on XML having the same representation as TEXT */
- char *escaped = escape_xml((char *) str);
+ escaped = escape_xml((char *) str);
result = (xmltype *) cstring_to_text(escaped);
pfree(escaped);
}
PG_FINALLY();
{
- xmlFree(str);
+ if (str)
+ xmlFree((xmlChar *) str);
}
PG_END_TRY();
}
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 657648996c2..d1b25214376 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -317,6 +317,7 @@ GetCCHashEqFuncs(Oid keytype, CCHashFN *hashfunc, RegProcedure *eqfunc, CCFastEq
case REGDICTIONARYOID:
case REGROLEOID:
case REGNAMESPACEOID:
+ case REGDATABASEOID:
*hashfunc = int4hashfast;
*fasteqfunc = int4eqfast;
*eqfunc = F_OIDEQ;
diff --git a/src/backend/utils/cache/funccache.c b/src/backend/utils/cache/funccache.c
index 150c502a612..afc048a051e 100644
--- a/src/backend/utils/cache/funccache.c
+++ b/src/backend/utils/cache/funccache.c
@@ -491,6 +491,7 @@ cached_function_compile(FunctionCallInfo fcinfo,
CachedFunctionHashKey hashkey;
bool function_valid = false;
bool hashkey_valid = false;
+ bool new_function = false;
/*
* Lookup the pg_proc tuple by Oid; we'll need it in any case
@@ -570,13 +571,15 @@ recheck:
/*
* Create the new function struct, if not done already. The function
- * structs are never thrown away, so keep them in TopMemoryContext.
+ * cache entry will be kept for the life of the backend, so put it in
+ * TopMemoryContext.
*/
Assert(cacheEntrySize >= sizeof(CachedFunction));
if (function == NULL)
{
function = (CachedFunction *)
MemoryContextAllocZero(TopMemoryContext, cacheEntrySize);
+ new_function = true;
}
else
{
@@ -585,17 +588,36 @@ recheck:
}
/*
- * Fill in the CachedFunction part. fn_hashkey and use_count remain
- * zeroes for now.
+ * However, if function compilation fails, we'd like not to leak the
+ * function struct, so use a PG_TRY block to prevent that. (It's up
+ * to the compile callback function to avoid its own internal leakage
+ * in such cases.) Unfortunately, freeing the struct is only safe if
+ * we just allocated it: otherwise there are probably fn_extra
+ * pointers to it.
*/
- function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
- function->fn_tid = procTup->t_self;
- function->dcallback = dcallback;
+ PG_TRY();
+ {
+ /*
+ * Do the hard, language-specific part.
+ */
+ ccallback(fcinfo, procTup, &hashkey, function, forValidator);
+ }
+ PG_CATCH();
+ {
+ if (new_function)
+ pfree(function);
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
/*
- * Do the hard, language-specific part.
+ * Fill in the CachedFunction part. (We do this last to prevent the
+ * function from looking valid before it's fully built.) fn_hashkey
+ * will be set by cfunc_hashtable_insert; use_count remains zero.
*/
- ccallback(fcinfo, procTup, &hashkey, function, forValidator);
+ function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
+ function->fn_tid = procTup->t_self;
+ function->dcallback = dcallback;
/*
* Add the completed struct to the hash table.
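The guard above generalizes: a cache entry may be pfree'd on error only if
the current call allocated it, because a pre-existing entry is typically
still referenced through fn_extra pointers. Distilled, with hypothetical
names (compile_entry is any callback that may ereport()):

    #include "postgres.h"
    #include "utils/memutils.h"

    static void *
    sketch_get_cached(void *entry, Size entry_size,
                      void (*compile_entry) (void *))
    {
        bool        new_entry = false;

        if (entry == NULL)
        {
            entry = MemoryContextAllocZero(TopMemoryContext, entry_size);
            new_entry = true;
        }

        PG_TRY();
        {
            compile_entry(entry);
        }
        PG_CATCH();
        {
            if (new_entry)
                pfree(entry);   /* safe only because no one else saw it */
            PG_RE_THROW();
        }
        PG_END_TRY();

        return entry;
    }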
diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c
index 9bcbc4c3e97..89a1c79e984 100644
--- a/src/backend/utils/cache/plancache.c
+++ b/src/backend/utils/cache/plancache.c
@@ -92,8 +92,7 @@ static void ReleaseGenericPlan(CachedPlanSource *plansource);
static bool StmtPlanRequiresRevalidation(CachedPlanSource *plansource);
static bool BuildingPlanRequiresSnapshot(CachedPlanSource *plansource);
static List *RevalidateCachedQuery(CachedPlanSource *plansource,
- QueryEnvironment *queryEnv,
- bool release_generic);
+ QueryEnvironment *queryEnv);
static bool CheckCachedPlan(CachedPlanSource *plansource);
static CachedPlan *BuildCachedPlan(CachedPlanSource *plansource, List *qlist,
ParamListInfo boundParams, QueryEnvironment *queryEnv);
@@ -663,17 +662,10 @@ BuildingPlanRequiresSnapshot(CachedPlanSource *plansource)
* The result value is the transient analyzed-and-rewritten query tree if we
* had to do re-analysis, and NIL otherwise. (This is returned just to save
* a tree copying step in a subsequent BuildCachedPlan call.)
- *
- * This also releases and drops the generic plan (plansource->gplan), if any,
- * as most callers will typically build a new CachedPlan for the plansource
- * right after this. However, when called from UpdateCachedPlan(), the
- * function does not release the generic plan, as UpdateCachedPlan() updates
- * an existing CachedPlan in place.
*/
static List *
RevalidateCachedQuery(CachedPlanSource *plansource,
- QueryEnvironment *queryEnv,
- bool release_generic)
+ QueryEnvironment *queryEnv)
{
bool snapshot_set;
List *tlist; /* transient query-tree list */
@@ -772,9 +764,8 @@ RevalidateCachedQuery(CachedPlanSource *plansource,
MemoryContextDelete(qcxt);
}
- /* Drop the generic plan reference, if any, and if requested */
- if (release_generic)
- ReleaseGenericPlan(plansource);
+ /* Drop the generic plan reference if any */
+ ReleaseGenericPlan(plansource);
/*
* Now re-do parse analysis and rewrite. This not incidentally acquires
@@ -937,10 +928,8 @@ RevalidateCachedQuery(CachedPlanSource *plansource,
* Caller must have already called RevalidateCachedQuery to verify that the
* querytree is up to date.
*
- * On a "true" return, we have acquired locks on the "unprunableRelids" set
- * for all plans in plansource->stmt_list. However, the plans are not fully
- * race-condition-free until the executor acquires locks on the prunable
- * relations that survive initial runtime pruning during InitPlan().
+ * On a "true" return, we have acquired the locks needed to run the plan.
+ * (We must do this for the "true" result to be race-condition-free.)
*/
static bool
CheckCachedPlan(CachedPlanSource *plansource)
@@ -1025,8 +1014,6 @@ CheckCachedPlan(CachedPlanSource *plansource)
* Planning work is done in the caller's memory context. The finished plan
* is in a child memory context, which typically should get reparented
* (unless this is a one-shot plan, in which case we don't copy the plan).
- *
- * Note: When changing this, you should also look at UpdateCachedPlan().
*/
static CachedPlan *
BuildCachedPlan(CachedPlanSource *plansource, List *qlist,
@@ -1037,7 +1024,6 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist,
bool snapshot_set;
bool is_transient;
MemoryContext plan_context;
- MemoryContext stmt_context = NULL;
MemoryContext oldcxt = CurrentMemoryContext;
ListCell *lc;
@@ -1055,7 +1041,7 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist,
* let's treat it as real and redo the RevalidateCachedQuery call.
*/
if (!plansource->is_valid)
- qlist = RevalidateCachedQuery(plansource, queryEnv, true);
+ qlist = RevalidateCachedQuery(plansource, queryEnv);
/*
* If we don't already have a copy of the querytree list that can be
@@ -1093,19 +1079,10 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist,
PopActiveSnapshot();
/*
- * Normally, we create a dedicated memory context for the CachedPlan and
- * its subsidiary data. Although it's usually not very large, the context
- * is designed to allow growth if necessary.
- *
- * The PlannedStmts are stored in a separate child context (stmt_context)
- * of the CachedPlan's memory context. This separation allows
- * UpdateCachedPlan() to free and replace the PlannedStmts without
- * affecting the CachedPlan structure or its stmt_list List.
- *
- * For one-shot plans, we instead use the caller's memory context, as the
- * CachedPlan will not persist. stmt_context will be set to NULL in this
- * case, because UpdateCachedPlan() should never get called on a one-shot
- * plan.
+ * Normally we make a dedicated memory context for the CachedPlan and its
+ * subsidiary data. (It's probably not going to be large, but just in
+ * case, allow it to grow large. It's transient for the moment.) But for
+ * a one-shot plan, we just leave it in the caller's memory context.
*/
if (!plansource->is_oneshot)
{
@@ -1114,17 +1091,12 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist,
ALLOCSET_START_SMALL_SIZES);
MemoryContextCopyAndSetIdentifier(plan_context, plansource->query_string);
- stmt_context = AllocSetContextCreate(CurrentMemoryContext,
- "CachedPlan PlannedStmts",
- ALLOCSET_START_SMALL_SIZES);
- MemoryContextCopyAndSetIdentifier(stmt_context, plansource->query_string);
- MemoryContextSetParent(stmt_context, plan_context);
+ /*
+ * Copy plan into the new context.
+ */
+ MemoryContextSwitchTo(plan_context);
- MemoryContextSwitchTo(stmt_context);
plist = copyObject(plist);
-
- MemoryContextSwitchTo(plan_context);
- plist = list_copy(plist);
}
else
plan_context = CurrentMemoryContext;
@@ -1165,10 +1137,8 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist,
plan->saved_xmin = InvalidTransactionId;
plan->refcount = 0;
plan->context = plan_context;
- plan->stmt_context = stmt_context;
plan->is_oneshot = plansource->is_oneshot;
plan->is_saved = false;
- plan->is_reused = false;
plan->is_valid = true;
/* assign generation number to new plan */
@@ -1180,113 +1150,6 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist,
}
/*
- * UpdateCachedPlan
- * Create fresh plans for all queries in the CachedPlanSource, replacing
- * those in the generic plan's stmt_list, and return the plan for the
- * query_index'th query.
- *
- * This function is primarily used by ExecutorStartCachedPlan() to handle
- * cases where the original generic CachedPlan becomes invalid. Such
- * invalidation may occur when prunable relations in the old plan for the
- * query_index'th query are locked in preparation for execution.
- *
- * Note that invalidations received during the execution of the query_index'th
- * query can affect both the queries that have already finished execution
- * (e.g., due to concurrent modifications on prunable relations that were not
- * locked during their execution) and also the queries that have not yet been
- * executed. As a result, this function updates all plans to ensure
- * CachedPlan.is_valid is safely set to true.
- *
- * The old PlannedStmts in plansource->gplan->stmt_list are freed here, so
- * the caller and any of its callers must not rely on them remaining accessible
- * after this function is called.
- */
-PlannedStmt *
-UpdateCachedPlan(CachedPlanSource *plansource, int query_index,
- QueryEnvironment *queryEnv)
-{
- List *query_list = plansource->query_list,
- *plan_list;
- ListCell *l1,
- *l2;
- CachedPlan *plan = plansource->gplan;
- MemoryContext oldcxt;
-
- Assert(ActiveSnapshotSet());
-
- /* Sanity checks (XXX can be Asserts?) */
- if (plan == NULL)
- elog(ERROR, "UpdateCachedPlan() called in the wrong context: plansource->gplan is NULL");
- else if (plan->is_valid)
- elog(ERROR, "UpdateCachedPlan() called in the wrong context: plansource->gplan->is_valid is true");
- else if (plan->is_oneshot)
- elog(ERROR, "UpdateCachedPlan() called in the wrong context: plansource->gplan->is_oneshot is true");
-
- /*
- * The plansource might have become invalid since GetCachedPlan() returned
- * the CachedPlan. See the comment in BuildCachedPlan() for details on why
- * this might happen. Although invalidation is likely a false positive as
- * stated there, we make the plan valid to ensure the query list used for
- * planning is up to date.
- *
- * The risk of catching an invalidation is higher here than when
- * BuildCachedPlan() is called from GetCachedPlan(), because this function
- * is normally called long after GetCachedPlan() returns the CachedPlan,
- * so much more processing could have occurred including things that mark
- * the CachedPlanSource invalid.
- *
- * Note: Do not release plansource->gplan, because the upstream callers
- * (such as the callers of ExecutorStartCachedPlan()) would still be
- * referencing it.
- */
- if (!plansource->is_valid)
- query_list = RevalidateCachedQuery(plansource, queryEnv, false);
- Assert(query_list != NIL);
-
- /*
- * Build a new generic plan for all the queries after making a copy to be
- * scribbled on by the planner.
- */
- query_list = copyObject(query_list);
-
- /*
- * Planning work is done in the caller's memory context. The resulting
- * PlannedStmt is then copied into plan->stmt_context after throwing away
- * the old ones.
- */
- plan_list = pg_plan_queries(query_list, plansource->query_string,
- plansource->cursor_options, NULL);
- Assert(list_length(plan_list) == list_length(plan->stmt_list));
-
- MemoryContextReset(plan->stmt_context);
- oldcxt = MemoryContextSwitchTo(plan->stmt_context);
- forboth(l1, plan_list, l2, plan->stmt_list)
- {
- PlannedStmt *plannedstmt = lfirst(l1);
-
- lfirst(l2) = copyObject(plannedstmt);
- }
- MemoryContextSwitchTo(oldcxt);
-
- /*
- * XXX Should this also (re)set the properties of the CachedPlan that are
- * set in BuildCachedPlan() after creating the fresh plans such as
- * planRoleId, dependsOnRole, and saved_xmin?
- */
-
- /*
- * We've updated all the plans that might have been invalidated, so mark
- * the CachedPlan as valid.
- */
- plan->is_valid = true;
-
- /* Also update generic_cost because we just created a new generic plan. */
- plansource->generic_cost = cached_plan_cost(plan, false);
-
- return list_nth_node(PlannedStmt, plan->stmt_list, query_index);
-}
-
-/*
* choose_custom_plan: choose whether to use custom or generic plan
*
* This defines the policy followed by GetCachedPlan.
@@ -1402,13 +1265,8 @@ cached_plan_cost(CachedPlan *plan, bool include_planner)
* plan or a custom plan for the given parameters: the caller does not know
* which it will get.
*
- * On return, the plan is valid, but if it is a reused generic plan, not all
- * locks are acquired. In such cases, CheckCachedPlan() does not take locks
- * on relations subject to initial runtime pruning; instead, these locks are
- * deferred until execution startup, when ExecDoInitialPruning() performs
- * initial pruning. The plan's "is_reused" flag is set to indicate that
- * CachedPlanRequiresLocking() should return true when called by
- * ExecDoInitialPruning().
+ * On return, the plan is valid and we have sufficient locks to begin
+ * execution.
*
* On return, the refcount of the plan has been incremented; a later
* ReleaseCachedPlan() call is expected. If "owner" is not NULL then
@@ -1434,7 +1292,7 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams,
elog(ERROR, "cannot apply ResourceOwner to non-saved cached plan");
/* Make sure the querytree list is valid and we have parse-time locks */
- qlist = RevalidateCachedQuery(plansource, queryEnv, true);
+ qlist = RevalidateCachedQuery(plansource, queryEnv);
/* Decide whether to use a custom plan */
customplan = choose_custom_plan(plansource, boundParams);
@@ -1446,8 +1304,6 @@ GetCachedPlan(CachedPlanSource *plansource, ParamListInfo boundParams,
/* We want a generic plan, and we already have a valid one */
plan = plansource->gplan;
Assert(plan->magic == CACHEDPLAN_MAGIC);
- /* Reusing the existing plan, so not all locks may be acquired. */
- plan->is_reused = true;
}
else
{
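
With the contract restored above, a plan returned by GetCachedPlan() is valid and sufficiently locked, and each call must be paired with ReleaseCachedPlan(). The following is a minimal sketch of that pairing, not code from this patch; the execution step is elided and CurrentResourceOwner is assumed to own the reference.

#include "postgres.h"
#include "nodes/params.h"
#include "utils/plancache.h"
#include "utils/resowner.h"

static void
run_with_cached_plan(CachedPlanSource *plansource, ParamListInfo params)
{
	CachedPlan *cplan;

	/* On return the plan is valid and sufficiently locked. */
	cplan = GetCachedPlan(plansource, params, CurrentResourceOwner, NULL);

	/* ... run the PlannedStmts in cplan->stmt_list ... */

	ReleaseCachedPlan(cplan, CurrentResourceOwner);
}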
@@ -1913,7 +1769,7 @@ CachedPlanGetTargetList(CachedPlanSource *plansource,
return NIL;
/* Make sure the querytree list is valid and we have parse-time locks */
- RevalidateCachedQuery(plansource, queryEnv, true);
+ RevalidateCachedQuery(plansource, queryEnv);
/* Get the primary statement and find out what it returns */
pstmt = QueryListGetPrimaryStmt(plansource->query_list);
@@ -2035,7 +1891,7 @@ AcquireExecutorLocks(List *stmt_list, bool acquire)
foreach(lc1, stmt_list)
{
PlannedStmt *plannedstmt = lfirst_node(PlannedStmt, lc1);
- int rtindex;
+ ListCell *lc2;
if (plannedstmt->commandType == CMD_UTILITY)
{
@@ -2053,16 +1909,13 @@ AcquireExecutorLocks(List *stmt_list, bool acquire)
continue;
}
- rtindex = -1;
- while ((rtindex = bms_next_member(plannedstmt->unprunableRelids,
- rtindex)) >= 0)
+ foreach(lc2, plannedstmt->rtable)
{
- RangeTblEntry *rte = list_nth_node(RangeTblEntry,
- plannedstmt->rtable,
- rtindex - 1);
+ RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc2);
- Assert(rte->rtekind == RTE_RELATION ||
- (rte->rtekind == RTE_SUBQUERY && OidIsValid(rte->relid)));
+ if (!(rte->rtekind == RTE_RELATION ||
+ (rte->rtekind == RTE_SUBQUERY && OidIsValid(rte->relid))))
+ continue;
/*
* Acquire the appropriate type of lock on each relation OID. Note
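
AcquireExecutorLocks() thus goes back to scanning the entire flat range table and filtering by rtekind inline, instead of consulting unprunableRelids. A minimal sketch of that walk, assuming backend headers; the function name is illustrative, and the real code also handles the release path via its acquire flag.

#include "postgres.h"
#include "nodes/parsenodes.h"
#include "nodes/plannodes.h"
#include "storage/lmgr.h"

static void
lock_plan_relations(PlannedStmt *plannedstmt)
{
	ListCell   *lc;

	foreach(lc, plannedstmt->rtable)
	{
		RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);

		/* Only plain relations and relation-backed subqueries get locks. */
		if (!(rte->rtekind == RTE_RELATION ||
			  (rte->rtekind == RTE_SUBQUERY && OidIsValid(rte->relid))))
			continue;

		LockRelationOid(rte->relid, rte->rellockmode);
	}
}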
diff --git a/src/backend/utils/fmgr/dfmgr.c b/src/backend/utils/fmgr/dfmgr.c
index 603632581d0..4bb84ff7087 100644
--- a/src/backend/utils/fmgr/dfmgr.c
+++ b/src/backend/utils/fmgr/dfmgr.c
@@ -99,6 +99,14 @@ load_external_function(const char *filename, const char *funcname,
void *lib_handle;
void *retval;
+ /*
+ * If the value starts with "$libdir/", strip that. This is because many
+ * extensions have hardcoded '$libdir/foo' as their library name, which
+ * prevents using the path.
+ */
+ if (strncmp(filename, "$libdir/", 8) == 0)
+ filename += 8;
+
/* Expand the possibly-abbreviated filename to an exact path name */
fullname = expand_dynamic_library_name(filename);
@@ -456,14 +464,6 @@ expand_dynamic_library_name(const char *name)
Assert(name);
- /*
- * If the value starts with "$libdir/", strip that. This is because many
- * extensions have hardcoded '$libdir/foo' as their library name, which
- * prevents using the path.
- */
- if (strncmp(name, "$libdir/", 8) == 0)
- name += 8;
-
have_slash = (first_dir_separator(name) != NULL);
if (!have_slash)
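
The stripping moved above now happens in load_external_function() before expand_dynamic_library_name() runs. The check itself is an ordinary fixed-length strncmp(); a self-contained sketch:

#include <string.h>

static const char *
strip_libdir_prefix(const char *filename)
{
	/* "$libdir/" is 8 bytes; keep only what follows it. */
	if (strncmp(filename, "$libdir/", 8) == 0)
		filename += 8;
	return filename;
}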
diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c
index 92b0446b80c..d31cb45a058 100644
--- a/src/backend/utils/init/globals.c
+++ b/src/backend/utils/init/globals.c
@@ -39,7 +39,6 @@ volatile sig_atomic_t TransactionTimeoutPending = false;
volatile sig_atomic_t IdleSessionTimeoutPending = false;
volatile sig_atomic_t ProcSignalBarrierPending = false;
volatile sig_atomic_t LogMemoryContextPending = false;
-volatile sig_atomic_t PublishMemoryContextPending = false;
volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false;
volatile uint32 InterruptHoldoffCount = 0;
volatile uint32 QueryCancelHoldoffCount = 0;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 89d72cdd5ff..c86ceefda94 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -663,13 +663,6 @@ BaseInit(void)
* drop ephemeral slots, which in turn triggers stats reporting.
*/
ReplicationSlotInitialize();
-
- /*
- * The before shmem exit callback frees the DSA memory occupied by the
- * latest memory context statistics that could be published by this proc
- * if requested.
- */
- before_shmem_exit(AtProcExit_memstats_cleanup, 0);
}
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 308016d7763..886ecbad871 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -39,6 +39,7 @@
#include "mb/pg_wchar.h"
#include "utils/fmgrprotos.h"
#include "utils/memutils.h"
+#include "utils/relcache.h"
#include "varatt.h"
/*
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 2f8cbd86759..511dc32d519 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -1028,7 +1028,7 @@ struct config_bool ConfigureNamesBool[] =
},
{
{"enable_distinct_reordering", PGC_USERSET, QUERY_TUNING_METHOD,
- gettext_noop("Enables reordering of DISTINCT pathkeys."),
+ gettext_noop("Enables reordering of DISTINCT keys."),
NULL,
GUC_EXPLAIN
},
@@ -1602,11 +1602,11 @@ struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
{
- {"log_lock_failure", PGC_SUSET, LOGGING_WHAT,
+ {"log_lock_failures", PGC_SUSET, LOGGING_WHAT,
gettext_noop("Logs lock failures."),
NULL
},
- &log_lock_failure,
+ &log_lock_failures,
false,
NULL, NULL, NULL
},
@@ -4837,7 +4837,7 @@ struct config_string ConfigureNamesString[] =
{
{"ssl_groups", PGC_SIGHUP, CONN_AUTH_SSL,
gettext_noop("Sets the group(s) to use for Diffie-Hellman key exchange."),
- gettext_noop("Multiple groups can be specified using colon-separated list."),
+ gettext_noop("Multiple groups can be specified using a colon-separated list."),
GUC_SUPERUSER_ONLY
},
&SSLECDHCurve,
diff --git a/src/backend/utils/misc/injection_point.c b/src/backend/utils/misc/injection_point.c
index f58ebc8ee52..83b887b6978 100644
--- a/src/backend/utils/misc/injection_point.c
+++ b/src/backend/utils/misc/injection_point.c
@@ -584,3 +584,49 @@ IsInjectionPointAttached(const char *name)
return false; /* silence compiler */
#endif
}
+
+/*
+ * Retrieve a list of all the injection points currently attached.
+ *
+ * This list is palloc'd in the current memory context.
+ */
+List *
+InjectionPointList(void)
+{
+#ifdef USE_INJECTION_POINTS
+ List *inj_points = NIL;
+ uint32 max_inuse;
+
+ LWLockAcquire(InjectionPointLock, LW_SHARED);
+
+ max_inuse = pg_atomic_read_u32(&ActiveInjectionPoints->max_inuse);
+
+ for (uint32 idx = 0; idx < max_inuse; idx++)
+ {
+ InjectionPointEntry *entry;
+ InjectionPointData *inj_point;
+ uint64 generation;
+
+ entry = &ActiveInjectionPoints->entries[idx];
+ generation = pg_atomic_read_u64(&entry->generation);
+
+ /* skip free slots */
+ if (generation % 2 == 0)
+ continue;
+
+ inj_point = (InjectionPointData *) palloc0(sizeof(InjectionPointData));
+ inj_point->name = pstrdup(entry->name);
+ inj_point->library = pstrdup(entry->library);
+ inj_point->function = pstrdup(entry->function);
+ inj_points = lappend(inj_points, inj_point);
+ }
+
+ LWLockRelease(InjectionPointLock);
+
+ return inj_points;
+
+#else
+ elog(ERROR, "Injection points are not supported by this build");
+ return NIL; /* keep compiler quiet */
+#endif
+}
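
Callers of the new InjectionPointList() receive a palloc'd List of InjectionPointData entries carrying the name, library, and function fields shown above. A hedged consumer sketch, assuming a build with USE_INJECTION_POINTS and backend headers; the wrapper name is illustrative.

#include "postgres.h"
#include "nodes/pg_list.h"
#include "utils/injection_point.h"

static void
report_attached_points(void)
{
	List	   *points = InjectionPointList();

	/* In builds without injection points, InjectionPointList() errors out. */
	foreach_ptr(InjectionPointData, p, points)
		elog(LOG, "injection point \"%s\" (%s in %s)",
			 p->name, p->function, p->library);
}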
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 34826d01380..341f88adc87 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -97,6 +97,7 @@
#password_encryption = scram-sha-256 # scram-sha-256 or md5
#scram_iterations = 4096
#md5_password_warnings = on
+#oauth_validator_libraries = '' # comma-separated list of trusted validator modules
# GSSAPI using Kerberos
#krb_server_keyfile = 'FILE:${sysconfdir}/krb5.keytab'
@@ -121,9 +122,6 @@
#ssl_passphrase_command = ''
#ssl_passphrase_command_supports_reload = off
-# OAuth
-#oauth_validator_libraries = '' # comma-separated list of trusted validator modules
-
#------------------------------------------------------------------------------
# RESOURCE USAGE (except WAL)
@@ -180,13 +178,11 @@
#temp_file_limit = -1 # limits per-process temp file space
# in kilobytes, or -1 for no limit
+#file_copy_method = copy # copy, clone (if supported by OS)
+
#max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated
# for NOTIFY / LISTEN queue
-#file_copy_method = copy # the default is the first option
- # copy
- # clone (if system support is available)
-
# - Kernel Resources -
#max_files_per_process = 1000 # min 64
@@ -628,7 +624,7 @@
# %% = '%'
# e.g. '<%u%%%d> '
#log_lock_waits = off # log lock waits >= deadlock_timeout
-#log_lock_failure = off # log lock failures
+#log_lock_failures = off # log lock failures
#log_recovery_conflict_waits = off # log standby recovery conflict waits
# >= deadlock_timeout
#log_parameter_max_length = -1 # when logging statements, limit logged
diff --git a/src/backend/utils/mmgr/alignedalloc.c b/src/backend/utils/mmgr/alignedalloc.c
index 85aee389d6b..7eea695de62 100644
--- a/src/backend/utils/mmgr/alignedalloc.c
+++ b/src/backend/utils/mmgr/alignedalloc.c
@@ -45,6 +45,7 @@ AlignedAllocFree(void *pointer)
GetMemoryChunkContext(unaligned)->name, chunk);
#endif
+ /* Recursively pfree the unaligned chunk */
pfree(unaligned);
}
@@ -96,18 +97,32 @@ AlignedAllocRealloc(void *pointer, Size size, int flags)
Assert(old_size >= redirchunk->requested_size);
#endif
+ /*
+ * To keep things simple, we always allocate a new aligned chunk and copy
+ * data into it. Because of the above inaccuracy, this may end up copying
+ * more data than was in the original allocation request size, but that
+ * should be OK.
+ */
ctx = GetMemoryChunkContext(unaligned);
newptr = MemoryContextAllocAligned(ctx, size, alignto, flags);
- /*
- * We may memcpy beyond the end of the original allocation request size,
- * so we must mark the entire allocation as defined.
- */
- if (likely(newptr != NULL))
+ /* Cope cleanly with OOM */
+ if (unlikely(newptr == NULL))
{
- VALGRIND_MAKE_MEM_DEFINED(pointer, old_size);
- memcpy(newptr, pointer, Min(size, old_size));
+ VALGRIND_MAKE_MEM_NOACCESS(redirchunk, sizeof(MemoryChunk));
+ return MemoryContextAllocationFailure(ctx, size, flags);
}
+
+ /*
+ * We may memcpy more than the original allocation request size, which
+ * would result in trying to copy trailing bytes that the original
+ * MemoryContextAllocAligned call marked NOACCESS. So we must mark the
+ * entire old_size as defined. That's slightly annoying, but probably not
+ * worth improving.
+ */
+ VALGRIND_MAKE_MEM_DEFINED(pointer, old_size);
+ memcpy(newptr, pointer, Min(size, old_size));
+
pfree(unaligned);
return newptr;
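
For context, AlignedAllocRealloc() is the path taken when repalloc() is applied to a chunk obtained from palloc_aligned(). A minimal usage sketch, assuming backend headers; the sizes and the 64-byte alignment are illustrative (alignto must be a power of two).

#include "postgres.h"
#include "utils/memutils.h"

static void
aligned_grow_example(void)
{
	char	   *buf;

	/* 64-byte-aligned chunk in the current memory context */
	buf = palloc_aligned(8192, 64, 0);

	/* Growing allocates a new aligned chunk and copies the data across. */
	buf = repalloc(buf, 16384);

	pfree(buf);
}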
diff --git a/src/backend/utils/mmgr/dsa.c b/src/backend/utils/mmgr/dsa.c
index 17d4f7a7a06..be43e9351c3 100644
--- a/src/backend/utils/mmgr/dsa.c
+++ b/src/backend/utils/mmgr/dsa.c
@@ -532,6 +532,21 @@ dsa_attach(dsa_handle handle)
}
/*
+ * Returns whether the area with the given handle was already attached by the
+ * current process. The area must have been created with dsa_create (not
+ * dsa_create_in_place).
+ */
+bool
+dsa_is_attached(dsa_handle handle)
+{
+ /*
+ * An area handle is really a DSM segment handle for the first segment, so
+ * we can just search for that.
+ */
+ return dsm_find_mapping(handle) != NULL;
+}
+
+/*
* Attach to an area that was created with dsa_create_in_place. The caller
* must somehow know the location in memory that was used when the area was
* created, though it may be mapped at a different virtual address in this
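
The new dsa_is_attached() lets a process guard against a second dsa_attach() of the same area, which would fail inside dsm_attach(). A small sketch, assuming backend headers; attach_once and the caller-supplied cached pointer are illustrative.

#include "postgres.h"
#include "utils/dsa.h"

static dsa_area *
attach_once(dsa_handle handle, dsa_area *cached)
{
	/* dsa_attach()ing the same area twice in one process is an error. */
	if (!dsa_is_attached(handle))
		cached = dsa_attach(handle);
	return cached;
}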
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index 7d28ca706eb..15fa4d0a55e 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -23,11 +23,6 @@
#include "mb/pg_wchar.h"
#include "miscadmin.h"
-#include "nodes/pg_list.h"
-#include "storage/lwlock.h"
-#include "storage/ipc.h"
-#include "utils/dsa.h"
-#include "utils/hsearch.h"
#include "utils/memdebug.h"
#include "utils/memutils.h"
#include "utils/memutils_internal.h"
@@ -140,17 +135,6 @@ static const MemoryContextMethods mcxt_methods[] = {
};
#undef BOGUS_MCTX
-/*
- * This is passed to MemoryContextStatsInternal to determine whether
- * to print context statistics or not, and where to print them: to the
- * logs or to stderr.
- */
-typedef enum PrintDestination
-{
- PRINT_STATS_TO_STDERR = 0,
- PRINT_STATS_TO_LOGS,
- PRINT_STATS_NONE
-} PrintDestination;
/*
* CurrentMemoryContext
@@ -172,31 +156,16 @@ MemoryContext CurTransactionContext = NULL;
/* This is a transient link to the active portal's memory context: */
MemoryContext PortalContext = NULL;
-dsa_area *MemoryStatsDsaArea = NULL;
static void MemoryContextDeleteOnly(MemoryContext context);
static void MemoryContextCallResetCallbacks(MemoryContext context);
static void MemoryContextStatsInternal(MemoryContext context, int level,
int max_level, int max_children,
MemoryContextCounters *totals,
- PrintDestination print_location,
- int *num_contexts);
+ bool print_to_stderr);
static void MemoryContextStatsPrint(MemoryContext context, void *passthru,
const char *stats_string,
bool print_to_stderr);
-static void PublishMemoryContext(MemoryStatsEntry *memcxt_info,
- int curr_id, MemoryContext context,
- List *path,
- MemoryContextCounters stat,
- int num_contexts, dsa_area *area,
- int max_levels);
-static void compute_contexts_count_and_ids(List *contexts, HTAB *context_id_lookup,
- int *stats_count,
- bool summary);
-static List *compute_context_path(MemoryContext c, HTAB *context_id_lookup);
-static void free_memorycontextstate_dsa(dsa_area *area, int total_stats,
- dsa_pointer prev_dsa_pointer);
-static void end_memorycontext_reporting(void);
/*
* You should not do memory allocations within a critical section, because
@@ -862,19 +831,11 @@ MemoryContextStatsDetail(MemoryContext context,
bool print_to_stderr)
{
MemoryContextCounters grand_totals;
- int num_contexts;
- PrintDestination print_location;
memset(&grand_totals, 0, sizeof(grand_totals));
- if (print_to_stderr)
- print_location = PRINT_STATS_TO_STDERR;
- else
- print_location = PRINT_STATS_TO_LOGS;
-
- /* num_contexts reports the number of contexts aggregated in the output */
MemoryContextStatsInternal(context, 1, max_level, max_children,
- &grand_totals, print_location, &num_contexts);
+ &grand_totals, print_to_stderr);
if (print_to_stderr)
fprintf(stderr,
@@ -909,14 +870,13 @@ MemoryContextStatsDetail(MemoryContext context,
* One recursion level for MemoryContextStats
*
* Print stats for this context if possible, but in any case accumulate counts
- * into *totals (if not NULL). The callers should make sure that print_location
- * is set to PRINT_STATS_TO_STDERR or PRINT_STATS_TO_LOGS or PRINT_STATS_NONE.
+ * into *totals (if not NULL).
*/
static void
MemoryContextStatsInternal(MemoryContext context, int level,
int max_level, int max_children,
MemoryContextCounters *totals,
- PrintDestination print_location, int *num_contexts)
+ bool print_to_stderr)
{
MemoryContext child;
int ichild;
@@ -924,39 +884,10 @@ MemoryContextStatsInternal(MemoryContext context, int level,
Assert(MemoryContextIsValid(context));
/* Examine the context itself */
- switch (print_location)
- {
- case PRINT_STATS_TO_STDERR:
- context->methods->stats(context,
- MemoryContextStatsPrint,
- &level,
- totals, true);
- break;
-
- case PRINT_STATS_TO_LOGS:
- context->methods->stats(context,
- MemoryContextStatsPrint,
- &level,
- totals, false);
- break;
-
- case PRINT_STATS_NONE:
-
- /*
- * Do not print the statistics if print_location is
- * PRINT_STATS_NONE, only compute totals. This is used in
- * reporting of memory context statistics via a sql function. Last
- * parameter is not relevant.
- */
- context->methods->stats(context,
- NULL,
- NULL,
- totals, false);
- break;
- }
-
- /* Increment the context count for each recursive call */
- *num_contexts = *num_contexts + 1;
+ context->methods->stats(context,
+ MemoryContextStatsPrint,
+ &level,
+ totals, print_to_stderr);
/*
* Examine children.
@@ -976,7 +907,7 @@ MemoryContextStatsInternal(MemoryContext context, int level,
MemoryContextStatsInternal(child, level + 1,
max_level, max_children,
totals,
- print_location, num_contexts);
+ print_to_stderr);
}
}
@@ -995,13 +926,7 @@ MemoryContextStatsInternal(MemoryContext context, int level,
child = MemoryContextTraverseNext(child, context);
}
- /*
- * Add the count of child contexts that are traversed in a
- * non-recursive manner.
- */
- *num_contexts = *num_contexts + ichild;
-
- if (print_location == PRINT_STATS_TO_STDERR)
+ if (print_to_stderr)
{
for (int i = 0; i < level; i++)
fprintf(stderr, " ");
@@ -1014,7 +939,7 @@ MemoryContextStatsInternal(MemoryContext context, int level,
local_totals.freechunks,
local_totals.totalspace - local_totals.freespace);
}
- else if (print_location == PRINT_STATS_TO_LOGS)
+ else
ereport(LOG_SERVER_ONLY,
(errhidestmt(true),
errhidecontext(true),
@@ -1356,22 +1281,6 @@ HandleLogMemoryContextInterrupt(void)
}
/*
- * HandleGetMemoryContextInterrupt
- * Handle receipt of an interrupt indicating a request to publish memory
- * contexts statistics.
- *
- * All the actual work is deferred to ProcessGetMemoryContextInterrupt() as
- * this cannot be performed in a signal handler.
- */
-void
-HandleGetMemoryContextInterrupt(void)
-{
- InterruptPending = true;
- PublishMemoryContextPending = true;
- /* latch will be set by procsignal_sigusr1_handler */
-}
-
-/*
* ProcessLogMemoryContextInterrupt
* Perform logging of memory contexts of this backend process.
*
@@ -1408,539 +1317,6 @@ ProcessLogMemoryContextInterrupt(void)
MemoryContextStatsDetail(TopMemoryContext, 100, 100, false);
}
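
After the revert, MemoryContextStatsDetail() takes the root context, depth and children caps, and a print_to_stderr flag, exactly as the interrupt handler above calls it. A hedged sketch of invoking it directly; the wrapper name is illustrative.

#include "postgres.h"
#include "utils/memutils.h"

static void
dump_all_context_stats(bool to_stderr)
{
	/* Depth/children caps of 100 match the interrupt handler above. */
	MemoryContextStatsDetail(TopMemoryContext, 100, 100, to_stderr);
}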
-/*
- * ProcessGetMemoryContextInterrupt
- * Generate information about memory contexts used by the process.
- *
- * Performs a breadth-first search on the memory context tree, so parents'
- * statistics are reported before their children's in the monitoring
- * function output.
- *
- * Statistics for all the processes are shared via the same dynamic shared
- * area. Statistics written by each process are tracked independently in
- * per-process DSA pointers. These pointers are stored in static shared memory.
- *
- * We calculate the maximum number of contexts whose statistics can be
- * displayed using a pre-determined per-process memory limit for this
- * utility and the maximum size of each context's statistics. Any
- * remaining context statistics are captured as a cumulative total at the
- * end of the individual contexts' statistics.
- *
- * If summary is true, we capture the level 1 and level 2 contexts
- * statistics. For that we traverse the memory context tree recursively in
- * depth first search manner to cover all the children of a parent context, to
- * be able to display a cumulative total of memory consumption by a parent at
- * level 2 and all its children.
- */
-void
-ProcessGetMemoryContextInterrupt(void)
-{
- List *contexts;
- HASHCTL ctl;
- HTAB *context_id_lookup;
- int context_id = 0;
- MemoryStatsEntry *meminfo;
- bool summary = false;
- int max_stats;
- int idx = MyProcNumber;
- int stats_count = 0;
- int stats_num = 0;
- MemoryContextCounters stat;
- int num_individual_stats = 0;
-
- PublishMemoryContextPending = false;
-
- /*
- * The hash table is used for constructing "path" column of the view,
- * similar to its local backend counterpart.
- */
- ctl.keysize = sizeof(MemoryContext);
- ctl.entrysize = sizeof(MemoryStatsContextId);
- ctl.hcxt = CurrentMemoryContext;
-
- context_id_lookup = hash_create("pg_get_remote_backend_memory_contexts",
- 256,
- &ctl,
- HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
-
- /* List of contexts to process in the next round - start at the top. */
- contexts = list_make1(TopMemoryContext);
-
- /* Compute the number of stats that can fit in the defined limit */
- max_stats =
- MEMORY_CONTEXT_REPORT_MAX_PER_BACKEND / MAX_MEMORY_CONTEXT_STATS_SIZE;
- LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE);
- summary = memCxtState[idx].summary;
- LWLockRelease(&memCxtState[idx].lw_lock);
-
- /*
- * Traverse the memory context tree to find total number of contexts. If
- * summary is requested report the total number of contexts at level 1 and
- * 2 from the top. Also, populate the hash table of context ids.
- */
- compute_contexts_count_and_ids(contexts, context_id_lookup, &stats_count,
- summary);
-
- /*
- * Allocate memory in this process's DSA for storing statistics of the
- * memory contexts, up to max_stats; for contexts that don't fit within
- * the limit, a cumulative total is written as the last record in the
- * DSA segment.
- */
- stats_num = Min(stats_count, max_stats);
-
- LWLockAcquire(&memCxtArea->lw_lock, LW_EXCLUSIVE);
-
- /*
- * Create a DSA and send its handle to the client process after storing
- * the context statistics. If the number of contexts exceeds a predefined
- * limit (1MB), a cumulative total is stored for such contexts.
- */
- if (memCxtArea->memstats_dsa_handle == DSA_HANDLE_INVALID)
- {
- MemoryContext oldcontext = CurrentMemoryContext;
- dsa_handle handle;
-
- MemoryContextSwitchTo(TopMemoryContext);
-
- MemoryStatsDsaArea = dsa_create(memCxtArea->lw_lock.tranche);
-
- handle = dsa_get_handle(MemoryStatsDsaArea);
- MemoryContextSwitchTo(oldcontext);
-
- dsa_pin_mapping(MemoryStatsDsaArea);
-
- /*
- * Pin the DSA area, this is to make sure the area remains attachable
- * even if the backend that created it exits. This is done so that the
- * statistics are published even if the process exits while a client
- * is waiting. Also, other processes that publish statistics will use
- * the same area.
- */
- dsa_pin(MemoryStatsDsaArea);
-
- /* Set the handle in shared memory */
- memCxtArea->memstats_dsa_handle = handle;
- }
-
- /*
- * If DSA exists, created by another process publishing statistics, attach
- * to it.
- */
- else if (MemoryStatsDsaArea == NULL)
- {
- MemoryContext oldcontext = CurrentMemoryContext;
-
- MemoryContextSwitchTo(TopMemoryContext);
- MemoryStatsDsaArea = dsa_attach(memCxtArea->memstats_dsa_handle);
- MemoryContextSwitchTo(oldcontext);
- dsa_pin_mapping(MemoryStatsDsaArea);
- }
- LWLockRelease(&memCxtArea->lw_lock);
-
- /*
- * Hold the process lock to protect writes to process-specific memory. Two
- * processes publishing statistics do not block each other.
- */
- LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE);
- memCxtState[idx].proc_id = MyProcPid;
-
- if (DsaPointerIsValid(memCxtState[idx].memstats_dsa_pointer))
- {
- /*
- * Free any previous allocations, free the name, ident and path
- * pointers before freeing the pointer that contains them.
- */
- free_memorycontextstate_dsa(MemoryStatsDsaArea, memCxtState[idx].total_stats,
- memCxtState[idx].memstats_dsa_pointer);
- }
-
- /*
- * Assigning total stats before allocating memory so that memory cleanup
- * can run if any subsequent dsa_allocate call to allocate name/ident/path
- * fails.
- */
- memCxtState[idx].total_stats = stats_num;
- memCxtState[idx].memstats_dsa_pointer =
- dsa_allocate0(MemoryStatsDsaArea, stats_num * sizeof(MemoryStatsEntry));
-
- meminfo = (MemoryStatsEntry *)
- dsa_get_address(MemoryStatsDsaArea, memCxtState[idx].memstats_dsa_pointer);
-
- if (summary)
- {
- int cxt_id = 0;
- List *path = NIL;
-
- /* Copy TopMemoryContext statistics to DSA */
- memset(&stat, 0, sizeof(stat));
- (*TopMemoryContext->methods->stats) (TopMemoryContext, NULL, NULL,
- &stat, true);
- path = lcons_int(1, path);
- PublishMemoryContext(meminfo, cxt_id, TopMemoryContext, path, stat,
- 1, MemoryStatsDsaArea, 100);
- cxt_id = cxt_id + 1;
-
- /*
- * Copy statistics for each of TopMemoryContexts children. This
- * includes statistics of at most 100 children per node, with each
- * child node limited to a depth of 100 in its subtree.
- */
- for (MemoryContext c = TopMemoryContext->firstchild; c != NULL;
- c = c->nextchild)
- {
- MemoryContextCounters grand_totals;
- int num_contexts = 0;
-
- path = NIL;
- memset(&grand_totals, 0, sizeof(grand_totals));
-
- MemoryContextStatsInternal(c, 1, 100, 100, &grand_totals,
- PRINT_STATS_NONE, &num_contexts);
-
- path = compute_context_path(c, context_id_lookup);
-
- /*
- * Register the stats entry first, that way the cleanup handler
- * can reach it in case of allocation failures of one or more
- * members.
- */
- memCxtState[idx].total_stats = cxt_id++;
- PublishMemoryContext(meminfo, cxt_id, c, path,
- grand_totals, num_contexts, MemoryStatsDsaArea, 100);
- }
- memCxtState[idx].total_stats = cxt_id;
-
- /* Notify waiting backends and return */
- end_memorycontext_reporting();
-
- hash_destroy(context_id_lookup);
-
- return;
- }
-
- foreach_ptr(MemoryContextData, cur, contexts)
- {
- List *path = NIL;
-
- /*
- * Figure out the transient context_id of this context and each of its
- * ancestors, to compute a path for this context.
- */
- path = compute_context_path(cur, context_id_lookup);
-
- /* Examine the context stats */
- memset(&stat, 0, sizeof(stat));
- (*cur->methods->stats) (cur, NULL, NULL, &stat, true);
-
- /* Account for saving one statistics slot for cumulative reporting */
- if (context_id < (max_stats - 1) || stats_count <= max_stats)
- {
- /* Copy statistics to DSA memory */
- PublishMemoryContext(meminfo, context_id, cur, path, stat, 1, MemoryStatsDsaArea, 100);
- }
- else
- {
- meminfo[max_stats - 1].totalspace += stat.totalspace;
- meminfo[max_stats - 1].nblocks += stat.nblocks;
- meminfo[max_stats - 1].freespace += stat.freespace;
- meminfo[max_stats - 1].freechunks += stat.freechunks;
- }
-
- /*
- * DSA max limit per process is reached, write aggregate of the
- * remaining statistics.
- *
- * We can store contexts from 0 to max_stats - 1. When stats_count is
- * greater than max_stats, we stop reporting individual statistics
- * when context_id equals max_stats - 2. As we use max_stats - 1 array
- * slot for reporting cumulative statistics or "Remaining Totals".
- */
- if (stats_count > max_stats && context_id == (max_stats - 2))
- {
- char *nameptr;
- int namelen = strlen("Remaining Totals");
-
- num_individual_stats = context_id + 1;
- meminfo[max_stats - 1].name = dsa_allocate(MemoryStatsDsaArea, namelen + 1);
- nameptr = dsa_get_address(MemoryStatsDsaArea, meminfo[max_stats - 1].name);
- strlcpy(nameptr, "Remaining Totals", namelen + 1);
- meminfo[max_stats - 1].ident = InvalidDsaPointer;
- meminfo[max_stats - 1].path = InvalidDsaPointer;
- meminfo[max_stats - 1].type = 0;
- }
- context_id++;
- }
-
- /*
- * Statistics are not aggregated, i.e., individual statistics are
- * reported when stats_count <= max_stats.
- */
- if (stats_count <= max_stats)
- {
- memCxtState[idx].total_stats = context_id;
- }
- /* Report number of aggregated memory contexts */
- else
- {
- meminfo[max_stats - 1].num_agg_stats = context_id -
- num_individual_stats;
-
- /*
- * Total stats equals num_individual_stats + 1 record for cumulative
- * statistics.
- */
- memCxtState[idx].total_stats = num_individual_stats + 1;
- }
-
- /* Notify waiting backends and return */
- end_memorycontext_reporting();
-
- hash_destroy(context_id_lookup);
-}
-
-/*
- * Update timestamp and signal all the waiting client backends after copying
- * all the statistics.
- */
-static void
-end_memorycontext_reporting(void)
-{
- memCxtState[MyProcNumber].stats_timestamp = GetCurrentTimestamp();
- LWLockRelease(&memCxtState[MyProcNumber].lw_lock);
- ConditionVariableBroadcast(&memCxtState[MyProcNumber].memcxt_cv);
-}
-
-/*
- * compute_context_path
- *
- * Append the transient context_id of this context and each of its ancestors
- * to a list, in order to compute a path.
- */
-static List *
-compute_context_path(MemoryContext c, HTAB *context_id_lookup)
-{
- bool found;
- List *path = NIL;
- MemoryContext cur_context;
-
- for (cur_context = c; cur_context != NULL; cur_context = cur_context->parent)
- {
- MemoryStatsContextId *cur_entry;
-
- cur_entry = hash_search(context_id_lookup, &cur_context, HASH_FIND, &found);
-
- if (!found)
- elog(ERROR, "hash table corrupted, can't construct path value");
-
- path = lcons_int(cur_entry->context_id, path);
- }
-
- return path;
-}
-
-/*
- * Return the number of contexts allocated currently by the backend
- * Assign context ids to each of the contexts.
- */
-static void
-compute_contexts_count_and_ids(List *contexts, HTAB *context_id_lookup,
- int *stats_count, bool summary)
-{
- foreach_ptr(MemoryContextData, cur, contexts)
- {
- MemoryStatsContextId *entry;
- bool found;
-
- entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur,
- HASH_ENTER, &found);
- Assert(!found);
-
- /*
- * context id starts with 1 so increment the stats_count before
- * assigning.
- */
- entry->context_id = ++(*stats_count);
-
- /* Append the children of the current context to the main list. */
- for (MemoryContext c = cur->firstchild; c != NULL; c = c->nextchild)
- {
- if (summary)
- {
- entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &c,
- HASH_ENTER, &found);
- Assert(!found);
-
- entry->context_id = ++(*stats_count);
- }
-
- contexts = lappend(contexts, c);
- }
-
- /*
- * In summary mode only the first two levels (from the top) of contexts
- * are displayed.
- */
- if (summary)
- break;
- }
-}
-
-/*
- * PublishMemoryContext
- *
- * Copy the memory context statistics of a single context to DSA memory
- */
-static void
-PublishMemoryContext(MemoryStatsEntry *memcxt_info, int curr_id,
- MemoryContext context, List *path,
- MemoryContextCounters stat, int num_contexts,
- dsa_area *area, int max_levels)
-{
- const char *ident = context->ident;
- const char *name = context->name;
- int *path_list;
-
- /*
- * To be consistent with logging output, we label dynahash contexts with
- * just the hash table name as with MemoryContextStatsPrint().
- */
- if (context->ident && strncmp(context->name, "dynahash", 8) == 0)
- {
- name = context->ident;
- ident = NULL;
- }
-
- if (name != NULL)
- {
- int namelen = strlen(name);
- char *nameptr;
-
- if (strlen(name) >= MEMORY_CONTEXT_IDENT_SHMEM_SIZE)
- namelen = pg_mbcliplen(name, namelen,
- MEMORY_CONTEXT_IDENT_SHMEM_SIZE - 1);
-
- memcxt_info[curr_id].name = dsa_allocate(area, namelen + 1);
- nameptr = (char *) dsa_get_address(area, memcxt_info[curr_id].name);
- strlcpy(nameptr, name, namelen + 1);
- }
- else
- memcxt_info[curr_id].name = InvalidDsaPointer;
-
- /* Trim and copy the identifier if it is not set to NULL */
- if (ident != NULL)
- {
- int idlen = strlen(context->ident);
- char *identptr;
-
- /*
- * Some identifiers, such as SQL query strings, can be very long;
- * truncate oversized identifiers.
- */
- if (idlen >= MEMORY_CONTEXT_IDENT_SHMEM_SIZE)
- idlen = pg_mbcliplen(ident, idlen,
- MEMORY_CONTEXT_IDENT_SHMEM_SIZE - 1);
-
- memcxt_info[curr_id].ident = dsa_allocate(area, idlen + 1);
- identptr = (char *) dsa_get_address(area, memcxt_info[curr_id].ident);
- strlcpy(identptr, ident, idlen + 1);
- }
- else
- memcxt_info[curr_id].ident = InvalidDsaPointer;
-
- /* Allocate DSA memory for storing path information */
- if (path == NIL)
- memcxt_info[curr_id].path = InvalidDsaPointer;
- else
- {
- int levels = Min(list_length(path), max_levels);
-
- memcxt_info[curr_id].path_length = levels;
- memcxt_info[curr_id].path = dsa_allocate0(area, levels * sizeof(int));
- memcxt_info[curr_id].levels = list_length(path);
- path_list = (int *) dsa_get_address(area, memcxt_info[curr_id].path);
-
- foreach_int(i, path)
- {
- path_list[foreach_current_index(i)] = i;
- if (--levels == 0)
- break;
- }
- }
- memcxt_info[curr_id].type = context->type;
- memcxt_info[curr_id].totalspace = stat.totalspace;
- memcxt_info[curr_id].nblocks = stat.nblocks;
- memcxt_info[curr_id].freespace = stat.freespace;
- memcxt_info[curr_id].freechunks = stat.freechunks;
- memcxt_info[curr_id].num_agg_stats = num_contexts;
-}
-
-/*
- * free_memorycontextstate_dsa
- *
- * Worker for freeing resources from a MemoryStatsEntry. Callers are
- * responsible for ensuring that the DSA pointer is valid.
- */
-static void
-free_memorycontextstate_dsa(dsa_area *area, int total_stats,
- dsa_pointer prev_dsa_pointer)
-{
- MemoryStatsEntry *meminfo;
-
- meminfo = (MemoryStatsEntry *) dsa_get_address(area, prev_dsa_pointer);
- Assert(meminfo != NULL);
- for (int i = 0; i < total_stats; i++)
- {
- if (DsaPointerIsValid(meminfo[i].name))
- dsa_free(area, meminfo[i].name);
-
- if (DsaPointerIsValid(meminfo[i].ident))
- dsa_free(area, meminfo[i].ident);
-
- if (DsaPointerIsValid(meminfo[i].path))
- dsa_free(area, meminfo[i].path);
- }
-
- dsa_free(area, memCxtState[MyProcNumber].memstats_dsa_pointer);
- memCxtState[MyProcNumber].memstats_dsa_pointer = InvalidDsaPointer;
-}
-
-/*
- * Free the memory context statistics stored by this process
- * in DSA area.
- */
-void
-AtProcExit_memstats_cleanup(int code, Datum arg)
-{
- int idx = MyProcNumber;
-
- if (memCxtArea->memstats_dsa_handle == DSA_HANDLE_INVALID)
- return;
-
- LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE);
-
- if (!DsaPointerIsValid(memCxtState[idx].memstats_dsa_pointer))
- {
- LWLockRelease(&memCxtState[idx].lw_lock);
- return;
- }
-
- /* If the dsa mapping could not be found, attach to the area */
- if (MemoryStatsDsaArea == NULL)
- MemoryStatsDsaArea = dsa_attach(memCxtArea->memstats_dsa_handle);
-
- /*
- * Free the memory context statistics, free the name, ident and path
- * pointers before freeing the pointer that contains these pointers and
- * integer statistics.
- */
- free_memorycontextstate_dsa(MemoryStatsDsaArea, memCxtState[idx].total_stats,
- memCxtState[idx].memstats_dsa_pointer);
-
- dsa_detach(MemoryStatsDsaArea);
- LWLockRelease(&memCxtState[idx].lw_lock);
-}
-
void *
palloc(Size size)
{
diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c
index e3526e78064..0be1c2b0fff 100644
--- a/src/backend/utils/mmgr/portalmem.c
+++ b/src/backend/utils/mmgr/portalmem.c
@@ -284,8 +284,7 @@ PortalDefineQuery(Portal portal,
const char *sourceText,
CommandTag commandTag,
List *stmts,
- CachedPlan *cplan,
- CachedPlanSource *plansource)
+ CachedPlan *cplan)
{
Assert(PortalIsValid(portal));
Assert(portal->status == PORTAL_NEW);
@@ -300,7 +299,6 @@ PortalDefineQuery(Portal portal,
portal->commandTag = commandTag;
portal->stmts = stmts;
portal->cplan = cplan;
- portal->plansource = plansource;
portal->status = PORTAL_DEFINED;
}
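
With Portal.plansource gone, PortalDefineQuery() reverts to taking only the CachedPlan. A minimal sketch of the updated call, assuming backend headers; the query text and command tag are illustrative, and real callers pass source text owned by the portal.

#include "postgres.h"
#include "tcop/cmdtag.h"
#include "utils/portal.h"

static void
define_example_portal(Portal portal, List *stmts, CachedPlan *cplan)
{
	PortalDefineQuery(portal,
					  NULL,			/* not a named prepared statement */
					  "SELECT 1",	/* source text; illustrative */
					  CMDTAG_SELECT,
					  stmts,
					  cplan);
}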