diff options
Diffstat (limited to 'src')
417 files changed, 10816 insertions, 7431 deletions
diff --git a/src/Makefile.shlib b/src/Makefile.shlib index fa81f6ffdd6..3825af5b228 100644 --- a/src/Makefile.shlib +++ b/src/Makefile.shlib @@ -112,7 +112,7 @@ ifeq ($(PORTNAME), darwin) ifneq ($(SO_MAJOR_VERSION), 0) version_link = -compatibility_version $(SO_MAJOR_VERSION) -current_version $(SO_MAJOR_VERSION).$(SO_MINOR_VERSION) endif - LINK.shared = $(COMPILER) -dynamiclib -install_name '$(libdir)/lib$(NAME).$(SO_MAJOR_VERSION)$(DLSUFFIX)' $(version_link) $(exported_symbols_list) + LINK.shared = $(COMPILER) -dynamiclib -install_name '$(libdir)/lib$(NAME).$(SO_MAJOR_VERSION)$(DLSUFFIX)' $(version_link) shlib = lib$(NAME).$(SO_MAJOR_VERSION)$(DLSUFFIX) shlib_major = lib$(NAME).$(SO_MAJOR_VERSION)$(DLSUFFIX) else @@ -122,7 +122,7 @@ ifeq ($(PORTNAME), darwin) BUILD.exports = $(AWK) '/^[^\#]/ {printf "_%s\n",$$1}' $< >$@ exports_file = $(SHLIB_EXPORTS:%.txt=%.list) ifneq (,$(exports_file)) - exported_symbols_list = -exported_symbols_list $(exports_file) + LINK.shared += -exported_symbols_list $(exports_file) endif endif diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 01e1db7f856..4204088fa0d 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -68,7 +68,7 @@ typedef struct BrinShared int scantuplesortstates; /* Query ID, for report in worker processes */ - uint64 queryid; + int64 queryid; /* * workersdonecv is used to monitor the progress of workers. 
All parallel diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index 46c1dce222d..50747c16396 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -1243,8 +1243,9 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace, } else { - text *t; + const char *name; const char *value; + text *t; Size len; /* @@ -1291,11 +1292,19 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace, * have just "name", assume "name=true" is meant. Note: the * namespace is not output. */ + name = def->defname; if (def->arg != NULL) value = defGetString(def); else value = "true"; + /* Insist that name not contain "=", else "a=b=c" is ambiguous */ + if (strchr(name, '=') != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid option name \"%s\": must not contain \"=\"", + name))); + /* * This is not a great place for this test, but there's no other * convenient place to filter the option out. As WITH (oids = @@ -1303,7 +1312,7 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace, * amount of ugly. 
*/ if (acceptOidsOff && def->defnamespace == NULL && - strcmp(def->defname, "oids") == 0) + strcmp(name, "oids") == 0) { if (defGetBoolean(def)) ereport(ERROR, @@ -1313,11 +1322,11 @@ transformRelOptions(Datum oldOptions, List *defList, const char *namspace, continue; } - len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value); + len = VARHDRSZ + strlen(name) + 1 + strlen(value); /* +1 leaves room for sprintf's trailing null */ t = (text *) palloc(len + 1); SET_VARSIZE(t, len); - sprintf(VARDATA(t), "%s=%s", def->defname, value); + sprintf(VARDATA(t), "%s=%s", name, value); astate = accumArrayResult(astate, PointerGetDatum(t), false, TEXTOID, diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c index ffd0c78f905..020d00cd01c 100644 --- a/src/backend/access/common/tupdesc.c +++ b/src/backend/access/common/tupdesc.c @@ -142,11 +142,18 @@ void verify_compact_attribute(TupleDesc tupdesc, int attnum) { #ifdef USE_ASSERT_CHECKING - CompactAttribute *cattr = &tupdesc->compact_attrs[attnum]; + CompactAttribute cattr; Form_pg_attribute attr = TupleDescAttr(tupdesc, attnum); CompactAttribute tmp; /* + * Make a temp copy of the TupleDesc's CompactAttribute. This may be a + * shared TupleDesc and the attcacheoff might get changed by another + * backend. + */ + memcpy(&cattr, &tupdesc->compact_attrs[attnum], sizeof(CompactAttribute)); + + /* * Populate the temporary CompactAttribute from the corresponding * Form_pg_attribute */ @@ -156,11 +163,11 @@ verify_compact_attribute(TupleDesc tupdesc, int attnum) * Make the attcacheoff match since it's been reset to -1 by * populate_compact_attribute_internal. Same with attnullability. 
*/ - tmp.attcacheoff = cattr->attcacheoff; - tmp.attnullability = cattr->attnullability; + tmp.attcacheoff = cattr.attcacheoff; + tmp.attnullability = cattr.attnullability; /* Check the freshly populated CompactAttribute matches the TupleDesc's */ - Assert(memcmp(&tmp, cattr, sizeof(CompactAttribute)) == 0); + Assert(memcmp(&tmp, &cattr, sizeof(CompactAttribute)) == 0); #endif } diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c index a6b701943d3..c0aa7d0222f 100644 --- a/src/backend/access/gist/gistutil.c +++ b/src/backend/access/gist/gistutil.c @@ -1058,11 +1058,11 @@ gistGetFakeLSN(Relation rel) } /* - * This is a stratnum support function for GiST opclasses that use the - * RT*StrategyNumber constants. + * This is a stratnum translation support function for GiST opclasses that use + * the RT*StrategyNumber constants. */ Datum -gist_stratnum_common(PG_FUNCTION_ARGS) +gist_translate_cmptype_common(PG_FUNCTION_ARGS) { CompareType cmptype = PG_GETARG_INT32(0); @@ -1090,9 +1090,9 @@ gist_stratnum_common(PG_FUNCTION_ARGS) /* * Returns the opclass's private stratnum used for the given compare type. * - * Calls the opclass's GIST_STRATNUM_PROC support function, if any, - * and returns the result. - * Returns InvalidStrategy if the function is not defined. + * Calls the opclass's GIST_TRANSLATE_CMPTYPE_PROC support function, if any, + * and returns the result. Returns InvalidStrategy if the function is not + * defined. */ StrategyNumber gisttranslatecmptype(CompareType cmptype, Oid opfamily) @@ -1101,7 +1101,7 @@ gisttranslatecmptype(CompareType cmptype, Oid opfamily) Datum result; /* Check whether the function is provided. 
*/ - funcid = get_opfamily_proc(opfamily, ANYOID, ANYOID, GIST_STRATNUM_PROC); + funcid = get_opfamily_proc(opfamily, ANYOID, ANYOID, GIST_TRANSLATE_CMPTYPE_PROC); if (!OidIsValid(funcid)) return InvalidStrategy; diff --git a/src/backend/access/gist/gistvalidate.c b/src/backend/access/gist/gistvalidate.c index 2a49e6d20f0..2ed6f74fce9 100644 --- a/src/backend/access/gist/gistvalidate.c +++ b/src/backend/access/gist/gistvalidate.c @@ -138,7 +138,7 @@ gistvalidate(Oid opclassoid) ok = check_amproc_signature(procform->amproc, VOIDOID, true, 1, 1, INTERNALOID); break; - case GIST_STRATNUM_PROC: + case GIST_TRANSLATE_CMPTYPE_PROC: ok = check_amproc_signature(procform->amproc, INT2OID, true, 1, 1, INT4OID) && procform->amproclefttype == ANYOID && @@ -265,7 +265,7 @@ gistvalidate(Oid opclassoid) if (i == GIST_DISTANCE_PROC || i == GIST_FETCH_PROC || i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC || i == GIST_OPTIONS_PROC || i == GIST_SORTSUPPORT_PROC || - i == GIST_STRATNUM_PROC) + i == GIST_TRANSLATE_CMPTYPE_PROC) continue; /* optional methods */ ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -336,7 +336,7 @@ gistadjustmembers(Oid opfamilyoid, case GIST_FETCH_PROC: case GIST_OPTIONS_PROC: case GIST_SORTSUPPORT_PROC: - case GIST_STRATNUM_PROC: + case GIST_TRANSLATE_CMPTYPE_PROC: /* Optional, so force it to be a soft family dependency */ op->ref_is_hard = false; op->ref_is_family = true; diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 9ec8cda1c68..0dcd6ee817e 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -213,6 +213,27 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] = #define TUPLOCK_from_mxstatus(status) \ (MultiXactStatusLock[(status)]) +/* + * Check that we have a valid snapshot if we might need TOAST access. 
+ */ +static inline void +AssertHasSnapshotForToast(Relation rel) +{ +#ifdef USE_ASSERT_CHECKING + + /* bootstrap mode in particular breaks this rule */ + if (!IsNormalProcessingMode()) + return; + + /* if the relation doesn't have a TOAST table, we are good */ + if (!OidIsValid(rel->rd_rel->reltoastrelid)) + return; + + Assert(HaveRegisteredOrActiveSnapshot()); + +#endif /* USE_ASSERT_CHECKING */ +} + /* ---------------------------------------------------------------- * heap support routines * ---------------------------------------------------------------- @@ -2066,6 +2087,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, Assert(HeapTupleHeaderGetNatts(tup->t_data) <= RelationGetNumberOfAttributes(relation)); + AssertHasSnapshotForToast(relation); + /* * Fill in tuple header fields and toast the tuple if necessary. * @@ -2343,6 +2366,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, /* currently not needed (thus unsupported) for heap_multi_insert() */ Assert(!(options & HEAP_INSERT_NO_LOGICAL)); + AssertHasSnapshotForToast(relation); + needwal = RelationNeedsWAL(relation); saveFreeSpace = RelationGetTargetPageFreeSpace(relation, HEAP_DEFAULT_FILLFACTOR); @@ -2765,6 +2790,8 @@ heap_delete(Relation relation, ItemPointer tid, Assert(ItemPointerIsValid(tid)); + AssertHasSnapshotForToast(relation); + /* * Forbid this during a parallel operation, lest it allocate a combo CID. * Other workers might need that combo CID for visibility checks, and we @@ -3260,6 +3287,8 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, Assert(HeapTupleHeaderGetNatts(newtup->t_data) <= RelationGetNumberOfAttributes(relation)); + AssertHasSnapshotForToast(relation); + /* * Forbid this during a parallel operation, lest it allocate a combo CID. 
* Other workers might need that combo CID for visibility checks, and we @@ -4953,7 +4982,7 @@ l3: case LockWaitError: if (!ConditionalMultiXactIdWait((MultiXactId) xwait, status, infomask, relation, - NULL, log_lock_failure)) + NULL, log_lock_failures)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), errmsg("could not obtain lock on row in relation \"%s\"", @@ -4991,7 +5020,7 @@ l3: } break; case LockWaitError: - if (!ConditionalXactLockTableWait(xwait, log_lock_failure)) + if (!ConditionalXactLockTableWait(xwait, log_lock_failures)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), errmsg("could not obtain lock on row in relation \"%s\"", @@ -5256,7 +5285,7 @@ heap_acquire_tuplock(Relation relation, ItemPointer tid, LockTupleMode mode, break; case LockWaitError: - if (!ConditionalLockTupleTuplock(relation, tid, mode, log_lock_failure)) + if (!ConditionalLockTupleTuplock(relation, tid, mode, log_lock_failures)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), errmsg("could not obtain lock on row in relation \"%s\"", diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index ac082fefa77..cb4bc35c93e 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -464,7 +464,7 @@ tuple_lock_retry: return TM_WouldBlock; break; case LockWaitError: - if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failure)) + if (!ConditionalXactLockTableWait(SnapshotDirty.xmax, log_lock_failures)) ereport(ERROR, (errcode(ERRCODE_LOCK_NOT_AVAILABLE), errmsg("could not obtain lock on row in relation \"%s\"", diff --git a/src/backend/access/heap/heapam_xlog.c b/src/backend/access/heap/heapam_xlog.c index 30f4c2d3c67..eb4bd3d6ae3 100644 --- a/src/backend/access/heap/heapam_xlog.c +++ b/src/backend/access/heap/heapam_xlog.c @@ -438,6 +438,9 @@ heap_xlog_insert(XLogReaderState *record) ItemPointerSetBlockNumber(&target_tid, blkno); ItemPointerSetOffsetNumber(&target_tid, 
xlrec->offnum); + /* No freezing in the heap_insert() code path */ + Assert(!(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)); + /* * The visibility map may need to be fixed even if the heap page is * already up-to-date. @@ -508,10 +511,6 @@ heap_xlog_insert(XLogReaderState *record) if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) PageClearAllVisible(page); - /* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */ - if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET) - PageSetAllVisible(page); - MarkBufferDirty(buffer); } if (BufferIsValid(buffer)) diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index f28326bad09..14036c27e87 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -423,7 +423,7 @@ typedef struct LVSavedErrInfo /* non-export function prototypes */ static void lazy_scan_heap(LVRelState *vacrel); static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, - VacuumParams *params); + const VacuumParams params); static BlockNumber heap_vac_scan_next_block(ReadStream *stream, void *callback_private_data, void *per_buffer_data); @@ -431,7 +431,7 @@ static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis); static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool sharelock, Buffer vmbuffer); -static void lazy_scan_prune(LVRelState *vacrel, Buffer buf, +static int lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, Buffer vmbuffer, bool all_visible_according_to_vm, bool *has_lpdead_items, bool *vm_page_frozen); @@ -485,7 +485,7 @@ static void restore_vacuum_error_info(LVRelState *vacrel, * vacuum options or for relfrozenxid/relminmxid advancement. 
*/ static void -heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params) +heap_vacuum_eager_scan_setup(LVRelState *vacrel, const VacuumParams params) { uint32 randseed; BlockNumber allvisible; @@ -504,7 +504,7 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params) vacrel->eager_scan_remaining_successes = 0; /* If eager scanning is explicitly disabled, just return. */ - if (params->max_eager_freeze_failure_rate == 0) + if (params.max_eager_freeze_failure_rate == 0) return; /* @@ -581,11 +581,11 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params) vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE; - Assert(params->max_eager_freeze_failure_rate > 0 && - params->max_eager_freeze_failure_rate <= 1); + Assert(params.max_eager_freeze_failure_rate > 0 && + params.max_eager_freeze_failure_rate <= 1); vacrel->eager_scan_max_fails_per_region = - params->max_eager_freeze_failure_rate * + params.max_eager_freeze_failure_rate * EAGER_SCAN_REGION_SIZE; /* @@ -612,7 +612,7 @@ heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params) * and locked the relation. */ void -heap_vacuum_rel(Relation rel, VacuumParams *params, +heap_vacuum_rel(Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy) { LVRelState *vacrel; @@ -634,9 +634,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, ErrorContextCallback errcallback; char **indnames = NULL; - verbose = (params->options & VACOPT_VERBOSE) != 0; + verbose = (params.options & VACOPT_VERBOSE) != 0; instrument = (verbose || (AmAutoVacuumWorkerProcess() && - params->log_min_duration >= 0)); + params.log_min_duration >= 0)); if (instrument) { pg_rusage_init(&ru0); @@ -699,9 +699,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * The truncate param allows user to avoid attempting relation truncation, * though it can't force truncation to happen. 
*/ - Assert(params->index_cleanup != VACOPTVALUE_UNSPECIFIED); - Assert(params->truncate != VACOPTVALUE_UNSPECIFIED && - params->truncate != VACOPTVALUE_AUTO); + Assert(params.index_cleanup != VACOPTVALUE_UNSPECIFIED); + Assert(params.truncate != VACOPTVALUE_UNSPECIFIED && + params.truncate != VACOPTVALUE_AUTO); /* * While VacuumFailSafeActive is reset to false before calling this, we @@ -711,14 +711,14 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, vacrel->consider_bypass_optimization = true; vacrel->do_index_vacuuming = true; vacrel->do_index_cleanup = true; - vacrel->do_rel_truncate = (params->truncate != VACOPTVALUE_DISABLED); - if (params->index_cleanup == VACOPTVALUE_DISABLED) + vacrel->do_rel_truncate = (params.truncate != VACOPTVALUE_DISABLED); + if (params.index_cleanup == VACOPTVALUE_DISABLED) { /* Force disable index vacuuming up-front */ vacrel->do_index_vacuuming = false; vacrel->do_index_cleanup = false; } - else if (params->index_cleanup == VACOPTVALUE_ENABLED) + else if (params.index_cleanup == VACOPTVALUE_ENABLED) { /* Force index vacuuming. Note that failsafe can still bypass. */ vacrel->consider_bypass_optimization = false; @@ -726,7 +726,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, else { /* Default/auto, make all decisions dynamically */ - Assert(params->index_cleanup == VACOPTVALUE_AUTO); + Assert(params.index_cleanup == VACOPTVALUE_AUTO); } /* Initialize page counters explicitly (be tidy) */ @@ -757,7 +757,6 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, vacrel->vm_new_visible_pages = 0; vacrel->vm_new_visible_frozen_pages = 0; vacrel->vm_new_frozen_pages = 0; - vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel); /* * Get cutoffs that determine which deleted tuples are considered DEAD, @@ -776,7 +775,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * to increase the number of dead tuples it can prune away.) 
*/ vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs); + vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel); vacrel->vistest = GlobalVisTestFor(rel); + /* Initialize state used to track oldest extant XID/MXID */ vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin; vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact; @@ -788,7 +789,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, */ vacrel->skippedallvis = false; skipwithvm = true; - if (params->options & VACOPT_DISABLE_PAGE_SKIPPING) + if (params.options & VACOPT_DISABLE_PAGE_SKIPPING) { /* * Force aggressive mode, and disable skipping blocks using the @@ -829,7 +830,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * is already dangerously old.) */ lazy_check_wraparound_failsafe(vacrel); - dead_items_alloc(vacrel, params->nworkers); + dead_items_alloc(vacrel, params.nworkers); /* * Call lazy_scan_heap to perform all required heap pruning, index @@ -946,9 +947,9 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, { TimestampTz endtime = GetCurrentTimestamp(); - if (verbose || params->log_min_duration == 0 || + if (verbose || params.log_min_duration == 0 || TimestampDifferenceExceeds(starttime, endtime, - params->log_min_duration)) + params.log_min_duration)) { long secs_dur; int usecs_dur; @@ -983,10 +984,10 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, * Aggressiveness already reported earlier, in dedicated * VACUUM VERBOSE ereport */ - Assert(!params->is_wraparound); + Assert(!params.is_wraparound); msgfmt = _("finished vacuuming \"%s.%s.%s\": index scans: %d\n"); } - else if (params->is_wraparound) + else if (params.is_wraparound) { /* * While it's possible for a VACUUM to be both is_wraparound @@ -1244,6 +1245,7 @@ lazy_scan_heap(LVRelState *vacrel) Buffer buf; Page page; uint8 blk_info = 0; + int ndeleted = 0; bool has_lpdead_items; void *per_buffer_data = NULL; bool vm_page_frozen = false; @@ -1386,10 +1388,10 @@ lazy_scan_heap(LVRelState 
*vacrel) * line pointers previously marked LP_DEAD. */ if (got_cleanup_lock) - lazy_scan_prune(vacrel, buf, blkno, page, - vmbuffer, - blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM, - &has_lpdead_items, &vm_page_frozen); + ndeleted = lazy_scan_prune(vacrel, buf, blkno, page, + vmbuffer, + blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM, + &has_lpdead_items, &vm_page_frozen); /* * Count an eagerly scanned page as a failure or a success. @@ -1413,12 +1415,26 @@ lazy_scan_heap(LVRelState *vacrel) if (vm_page_frozen) { - Assert(vacrel->eager_scan_remaining_successes > 0); - vacrel->eager_scan_remaining_successes--; + if (vacrel->eager_scan_remaining_successes > 0) + vacrel->eager_scan_remaining_successes--; if (vacrel->eager_scan_remaining_successes == 0) { /* + * Report only once that we disabled eager scanning. We + * may eagerly read ahead blocks in excess of the success + * or failure caps before attempting to freeze them, so we + * could reach here even after disabling additional eager + * scanning. + */ + if (vacrel->eager_scan_max_fails_per_region > 0) + ereport(vacrel->verbose ? INFO : DEBUG2, + (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of relation \"%s.%s.%s\"", + orig_eager_scan_success_limit, + vacrel->dbname, vacrel->relnamespace, + vacrel->relname))); + + /* * If we hit our success cap, permanently disable eager * scanning by setting the other eager scan management * fields to their disabled values. @@ -1426,19 +1442,10 @@ lazy_scan_heap(LVRelState *vacrel) vacrel->eager_scan_remaining_fails = 0; vacrel->next_eager_scan_region_start = InvalidBlockNumber; vacrel->eager_scan_max_fails_per_region = 0; - - ereport(vacrel->verbose ? 
INFO : DEBUG2, - (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of \"%s.%s.%s\"", - orig_eager_scan_success_limit, - vacrel->dbname, vacrel->relnamespace, - vacrel->relname))); } } - else - { - Assert(vacrel->eager_scan_remaining_fails > 0); + else if (vacrel->eager_scan_remaining_fails > 0) vacrel->eager_scan_remaining_fails--; - } } /* @@ -1475,7 +1482,7 @@ lazy_scan_heap(LVRelState *vacrel) * table has indexes. There will only be newly-freed space if we * held the cleanup lock and lazy_scan_prune() was called. */ - if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items && + if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 && blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES) { FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, @@ -1866,8 +1873,6 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, */ if (!PageIsAllVisible(page)) { - uint8 old_vmbits; - START_CRIT_SECTION(); /* mark buffer dirty before writing a WAL record */ @@ -1887,24 +1892,16 @@ lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, log_newpage_buffer(buf, true); PageSetAllVisible(page); - old_vmbits = visibilitymap_set(vacrel->rel, blkno, buf, - InvalidXLogRecPtr, - vmbuffer, InvalidTransactionId, - VISIBILITYMAP_ALL_VISIBLE | - VISIBILITYMAP_ALL_FROZEN); + visibilitymap_set(vacrel->rel, blkno, buf, + InvalidXLogRecPtr, + vmbuffer, InvalidTransactionId, + VISIBILITYMAP_ALL_VISIBLE | + VISIBILITYMAP_ALL_FROZEN); END_CRIT_SECTION(); - /* - * If the page wasn't already set all-visible and/or all-frozen in - * the VM, count it as newly set for logging. 
- */ - if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0) - { - vacrel->vm_new_visible_pages++; - vacrel->vm_new_visible_frozen_pages++; - } - else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0) - vacrel->vm_new_frozen_pages++; + /* Count the newly all-frozen pages for logging */ + vacrel->vm_new_visible_pages++; + vacrel->vm_new_visible_frozen_pages++; } freespace = PageGetHeapFreeSpace(page); @@ -1940,8 +1937,10 @@ cmpOffsetNumbers(const void *a, const void *b) * *vm_page_frozen is set to true if the page is newly set all-frozen in the * VM. The caller currently only uses this for determining whether an eagerly * scanned page was successfully set all-frozen. + * + * Returns the number of tuples deleted from the page during HOT pruning. */ -static void +static int lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, @@ -2212,6 +2211,8 @@ lazy_scan_prune(LVRelState *vacrel, *vm_page_frozen = true; } } + + return presult.ndeleted; } /* @@ -2909,7 +2910,6 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, if (heap_page_is_all_visible(vacrel, buffer, &visibility_cutoff_xid, &all_frozen)) { - uint8 old_vmbits; uint8 flags = VISIBILITYMAP_ALL_VISIBLE; if (all_frozen) @@ -2919,25 +2919,15 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer, } PageSetAllVisible(page); - old_vmbits = visibilitymap_set(vacrel->rel, blkno, buffer, - InvalidXLogRecPtr, - vmbuffer, visibility_cutoff_xid, - flags); - - /* - * If the page wasn't already set all-visible and/or all-frozen in the - * VM, count it as newly set for logging. 
- */ - if ((old_vmbits & VISIBILITYMAP_ALL_VISIBLE) == 0) - { - vacrel->vm_new_visible_pages++; - if (all_frozen) - vacrel->vm_new_visible_frozen_pages++; - } + visibilitymap_set(vacrel->rel, blkno, buffer, + InvalidXLogRecPtr, + vmbuffer, visibility_cutoff_xid, + flags); - else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 && - all_frozen) - vacrel->vm_new_frozen_pages++; + /* Count the newly set VM page for logging */ + vacrel->vm_new_visible_pages++; + if (all_frozen) + vacrel->vm_new_visible_frozen_pages++; } /* Revert to the previous phase information for error traceback */ diff --git a/src/backend/access/nbtree/nbtpreprocesskeys.c b/src/backend/access/nbtree/nbtpreprocesskeys.c index a136e4bbfdf..21c519cd108 100644 --- a/src/backend/access/nbtree/nbtpreprocesskeys.c +++ b/src/backend/access/nbtree/nbtpreprocesskeys.c @@ -16,6 +16,7 @@ #include "postgres.h" #include "access/nbtree.h" +#include "common/int.h" #include "lib/qunique.h" #include "utils/array.h" #include "utils/lsyscache.h" @@ -56,6 +57,8 @@ static void _bt_skiparray_strat_decrement(IndexScanDesc scan, ScanKey arraysk, BTArrayKeyInfo *array); static void _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk, BTArrayKeyInfo *array); +static void _bt_unmark_keys(IndexScanDesc scan, int *keyDataMap); +static int _bt_reorder_array_cmp(const void *a, const void *b); static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys); static void _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap); static int _bt_num_array_keys(IndexScanDesc scan, Oid *skip_eq_ops_out, @@ -96,7 +99,7 @@ static int _bt_compare_array_elements(const void *a, const void *b, void *arg); * incomplete sets of cross-type operators, we may fail to detect redundant * or contradictory keys, but we can survive that.) * - * The output keys must be sorted by index attribute. Presently we expect + * Required output keys are sorted by index attribute. 
Presently we expect * (but verify) that the input keys are already so sorted --- this is done * by match_clauses_to_index() in indxpath.c. Some reordering of the keys * within each attribute may be done as a byproduct of the processing here. @@ -127,29 +130,36 @@ static int _bt_compare_array_elements(const void *a, const void *b, void *arg); * This has the potential to be much more efficient than a full index scan * (though it behaves like a full scan when there's many distinct "x" values). * - * If possible, redundant keys are eliminated: we keep only the tightest + * Typically, redundant keys are eliminated: we keep only the tightest * >/>= bound and the tightest </<= bound, and if there's an = key then * that's the only one returned. (So, we return either a single = key, * or one or two boundary-condition keys for each attr.) However, if we * cannot compare two keys for lack of a suitable cross-type operator, - * we cannot eliminate either. If there are two such keys of the same - * operator strategy, the second one is just pushed into the output array - * without further processing here. We may also emit both >/>= or both - * </<= keys if we can't compare them. The logic about required keys still - * works if we don't eliminate redundant keys. - * - * Note that one reason we need direction-sensitive required-key flags is - * precisely that we may not be able to eliminate redundant keys. Suppose - * we have "x > 4::int AND x > 10::bigint", and we are unable to determine - * which key is more restrictive for lack of a suitable cross-type operator. - * _bt_first will arbitrarily pick one of the keys to do the initial - * positioning with. If it picks x > 4, then the x > 10 condition will fail - * until we reach index entries > 10; but we can't stop the scan just because - * x > 10 is failing. On the other hand, if we are scanning backwards, then - * failure of either key is indeed enough to stop the scan. 
(In general, when - * inequality keys are present, the initial-positioning code only promises to - * position before the first possible match, not exactly at the first match, - * for a forward scan; or after the last match for a backward scan.) + * we cannot eliminate either key. + * + * When all redundant keys could not be eliminated, we'll output a key array + * that can more or less be treated as if it had no redundant keys. Suppose + * we have "x > 4::int AND x > 10::bigint AND x < 70", and we are unable to + * determine which > key is more restrictive for lack of a suitable cross-type + * operator. We'll arbitrarily pick one of the > keys; the other > key won't + * be marked required. Obviously, the scan will be less efficient if we + * choose x > 4 over x > 10 -- but it can still largely proceed as if there + * was only a single > condition. "x > 10" will be placed at the end of the + * so->keyData[] output array. It'll always be evaluated last, after the keys + * that could be marked required in the usual way (after "x > 4 AND x < 70"). + * This can sometimes result in so->keyData[] keys that aren't even in index + * attribute order (if the qual involves multiple attributes). The scan's + * required keys will still be in attribute order, though, so it can't matter. + * + * This scheme ensures that _bt_first always uses the same set of keys at the + * start of a forwards scan as those _bt_checkkeys uses to determine when to + * end a similar backwards scan (and vice-versa). _bt_advance_array_keys + * depends on this: it expects to be able to reliably predict what the next + * _bt_first call will do by testing whether _bt_checkkeys' routines report + * that the final tuple on the page is past the end of matches for the scan's + * keys with the scan direction flipped. If it is (if continuescan=false), + * then it follows that calling _bt_first will, at a minimum, relocate the + * scan to the very next leaf page (in the current scan direction). 
* * As a byproduct of this work, we can detect contradictory quals such * as "x = 1 AND x > 2". If we see that, we return so->qual_ok = false, @@ -188,7 +198,8 @@ _bt_preprocess_keys(IndexScanDesc scan) int numberOfEqualCols; ScanKey inkeys; BTScanKeyPreproc xform[BTMaxStrategyNumber]; - bool test_result; + bool test_result, + redundant_key_kept = false; AttrNumber attno; ScanKey arrayKeyData; int *keyDataMap = NULL; @@ -388,7 +399,8 @@ _bt_preprocess_keys(IndexScanDesc scan) xform[j].inkey = NULL; xform[j].inkeyi = -1; } - /* else, cannot determine redundancy, keep both keys */ + else + redundant_key_kept = true; } /* track number of attrs for which we have "=" keys */ numberOfEqualCols++; @@ -409,6 +421,8 @@ _bt_preprocess_keys(IndexScanDesc scan) else xform[BTLessStrategyNumber - 1].inkey = NULL; } + else + redundant_key_kept = true; } /* try to keep only one of >, >= */ @@ -426,6 +440,8 @@ _bt_preprocess_keys(IndexScanDesc scan) else xform[BTGreaterStrategyNumber - 1].inkey = NULL; } + else + redundant_key_kept = true; } /* @@ -466,25 +482,6 @@ _bt_preprocess_keys(IndexScanDesc scan) /* check strategy this key's operator corresponds to */ j = inkey->sk_strategy - 1; - /* if row comparison, push it directly to the output array */ - if (inkey->sk_flags & SK_ROW_HEADER) - { - ScanKey outkey = &so->keyData[new_numberOfKeys++]; - - memcpy(outkey, inkey, sizeof(ScanKeyData)); - if (arrayKeyData) - keyDataMap[new_numberOfKeys - 1] = i; - if (numberOfEqualCols == attno - 1) - _bt_mark_scankey_required(outkey); - - /* - * We don't support RowCompare using equality; such a qual would - * mess up the numberOfEqualCols tracking. - */ - Assert(j != (BTEqualStrategyNumber - 1)); - continue; - } - if (inkey->sk_strategy == BTEqualStrategyNumber && (inkey->sk_flags & SK_SEARCHARRAY)) { @@ -593,9 +590,8 @@ _bt_preprocess_keys(IndexScanDesc scan) * the new scan key. 
* * Note: We do things this way around so that our arrays are - * always in the same order as their corresponding scan keys, - * even with incomplete opfamilies. _bt_advance_array_keys - * depends on this. + * always in the same order as their corresponding scan keys. + * _bt_preprocess_array_keys_final expects this. */ ScanKey outkey = &so->keyData[new_numberOfKeys++]; @@ -607,6 +603,7 @@ _bt_preprocess_keys(IndexScanDesc scan) xform[j].inkey = inkey; xform[j].inkeyi = i; xform[j].arrayidx = arrayidx; + redundant_key_kept = true; } } } @@ -622,6 +619,15 @@ _bt_preprocess_keys(IndexScanDesc scan) if (arrayKeyData) _bt_preprocess_array_keys_final(scan, keyDataMap); + /* + * If there are remaining redundant inequality keys, we must make sure + * that each index attribute has no more than one required >/>= key, and + * no more than one required </<= key. Attributes that have one or more + * required = keys now must keep only one required key (the first = key). + */ + if (unlikely(redundant_key_kept) && so->qual_ok) + _bt_unmark_keys(scan, keyDataMap); + /* Could pfree arrayKeyData/keyDataMap now, but not worth the cycles */ } @@ -746,9 +752,12 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption) * * Depending on the operator type, the key may be required for both scan * directions or just one. Also, if the key is a row comparison header, - * we have to mark its first subsidiary ScanKey as required. (Subsequent - * subsidiary ScanKeys are normally for lower-order columns, and thus - * cannot be required, since they're after the first non-equality scankey.) + * we have to mark the appropriate subsidiary ScanKeys as required. In such + * cases, the first subsidiary key is required, but subsequent ones are + * required only as long as they correspond to successive index columns and + * match the leading column as to sort direction. 
Otherwise the row + * comparison ordering is different from the index ordering and so we can't + * stop the scan on the basis of those lower-order columns. * * Note: when we set required-key flag bits in a subsidiary scankey, we are * scribbling on a data structure belonging to the index AM's caller, not on @@ -786,12 +795,25 @@ _bt_mark_scankey_required(ScanKey skey) if (skey->sk_flags & SK_ROW_HEADER) { ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument); + AttrNumber attno = skey->sk_attno; /* First subkey should be same column/operator as the header */ - Assert(subkey->sk_flags & SK_ROW_MEMBER); - Assert(subkey->sk_attno == skey->sk_attno); + Assert(subkey->sk_attno == attno); Assert(subkey->sk_strategy == skey->sk_strategy); - subkey->sk_flags |= addflags; + + for (;;) + { + Assert(subkey->sk_flags & SK_ROW_MEMBER); + if (subkey->sk_attno != attno) + break; /* non-adjacent key, so not required */ + if (subkey->sk_strategy != skey->sk_strategy) + break; /* wrong direction, so not required */ + subkey->sk_flags |= addflags; + if (subkey->sk_flags & SK_ROW_END) + break; + subkey++; + attno++; + } } } @@ -847,8 +869,7 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op, cmp_op; StrategyNumber strat; - Assert(!((leftarg->sk_flags | rightarg->sk_flags) & - (SK_ROW_HEADER | SK_ROW_MEMBER))); + Assert(!((leftarg->sk_flags | rightarg->sk_flags) & SK_ROW_MEMBER)); /* * First, deal with cases where one or both args are NULL. 
This should @@ -925,6 +946,16 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op, } /* + * We don't yet know how to determine redundancy when it involves a row + * compare key (barring simple cases involving IS NULL/IS NOT NULL) + */ + if ((leftarg->sk_flags | rightarg->sk_flags) & SK_ROW_HEADER) + { + Assert(!((leftarg->sk_flags | rightarg->sk_flags) & SK_BT_SKIP)); + return false; + } + + /* * If either leftarg or rightarg are equality-type array scankeys, we need * specialized handling (since by now we know that IS NULL wasn't used) */ @@ -1468,6 +1499,283 @@ _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk, } /* + * _bt_unmark_keys() -- make superfluous required keys nonrequired after all + * + * When _bt_preprocess_keys fails to eliminate one or more redundant keys, it + * calls here to make sure that no index attribute has more than one > or >= + * key marked required, and no more than one required < or <= key. Attributes + * with = keys will always get one = key as their required key. All other + * keys that were initially marked required get "unmarked" here. That way, + * _bt_first and _bt_checkkeys will reliably agree on which keys to use to + * start and/or to end the scan. + * + * We also relocate keys that become/started out nonrequired to the end of + * so->keyData[]. That way, _bt_first and _bt_checkkeys cannot fail to reach + * a required key due to some earlier nonrequired key getting in the way. + * + * Only call here when _bt_compare_scankey_args returned false at least once + * (otherwise, calling here will just waste cycles). 
+ */ +static void +_bt_unmark_keys(IndexScanDesc scan, int *keyDataMap) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + AttrNumber attno; + bool *unmarkikey; + int nunmark, + nunmarked, + nkept, + firsti; + ScanKey keepKeys, + unmarkKeys; + FmgrInfo *keepOrderProcs = NULL, + *unmarkOrderProcs = NULL; + bool haveReqEquals, + haveReqForward, + haveReqBackward; + + /* + * Do an initial pass over so->keyData[] that determines which keys to + * keep as required. We expect so->keyData[] to still be in attribute + * order when we're called (though we don't expect any particular order + * among each attribute's keys). + * + * When both equality and inequality keys remain on a single attribute, we + * *must* make sure that exactly one of the equalities remains required. + * Any requiredness markings that we might leave on later keys/attributes + * are predicated on there being required = keys on all prior columns. + */ + unmarkikey = palloc0(so->numberOfKeys * sizeof(bool)); + nunmark = 0; + + /* Set things up for first key's attribute */ + attno = so->keyData[0].sk_attno; + firsti = 0; + haveReqEquals = false; + haveReqForward = false; + haveReqBackward = false; + for (int i = 0; i < so->numberOfKeys; i++) + { + ScanKey origkey = &so->keyData[i]; + + if (origkey->sk_attno != attno) + { + /* Reset for next attribute */ + attno = origkey->sk_attno; + firsti = i; + + haveReqEquals = false; + haveReqForward = false; + haveReqBackward = false; + } + + /* Equalities get priority over inequalities */ + if (haveReqEquals) + { + /* + * We already found the first "=" key for this attribute. We've + * already decided that all its other keys will be unmarked. + */ + Assert(!(origkey->sk_flags & SK_SEARCHNULL)); + unmarkikey[i] = true; + nunmark++; + continue; + } + else if ((origkey->sk_flags & SK_BT_REQFWD) && + (origkey->sk_flags & SK_BT_REQBKWD)) + { + /* + * Found the first "=" key for attno. All other attno keys will + * be unmarked. 
+ */ + Assert(origkey->sk_strategy == BTEqualStrategyNumber); + + haveReqEquals = true; + for (int j = firsti; j < i; j++) + { + /* Unmark any prior inequality keys on attno after all */ + if (!unmarkikey[j]) + { + unmarkikey[j] = true; + nunmark++; + } + } + continue; + } + + /* Deal with inequalities next */ + if ((origkey->sk_flags & SK_BT_REQFWD) && !haveReqForward) + { + haveReqForward = true; + continue; + } + else if ((origkey->sk_flags & SK_BT_REQBKWD) && !haveReqBackward) + { + haveReqBackward = true; + continue; + } + + /* + * We have either a redundant inequality key that will be unmarked, or + * we have a key that wasn't marked required in the first place + */ + unmarkikey[i] = true; + nunmark++; + } + + /* Should only be called when _bt_compare_scankey_args reported failure */ + Assert(nunmark > 0); + + /* + * Next, allocate temp arrays: one for required keys that'll remain + * required, the other for all remaining keys + */ + unmarkKeys = palloc(nunmark * sizeof(ScanKeyData)); + keepKeys = palloc((so->numberOfKeys - nunmark) * sizeof(ScanKeyData)); + nunmarked = 0; + nkept = 0; + if (so->numArrayKeys) + { + unmarkOrderProcs = palloc(nunmark * sizeof(FmgrInfo)); + keepOrderProcs = palloc((so->numberOfKeys - nunmark) * sizeof(FmgrInfo)); + } + + /* + * Next, copy the contents of so->keyData[] into the appropriate temp + * array. + * + * Scans with = array keys need us to maintain invariants around the order + * of so->orderProcs[] and so->arrayKeys[] relative to so->keyData[]. See + * _bt_preprocess_array_keys_final for a full explanation. + */ + for (int i = 0; i < so->numberOfKeys; i++) + { + ScanKey origkey = &so->keyData[i]; + ScanKey unmark; + + if (!unmarkikey[i]) + { + /* + * Key gets to keep its original requiredness markings. + * + * Key will stay in its original position, unless we're going to + * unmark an earlier key (in which case this key gets moved back). 
+ */ + memcpy(keepKeys + nkept, origkey, sizeof(ScanKeyData)); + + if (so->numArrayKeys) + { + keyDataMap[i] = nkept; + memcpy(keepOrderProcs + nkept, &so->orderProcs[i], + sizeof(FmgrInfo)); + } + + nkept++; + continue; + } + + /* + * Key will be unmarked as needed, and moved to the end of the array, + * next to other keys that will become (or always were) nonrequired + */ + unmark = unmarkKeys + nunmarked; + memcpy(unmark, origkey, sizeof(ScanKeyData)); + + if (so->numArrayKeys) + { + keyDataMap[i] = (so->numberOfKeys - nunmark) + nunmarked; + memcpy(&unmarkOrderProcs[nunmarked], &so->orderProcs[i], + sizeof(FmgrInfo)); + } + + /* + * Preprocessing only generates skip arrays when it knows that they'll + * be the only required = key on the attr. We'll never unmark them. + */ + Assert(!(unmark->sk_flags & SK_BT_SKIP)); + + /* + * Also shouldn't have to unmark an IS NULL or an IS NOT NULL key. + * They aren't cross-type, so an incomplete opfamily can't matter. + */ + Assert(!(unmark->sk_flags & SK_ISNULL) || + !(unmark->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))); + + /* Clear requiredness flags on redundant key (and on any subkeys) */ + unmark->sk_flags &= ~(SK_BT_REQFWD | SK_BT_REQBKWD); + if (unmark->sk_flags & SK_ROW_HEADER) + { + ScanKey subkey = (ScanKey) DatumGetPointer(unmark->sk_argument); + + Assert(subkey->sk_strategy == unmark->sk_strategy); + for (;;) + { + Assert(subkey->sk_flags & SK_ROW_MEMBER); + subkey->sk_flags &= ~(SK_BT_REQFWD | SK_BT_REQBKWD); + if (subkey->sk_flags & SK_ROW_END) + break; + subkey++; + } + } + + nunmarked++; + } + + /* Copy both temp arrays back into so->keyData[] to reorder */ + Assert(nkept == so->numberOfKeys - nunmark); + Assert(nunmarked == nunmark); + memcpy(so->keyData, keepKeys, sizeof(ScanKeyData) * nkept); + memcpy(so->keyData + nkept, unmarkKeys, sizeof(ScanKeyData) * nunmarked); + + /* Done with temp arrays */ + pfree(unmarkikey); + pfree(keepKeys); + pfree(unmarkKeys); + + /* + * Now copy so->orderProcs[] temp 
entries needed by scans with = array + * keys back (just like with the so->keyData[] temp arrays) + */ + if (so->numArrayKeys) + { + memcpy(so->orderProcs, keepOrderProcs, sizeof(FmgrInfo) * nkept); + memcpy(so->orderProcs + nkept, unmarkOrderProcs, + sizeof(FmgrInfo) * nunmarked); + + /* Also fix-up array->scan_key references */ + for (int arridx = 0; arridx < so->numArrayKeys; arridx++) + { + BTArrayKeyInfo *array = &so->arrayKeys[arridx]; + + array->scan_key = keyDataMap[array->scan_key]; + } + + /* + * Sort so->arrayKeys[] based on its new BTArrayKeyInfo.scan_key + * offsets, so that its order matches so->keyData[] order as expected + */ + qsort(so->arrayKeys, so->numArrayKeys, sizeof(BTArrayKeyInfo), + _bt_reorder_array_cmp); + + /* Done with temp arrays */ + pfree(unmarkOrderProcs); + pfree(keepOrderProcs); + } +} + +/* + * qsort comparator for reordering so->arrayKeys[] BTArrayKeyInfo entries + */ +static int +_bt_reorder_array_cmp(const void *a, const void *b) +{ + BTArrayKeyInfo *arraya = (BTArrayKeyInfo *) a; + BTArrayKeyInfo *arrayb = (BTArrayKeyInfo *) b; + + return pg_cmp_s32(arraya->scan_key, arrayb->scan_key); +} + +/* * _bt_preprocess_array_keys() -- Preprocess SK_SEARCHARRAY scan keys * * If there are any SK_SEARCHARRAY scan keys, deconstruct the array(s) and diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 765659887af..fdff960c130 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -228,6 +228,8 @@ btgettuple(IndexScanDesc scan, ScanDirection dir) BTScanOpaque so = (BTScanOpaque) scan->opaque; bool res; + Assert(scan->heapRelation != NULL); + /* btree indexes are never lossy */ scan->xs_recheck = false; @@ -289,6 +291,8 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm) int64 ntids = 0; ItemPointer heapTid; + Assert(scan->heapRelation == NULL); + /* Each loop iteration performs another primitive index scan */ do { @@ -393,6 +397,34 @@ btrescan(IndexScanDesc scan, 
ScanKey scankey, int nscankeys, BTScanPosInvalidate(so->currPos); } + /* + * We prefer to eagerly drop leaf page pins before btgettuple returns. + * This avoids making VACUUM wait to acquire a cleanup lock on the page. + * + * We cannot safely drop leaf page pins during index-only scans due to a + * race condition involving VACUUM setting pages all-visible in the VM. + * It's also unsafe for plain index scans that use a non-MVCC snapshot. + * + * When we drop pins eagerly, the mechanism that marks so->killedItems[] + * index tuples LP_DEAD has to deal with concurrent TID recycling races. + * The scheme used to detect unsafe TID recycling won't work when scanning + * unlogged relations (since it involves saving an affected page's LSN). + * Opt out of eager pin dropping during unlogged relation scans for now + * (this is preferable to opting out of kill_prior_tuple LP_DEAD setting). + * + * Also opt out of dropping leaf page pins eagerly during bitmap scans. + * Pins cannot be held for more than an instant during bitmap scans either + * way, so we might as well avoid wasting cycles on acquiring page LSNs. + * + * See nbtree/README section on making concurrent TID recycling safe. + * + * Note: so->dropPin should never change across rescans. 
+ */ + so->dropPin = (!scan->xs_want_itup && + IsMVCCSnapshot(scan->xs_snapshot) && + RelationNeedsWAL(scan->indexRelation) && + scan->heapRelation != NULL); + so->markItemIndex = -1; so->needPrimScan = false; so->scanBehind = false; diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index fe9a3886913..4af1ff1e9e5 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -25,7 +25,7 @@ #include "utils/rel.h" -static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp); +static inline void _bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so); static Buffer _bt_moveright(Relation rel, Relation heaprel, BTScanInsert key, Buffer buf, bool forupdate, BTStack stack, int access); @@ -57,24 +57,29 @@ static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir); /* * _bt_drop_lock_and_maybe_pin() * - * Unlock the buffer; and if it is safe to release the pin, do that, too. - * This will prevent vacuum from stalling in a blocked state trying to read a - * page when a cursor is sitting on it. - * - * See nbtree/README section on making concurrent TID recycling safe. + * Unlock so->currPos.buf. If scan is so->dropPin, drop the pin, too. + * Dropping the pin prevents VACUUM from blocking on acquiring a cleanup lock. */ -static void -_bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp) +static inline void +_bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so) { - _bt_unlockbuf(scan->indexRelation, sp->buf); - - if (IsMVCCSnapshot(scan->xs_snapshot) && - RelationNeedsWAL(scan->indexRelation) && - !scan->xs_want_itup) + if (!so->dropPin) { - ReleaseBuffer(sp->buf); - sp->buf = InvalidBuffer; + /* Just drop the lock (not the pin) */ + _bt_unlockbuf(rel, so->currPos.buf); + return; } + + /* + * Drop both the lock and the pin. 
+ * + * Have to set so->currPos.lsn so that _bt_killitems has a way to detect + * when concurrent heap TID recycling by VACUUM might have taken place. + */ + Assert(RelationNeedsWAL(rel)); + so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf); + _bt_relbuf(rel, so->currPos.buf); + so->currPos.buf = InvalidBuffer; } /* @@ -866,8 +871,8 @@ _bt_compare(Relation rel, * if backwards scan, the last item) in the tree that satisfies the * qualifications in the scan key. On success exit, data about the * matching tuple(s) on the page has been loaded into so->currPos. We'll - * drop all locks and hold onto a pin on page's buffer, except when - * _bt_drop_lock_and_maybe_pin dropped the pin to avoid blocking VACUUM. + * drop all locks and hold onto a pin on page's buffer, except during + * so->dropPin scans, when we drop both the lock and the pin. * _bt_returnitem sets the next item to return to scan on success exit. * * If there are no matching items in the index, we return false, with no @@ -955,46 +960,51 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) /*---------- * Examine the scan keys to discover where we need to start the scan. + * The selected scan keys (at most one per index column) are remembered by + * storing their addresses into the local startKeys[] array. The final + * startKeys[] entry's strategy is set in strat_total. (Actually, there + * are a couple of cases where we force a less/more restrictive strategy.) * - * We want to identify the keys that can be used as starting boundaries; - * these are =, >, or >= keys for a forward scan or =, <, <= keys for - * a backwards scan. We can use keys for multiple attributes so long as - * the prior attributes had only =, >= (resp. =, <=) keys. 
Once we accept - * a > or < boundary or find an attribute with no boundary (which can be - * thought of as the same as "> -infinity"), we can't use keys for any - * attributes to its right, because it would break our simplistic notion - * of what initial positioning strategy to use. + * We must use the key that was marked required (in the direction opposite + * our own scan's) during preprocessing. Each index attribute can only + * have one such required key. In general, the keys that we use to find + * an initial position when scanning forwards are the same keys that end + * the scan on the leaf level when scanning backwards (and vice-versa). * * When the scan keys include cross-type operators, _bt_preprocess_keys - * may not be able to eliminate redundant keys; in such cases we will - * arbitrarily pick a usable one for each attribute. This is correct - * but possibly not optimal behavior. (For example, with keys like - * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when - * x=5 would be more efficient.) Since the situation only arises given - * a poorly-worded query plus an incomplete opfamily, live with it. + * may not be able to eliminate redundant keys; in such cases it will + * arbitrarily pick a usable key for each attribute (and scan direction), + * ensuring that there is no more than one key required in each direction. + * We stop considering further keys once we reach the first nonrequired + * key (which must come after all required keys), so this can't affect us. + * + * The required keys that we use as starting boundaries have to be =, >, + * or >= keys for a forward scan or =, <, <= keys for a backwards scan. + * We can use keys for multiple attributes so long as the prior attributes + * had only =, >= (resp. =, <=) keys. These rules are very similar to the + * rules that preprocessing used to determine which keys to mark required. + * We cannot always use every required key as a positioning key, though. 
+ * Skip arrays necessitate independently applying our own rules here. + * Skip arrays are always generally considered = array keys, but we'll + * nevertheless treat them as inequalities at certain points of the scan. + * When that happens, it _might_ have implications for the number of + * required keys that we can safely use for initial positioning purposes. * - * When both equality and inequality keys appear for a single attribute - * (again, only possible when cross-type operators appear), we *must* - * select one of the equality keys for the starting point, because - * _bt_checkkeys() will stop the scan as soon as an equality qual fails. - * For example, if we have keys like "x >= 4 AND x = 10" and we elect to - * start at x=4, we will fail and stop before reaching x=10. If multiple - * equality quals survive preprocessing, however, it doesn't matter which - * one we use --- by definition, they are either redundant or - * contradictory. + * For example, a forward scan with a skip array on its leading attribute + * (with no low_compare/high_compare) will have at least two required scan + * keys, but we won't use any of them as boundary keys during the scan's + * initial call here. Our positioning key during the first call here can + * be thought of as representing "> -infinity". Similarly, if such a skip + * array's low_compare is "a > 'foo'", then we position using "a > 'foo'" + * during the scan's initial call here; a lower-order key such as "b = 42" + * can't be used until the "a" array advances beyond MINVAL/low_compare. * - * In practice we rarely see any "attribute boundary key gaps" here. - * Preprocessing can usually backfill skip array keys for any attributes - * that were omitted from the original scan->keyData[] input keys. All - * array keys are always considered = keys, but we'll sometimes need to - * treat the current key value as if we were using an inequality strategy. 
- * This happens with range skip arrays, which store inequality keys in the - * array's low_compare/high_compare fields (used to find the first/last - * set of matches, when = key will lack a usable sk_argument value). - * These are always preferred over any redundant "standard" inequality - * keys on the same column (per the usual rule about preferring = keys). - * Note also that any column with an = skip array key can never have an - * additional, contradictory = key. + * On the other hand, if such a skip array's low_compare was "a >= 'foo'", + * then we _can_ use "a >= 'foo' AND b = 42" during the initial call here. + * A subsequent call here might have us use "a = 'fop' AND b = 42". Note + * that we treat = and >= as equivalent when scanning forwards (just as we + * treat = and <= as equivalent when scanning backwards). We effectively + * do the same thing (though with a distinct "a" element/value) each time. * * All keys (with the exception of SK_SEARCHNULL keys and SK_BT_SKIP * array keys whose array is "null_elem=true") imply a NOT NULL qualifier. @@ -1006,21 +1016,20 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * traversing a lot of null entries at the start of the scan. * * In this loop, row-comparison keys are treated the same as keys on their - * first (leftmost) columns. We'll add on lower-order columns of the row - * comparison below, if possible. + * first (leftmost) columns. We'll add all lower-order columns of the row + * comparison that were marked required during preprocessing below. * - * The selected scan keys (at most one per index column) are remembered by - * storing their addresses into the local startKeys[] array. - * - * _bt_checkkeys/_bt_advance_array_keys decide whether and when to start - * the next primitive index scan (for scans with array keys) based in part - * on an understanding of how it'll enable us to reposition the scan. - * They're directly aware of how we'll sometimes cons up an explicit - * SK_SEARCHNOTNULL key. 
They'll even end primitive scans by applying a - * symmetric "deduce NOT NULL" rule of their own. This allows top-level - * scans to skip large groups of NULLs through repeated deductions about - * key strictness (for a required inequality key) and whether NULLs in the - * key's index column are stored last or first (relative to non-NULLs). + * _bt_advance_array_keys needs to know exactly how we'll reposition the + * scan (should it opt to schedule another primitive index scan). It is + * critical that primscans only be scheduled when they'll definitely make + * some useful progress. _bt_advance_array_keys does this by calling + * _bt_checkkeys routines that report whether a tuple is past the end of + * matches for the scan's keys (given the scan's current array elements). + * If the page's final tuple is "after the end of matches" for a scan that + * uses the *opposite* scan direction, then it must follow that it's also + * "before the start of matches" for the actual current scan direction. + * It is therefore essential that all of our initial positioning rules are + * symmetric with _bt_checkkeys's corresponding continuescan=false rule. * If you update anything here, _bt_checkkeys/_bt_advance_array_keys might * need to be kept in sync. *---------- @@ -1029,18 +1038,17 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (so->numberOfKeys > 0) { AttrNumber curattr; - ScanKey chosen; + ScanKey bkey; ScanKey impliesNN; ScanKey cur; /* - * chosen is the so-far-chosen key for the current attribute, if any. - * We don't cast the decision in stone until we reach keys for the - * next attribute. 
+ * bkey will be set to the key that preprocessing left behind as the + * boundary key for this attribute, in this scan direction (if any) */ cur = so->keyData; curattr = 1; - chosen = NULL; + bkey = NULL; /* Also remember any scankey that implies a NOT NULL constraint */ impliesNN = NULL; @@ -1053,23 +1061,29 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) { if (i >= so->numberOfKeys || cur->sk_attno != curattr) { + /* Done looking for the curattr boundary key */ + Assert(bkey == NULL || + (bkey->sk_attno == curattr && + (bkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))); + Assert(impliesNN == NULL || + (impliesNN->sk_attno == curattr && + (impliesNN->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))); + /* - * Done looking at keys for curattr. - * * If this is a scan key for a skip array whose current * element is MINVAL, choose low_compare (when scanning * backwards it'll be MAXVAL, and we'll choose high_compare). * - * Note: if the array's low_compare key makes 'chosen' NULL, + * Note: if the array's low_compare key makes 'bkey' NULL, * then we behave as if the array's first element is -inf, * except when !array->null_elem implies a usable NOT NULL * constraint. 
*/ - if (chosen != NULL && - (chosen->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))) + if (bkey != NULL && + (bkey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))) { - int ikey = chosen - so->keyData; - ScanKey skipequalitykey = chosen; + int ikey = bkey - so->keyData; + ScanKey skipequalitykey = bkey; BTArrayKeyInfo *array = NULL; for (int arridx = 0; arridx < so->numArrayKeys; arridx++) @@ -1082,35 +1096,35 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (ScanDirectionIsForward(dir)) { Assert(!(skipequalitykey->sk_flags & SK_BT_MAXVAL)); - chosen = array->low_compare; + bkey = array->low_compare; } else { Assert(!(skipequalitykey->sk_flags & SK_BT_MINVAL)); - chosen = array->high_compare; + bkey = array->high_compare; } - Assert(chosen == NULL || - chosen->sk_attno == skipequalitykey->sk_attno); + Assert(bkey == NULL || + bkey->sk_attno == skipequalitykey->sk_attno); if (!array->null_elem) impliesNN = skipequalitykey; else - Assert(chosen == NULL && impliesNN == NULL); + Assert(bkey == NULL && impliesNN == NULL); } /* * If we didn't find a usable boundary key, see if we can * deduce a NOT NULL key */ - if (chosen == NULL && impliesNN != NULL && + if (bkey == NULL && impliesNN != NULL && ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ? ScanDirectionIsForward(dir) : ScanDirectionIsBackward(dir))) { /* Yes, so build the key in notnullkeys[keysz] */ - chosen = &notnullkeys[keysz]; - ScanKeyEntryInitialize(chosen, + bkey = &notnullkeys[keysz]; + ScanKeyEntryInitialize(bkey, (SK_SEARCHNOTNULL | SK_ISNULL | (impliesNN->sk_flags & (SK_BT_DESC | SK_BT_NULLS_FIRST))), @@ -1125,12 +1139,12 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) } /* - * If we still didn't find a usable boundary key, quit; else - * save the boundary key pointer in startKeys.
+ * If preprocessing didn't leave a usable boundary key, quit; + * else save the boundary key pointer in startKeys[] */ - if (chosen == NULL) + if (bkey == NULL) break; - startKeys[keysz++] = chosen; + startKeys[keysz++] = bkey; /* * We can only consider adding more boundary keys when the one @@ -1138,7 +1152,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * (during backwards scans we can only do so when the key that * we just added to startKeys[] uses the = or <= strategy) */ - strat_total = chosen->sk_strategy; + strat_total = bkey->sk_strategy; if (strat_total == BTGreaterStrategyNumber || strat_total == BTLessStrategyNumber) break; @@ -1149,19 +1163,19 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * make strat_total > or < (and stop adding boundary keys). * This can only happen with opclasses that lack skip support. */ - if (chosen->sk_flags & (SK_BT_NEXT | SK_BT_PRIOR)) + if (bkey->sk_flags & (SK_BT_NEXT | SK_BT_PRIOR)) { - Assert(chosen->sk_flags & SK_BT_SKIP); + Assert(bkey->sk_flags & SK_BT_SKIP); Assert(strat_total == BTEqualStrategyNumber); if (ScanDirectionIsForward(dir)) { - Assert(!(chosen->sk_flags & SK_BT_PRIOR)); + Assert(!(bkey->sk_flags & SK_BT_PRIOR)); strat_total = BTGreaterStrategyNumber; } else { - Assert(!(chosen->sk_flags & SK_BT_NEXT)); + Assert(!(bkey->sk_flags & SK_BT_NEXT)); strat_total = BTLessStrategyNumber; } @@ -1175,24 +1189,30 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) /* * Done if that was the last scan key output by preprocessing. - * Also done if there is a gap index attribute that lacks a - * usable key (only possible when preprocessing was unable to - * generate a skip array key to "fill in the gap"). + * Also done if we've now examined all keys marked required. */ if (i >= so->numberOfKeys || - cur->sk_attno != curattr + 1) + !(cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))) break; /* * Reset for next attr. 
*/ + Assert(cur->sk_attno == curattr + 1); curattr = cur->sk_attno; - chosen = NULL; + bkey = NULL; impliesNN = NULL; } /* - * Can we use this key as a starting boundary for this attr? + * If we've located the starting boundary key for curattr, we have + * no interest in curattr's other required key + */ + if (bkey != NULL) + continue; + + /* + * Is this key the starting boundary key for curattr? * * If not, does it imply a NOT NULL constraint? (Because * SK_SEARCHNULL keys are always assigned BTEqualStrategyNumber, @@ -1202,27 +1222,20 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) { case BTLessStrategyNumber: case BTLessEqualStrategyNumber: - if (chosen == NULL) - { - if (ScanDirectionIsBackward(dir)) - chosen = cur; - else - impliesNN = cur; - } + if (ScanDirectionIsBackward(dir)) + bkey = cur; + else if (impliesNN == NULL) + impliesNN = cur; break; case BTEqualStrategyNumber: - /* override any non-equality choice */ - chosen = cur; + bkey = cur; break; case BTGreaterEqualStrategyNumber: case BTGreaterStrategyNumber: - if (chosen == NULL) - { - if (ScanDirectionIsForward(dir)) - chosen = cur; - else - impliesNN = cur; - } + if (ScanDirectionIsForward(dir)) + bkey = cur; + else if (impliesNN == NULL) + impliesNN = cur; break; } } @@ -1248,16 +1261,18 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) Assert(keysz <= INDEX_MAX_KEYS); for (int i = 0; i < keysz; i++) { - ScanKey cur = startKeys[i]; + ScanKey bkey = startKeys[i]; - Assert(cur->sk_attno == i + 1); + Assert(bkey->sk_attno == i + 1); - if (cur->sk_flags & SK_ROW_HEADER) + if (bkey->sk_flags & SK_ROW_HEADER) { /* * Row comparison header: look to the first row member instead */ - ScanKey subkey = (ScanKey) DatumGetPointer(cur->sk_argument); + ScanKey subkey = (ScanKey) DatumGetPointer(bkey->sk_argument); + bool loosen_strat = false, + tighten_strat = false; /* * Cannot be a NULL in the first row member: _bt_preprocess_keys @@ -1265,122 +1280,160 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * 
ever getting this far */ Assert(subkey->sk_flags & SK_ROW_MEMBER); - Assert(subkey->sk_attno == cur->sk_attno); + Assert(subkey->sk_attno == bkey->sk_attno); Assert(!(subkey->sk_flags & SK_ISNULL)); /* + * This is either a > or >= key (during backwards scans it is + * either < or <=) that was marked required during preprocessing. + * Later so->keyData[] keys can't have been marked required, so + * our row compare header key must be the final startKeys[] entry. + */ + Assert(subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)); + Assert(i == keysz - 1); + + /* * The member scankeys are already in insertion format (ie, they * have sk_func = 3-way-comparison function) */ memcpy(inskey.scankeys + i, subkey, sizeof(ScanKeyData)); /* - * If the row comparison is the last positioning key we accepted, - * try to add additional keys from the lower-order row members. - * (If we accepted independent conditions on additional index - * columns, we use those instead --- doesn't seem worth trying to - * determine which is more restrictive.) Note that this is OK - * even if the row comparison is of ">" or "<" type, because the - * condition applied to all but the last row member is effectively - * ">=" or "<=", and so the extra keys don't break the positioning - * scheme. But, by the same token, if we aren't able to use all - * the row members, then the part of the row comparison that we - * did use has to be treated as just a ">=" or "<=" condition, and - * so we'd better adjust strat_total accordingly. + * Now look to later row compare members. + * + * If there's an "index attribute gap" between two row compare + * members, the second member won't have been marked required, and + * so can't be used as a starting boundary key here. The part of + * the row comparison that we do still use has to be treated as a + * ">=" or "<=" condition. For example, a qual "(a, c) > (1, 42)" + * with an omitted intervening index attribute "b" will use an + * insertion scan key "a >= 1". 
Even the first "a = 1" tuple on + * the leaf level might satisfy the row compare qual. + * + * We're able to use a _more_ restrictive strategy when we reach a + * NULL row compare member, since they're always unsatisfiable. + * For example, a qual "(a, b, c) >= (1, NULL, 77)" will use an + * insertion scan key "a > 1". All tuples where "a = 1" cannot + * possibly satisfy the row compare qual, so this is safe. */ - if (i == keysz - 1) + Assert(!(subkey->sk_flags & SK_ROW_END)); + for (;;) { - bool used_all_subkeys = false; + subkey++; + Assert(subkey->sk_flags & SK_ROW_MEMBER); - Assert(!(subkey->sk_flags & SK_ROW_END)); - for (;;) + if (subkey->sk_flags & SK_ISNULL) { - subkey++; - Assert(subkey->sk_flags & SK_ROW_MEMBER); - if (subkey->sk_attno != keysz + 1) - break; /* out-of-sequence, can't use it */ - if (subkey->sk_strategy != cur->sk_strategy) - break; /* wrong direction, can't use it */ - if (subkey->sk_flags & SK_ISNULL) - break; /* can't use null keys */ - Assert(keysz < INDEX_MAX_KEYS); - memcpy(inskey.scankeys + keysz, subkey, - sizeof(ScanKeyData)); - keysz++; - if (subkey->sk_flags & SK_ROW_END) - { - used_all_subkeys = true; - break; - } + /* + * NULL member key, can only use earlier keys. + * + * We deliberately avoid checking if this key is marked + * required. All earlier keys are required, and this key + * is unsatisfiable either way, so we can't miss anything. 
+ */ + tighten_strat = true; + break; } - if (!used_all_subkeys) + + if (!(subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))) { - switch (strat_total) - { - case BTLessStrategyNumber: - strat_total = BTLessEqualStrategyNumber; - break; - case BTGreaterStrategyNumber: - strat_total = BTGreaterEqualStrategyNumber; - break; - } + /* nonrequired member key, can only use earlier keys */ + loosen_strat = true; + break; } - break; /* done with outer loop */ + + Assert(subkey->sk_attno == keysz + 1); + Assert(subkey->sk_strategy == bkey->sk_strategy); + Assert(keysz < INDEX_MAX_KEYS); + + memcpy(inskey.scankeys + keysz, subkey, + sizeof(ScanKeyData)); + keysz++; + if (subkey->sk_flags & SK_ROW_END) + break; } - } - else - { - /* - * Ordinary comparison key. Transform the search-style scan key - * to an insertion scan key by replacing the sk_func with the - * appropriate btree comparison function. - * - * If scankey operator is not a cross-type comparison, we can use - * the cached comparison function; otherwise gotta look it up in - * the catalogs. (That can't lead to infinite recursion, since no - * indexscan initiated by syscache lookup will use cross-data-type - * operators.) - * - * We support the convention that sk_subtype == InvalidOid means - * the opclass input type; this is a hack to simplify life for - * ScanKeyInit(). 
- */ - if (cur->sk_subtype == rel->rd_opcintype[i] || - cur->sk_subtype == InvalidOid) + Assert(!(loosen_strat && tighten_strat)); + if (loosen_strat) { - FmgrInfo *procinfo; - - procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC); - ScanKeyEntryInitializeWithInfo(inskey.scankeys + i, - cur->sk_flags, - cur->sk_attno, - InvalidStrategy, - cur->sk_subtype, - cur->sk_collation, - procinfo, - cur->sk_argument); + /* Use less restrictive strategy (and fewer member keys) */ + switch (strat_total) + { + case BTLessStrategyNumber: + strat_total = BTLessEqualStrategyNumber; + break; + case BTGreaterStrategyNumber: + strat_total = BTGreaterEqualStrategyNumber; + break; + } } - else + if (tighten_strat) { - RegProcedure cmp_proc; - - cmp_proc = get_opfamily_proc(rel->rd_opfamily[i], - rel->rd_opcintype[i], - cur->sk_subtype, - BTORDER_PROC); - if (!RegProcedureIsValid(cmp_proc)) - elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"", - BTORDER_PROC, rel->rd_opcintype[i], cur->sk_subtype, - cur->sk_attno, RelationGetRelationName(rel)); - ScanKeyEntryInitialize(inskey.scankeys + i, - cur->sk_flags, - cur->sk_attno, - InvalidStrategy, - cur->sk_subtype, - cur->sk_collation, - cmp_proc, - cur->sk_argument); + /* Use more restrictive strategy (and fewer member keys) */ + switch (strat_total) + { + case BTLessEqualStrategyNumber: + strat_total = BTLessStrategyNumber; + break; + case BTGreaterEqualStrategyNumber: + strat_total = BTGreaterStrategyNumber; + break; + } } + + /* done adding to inskey (row comparison keys always come last) */ + break; + } + + /* + * Ordinary comparison key/search-style key. + * + * Transform the search-style scan key to an insertion scan key by + * replacing the sk_func with the appropriate btree 3-way-comparison + * function. + * + * If scankey operator is not a cross-type comparison, we can use the + * cached comparison function; otherwise gotta look it up in the + * catalogs. 
(That can't lead to infinite recursion, since no + * indexscan initiated by syscache lookup will use cross-data-type + * operators.) + * + * We support the convention that sk_subtype == InvalidOid means the + * opclass input type; this hack simplifies life for ScanKeyInit(). + */ + if (bkey->sk_subtype == rel->rd_opcintype[i] || + bkey->sk_subtype == InvalidOid) + { + FmgrInfo *procinfo; + + procinfo = index_getprocinfo(rel, bkey->sk_attno, BTORDER_PROC); + ScanKeyEntryInitializeWithInfo(inskey.scankeys + i, + bkey->sk_flags, + bkey->sk_attno, + InvalidStrategy, + bkey->sk_subtype, + bkey->sk_collation, + procinfo, + bkey->sk_argument); + } + else + { + RegProcedure cmp_proc; + + cmp_proc = get_opfamily_proc(rel->rd_opfamily[i], + rel->rd_opcintype[i], + bkey->sk_subtype, BTORDER_PROC); + if (!RegProcedureIsValid(cmp_proc)) + elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"", + BTORDER_PROC, rel->rd_opcintype[i], bkey->sk_subtype, + bkey->sk_attno, RelationGetRelationName(rel)); + ScanKeyEntryInitialize(inskey.scankeys + i, + bkey->sk_flags, + bkey->sk_attno, + InvalidStrategy, + bkey->sk_subtype, + bkey->sk_collation, + cmp_proc, + bkey->sk_argument); } } @@ -1469,6 +1522,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (!BufferIsValid(so->currPos.buf)) { + Assert(!so->needPrimScan); + /* * We only get here if the index is completely empty. Lock relation * because nothing finer to lock exists. 
Without a buffer lock, it's @@ -1487,7 +1542,6 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (!BufferIsValid(so->currPos.buf)) { - Assert(!so->needPrimScan); _bt_parallel_done(scan); return false; } @@ -1610,7 +1664,13 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, so->currPos.currPage = BufferGetBlockNumber(so->currPos.buf); so->currPos.prevPage = opaque->btpo_prev; so->currPos.nextPage = opaque->btpo_next; + /* delay setting so->currPos.lsn until _bt_drop_lock_and_maybe_pin */ + so->currPos.dir = dir; + so->currPos.nextTupleOffset = 0; + /* either moreRight or moreLeft should be set now (may be unset later) */ + Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight : + so->currPos.moreLeft); Assert(!P_IGNORE(opaque)); Assert(BTScanPosIsPinned(so->currPos)); Assert(!so->needPrimScan); @@ -1626,14 +1686,6 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum, so->currPos.currPage); } - /* initialize remaining currPos fields related to current page */ - so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf); - so->currPos.dir = dir; - so->currPos.nextTupleOffset = 0; - /* either moreLeft or moreRight should be set now (may be unset later) */ - Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight : - so->currPos.moreLeft); - PredicateLockPage(rel, so->currPos.currPage, scan->xs_snapshot); /* initialize local variables */ @@ -2107,10 +2159,9 @@ _bt_returnitem(IndexScanDesc scan, BTScanOpaque so) * * Wrapper on _bt_readnextpage that performs final steps for the current page. * - * On entry, if so->currPos.buf is valid the buffer is pinned but not locked. - * If there's no pin held, it's because _bt_drop_lock_and_maybe_pin dropped - * the pin eagerly earlier on. The scan must have so->currPos.currPage set to - * a valid block, in any case. + * On entry, so->currPos must be valid. Its buffer will be pinned, though + * never locked. 
(Actually, when so->dropPin there won't even be a pin held, + * though so->currPos.currPage must still be set to a valid block number.) */ static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir) @@ -2251,12 +2302,14 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir) */ if (_bt_readpage(scan, dir, offnum, true)) { + Relation rel = scan->indexRelation; + /* * _bt_readpage succeeded. Drop the lock (and maybe the pin) on * so->currPos.buf in preparation for btgettuple returning tuples. */ Assert(BTScanPosIsPinned(so->currPos)); - _bt_drop_lock_and_maybe_pin(scan, &so->currPos); + _bt_drop_lock_and_maybe_pin(rel, so); return true; } @@ -2278,9 +2331,12 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir) * previously-saved right link or left link. lastcurrblkno is the page that * was current at the point where the blkno link was saved, which we use to * reason about concurrent page splits/page deletions during backwards scans. + * In the common case where seized=false, blkno is either so->currPos.nextPage + * or so->currPos.prevPage, and lastcurrblkno is so->currPos.currPage. * - * On entry, caller shouldn't hold any locks or pins on any page (we work - * directly off of blkno and lastcurrblkno instead). Parallel scan callers + * On entry, so->currPos shouldn't be locked by caller. so->currPos.buf must + * be InvalidBuffer/unpinned as needed by caller (note that lastcurrblkno + * won't need to be read again in almost all cases). Parallel scan callers * that seized the scan before calling here should pass seized=true; such a * caller's blkno and lastcurrblkno arguments come from the seized scan. * seized=false callers just pass us the blkno/lastcurrblkno taken from their @@ -2294,11 +2350,11 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir) * * On success exit, so->currPos is updated to contain data from the next * interesting page, and we return true. 
We hold a pin on the buffer on - * success exit, except when _bt_drop_lock_and_maybe_pin decided it was safe - * to eagerly drop the pin (to avoid blocking VACUUM). + * success exit (except during so->dropPin index scans, when we drop the pin + * eagerly to avoid blocking VACUUM). * - * If there are no more matching records in the given direction, we drop all - * locks and pins, invalidate so->currPos, and return false. + * If there are no more matching records in the given direction, we invalidate + * so->currPos (while ensuring it retains no locks or pins), and return false. * * We always release the scan for a parallel scan caller, regardless of * success or failure; we'll call _bt_parallel_release as soon as possible. @@ -2413,7 +2469,7 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno, */ Assert(so->currPos.currPage == blkno); Assert(BTScanPosIsPinned(so->currPos)); - _bt_drop_lock_and_maybe_pin(scan, &so->currPos); + _bt_drop_lock_and_maybe_pin(rel, so); return true; } diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index 3794cc924ad..9d70e89c1f3 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -105,7 +105,7 @@ typedef struct BTShared int scantuplesortstates; /* Query ID, for report in worker processes */ - uint64 queryid; + int64 queryid; /* * workersdonecv is used to monitor the progress of workers. 
All parallel diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 1a15dfcb7d3..9aed207995f 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -44,7 +44,6 @@ static bool _bt_array_decrement(Relation rel, ScanKey skey, BTArrayKeyInfo *arra static bool _bt_array_increment(Relation rel, ScanKey skey, BTArrayKeyInfo *array); static bool _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir, bool *skip_array_set); -static void _bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir); static bool _bt_tuple_before_array_skeys(IndexScanDesc scan, ScanDirection dir, IndexTuple tuple, TupleDesc tupdesc, int tupnatts, bool readpagetup, int sktrig, bool *scanBehind); @@ -52,7 +51,6 @@ static bool _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, IndexTuple tuple, int tupnatts, TupleDesc tupdesc, int sktrig, bool sktrig_required); #ifdef USE_ASSERT_CHECKING -static bool _bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir); static bool _bt_verify_keys_with_arraykeys(IndexScanDesc scan); #endif static bool _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir, @@ -1035,73 +1033,6 @@ _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir, } /* - * _bt_rewind_nonrequired_arrays() -- Rewind SAOP arrays not marked required - * - * Called when _bt_advance_array_keys decides to start a new primitive index - * scan on the basis of the current scan position being before the position - * that _bt_first is capable of repositioning the scan to by applying an - * inequality operator required in the opposite-to-scan direction only. - * - * Although equality strategy scan keys (for both arrays and non-arrays alike) - * are either marked required in both directions or in neither direction, - * there is a sense in which non-required arrays behave like required arrays. 
- * With a qual such as "WHERE a IN (100, 200) AND b >= 3 AND c IN (5, 6, 7)", - * the scan key on "c" is non-required, but nevertheless enables positioning - * the scan at the first tuple >= "(100, 3, 5)" on the leaf level during the - * first descent of the tree by _bt_first. Later on, there could also be a - * second descent, that places the scan right before tuples >= "(200, 3, 5)". - * _bt_first must never be allowed to build an insertion scan key whose "c" - * entry is set to a value other than 5, the "c" array's first element/value. - * (Actually, it's the first in the current scan direction. This example uses - * a forward scan.) - * - * Calling here resets the array scan key elements for the scan's non-required - * arrays. This is strictly necessary for correctness in a subset of cases - * involving "required in opposite direction"-triggered primitive index scans. - * Not all callers are at risk of _bt_first using a non-required array like - * this, but advancement always resets the arrays when another primitive scan - * is scheduled, just to keep things simple. Array advancement even makes - * sure to reset non-required arrays during scans that have no inequalities. - * (Advancement still won't call here when there are no inequalities, though - * that's just because it's all handled indirectly instead.) - * - * Note: _bt_verify_arrays_bt_first is called by an assertion to enforce that - * everybody got this right. - * - * Note: In practice almost all SAOP arrays are marked required during - * preprocessing (if necessary by generating skip arrays). It is hardly ever - * truly necessary to call here, but consistently doing so is simpler. 
- */ -static void -_bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir) -{ - Relation rel = scan->indexRelation; - BTScanOpaque so = (BTScanOpaque) scan->opaque; - int arrayidx = 0; - - for (int ikey = 0; ikey < so->numberOfKeys; ikey++) - { - ScanKey cur = so->keyData + ikey; - BTArrayKeyInfo *array = NULL; - - if (!(cur->sk_flags & SK_SEARCHARRAY) || - cur->sk_strategy != BTEqualStrategyNumber) - continue; - - array = &so->arrayKeys[arrayidx++]; - Assert(array->scan_key == ikey); - - if ((cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))) - continue; - - Assert(array->num_elems != -1); /* No non-required skip arrays */ - - _bt_array_set_low_or_high(rel, cur, array, - ScanDirectionIsForward(dir)); - } -} - -/* * _bt_tuple_before_array_skeys() -- too early to advance required arrays? * * We always compare the tuple using the current array keys (which we assume @@ -1380,8 +1311,6 @@ _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir) */ if (so->needPrimScan) { - Assert(_bt_verify_arrays_bt_first(scan, dir)); - /* * Flag was set -- must call _bt_first again, which will reset the * scan's needPrimScan flag @@ -2007,14 +1936,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, */ else if (has_required_opposite_direction_only && pstate->finaltup && unlikely(!_bt_oppodir_checkkeys(scan, dir, pstate->finaltup))) - { - /* - * Make sure that any SAOP arrays that were not marked required by - * preprocessing are reset to their first element for this direction - */ - _bt_rewind_nonrequired_arrays(scan, dir); goto new_prim_scan; - } continue_scan: @@ -2045,8 +1967,6 @@ continue_scan: */ so->oppositeDirCheck = has_required_opposite_direction_only; - _bt_rewind_nonrequired_arrays(scan, dir); - /* * skip by setting "look ahead" mechanism's offnum for forwards scans * (backwards scans check scanBehind flag directly instead) @@ -2143,48 +2063,6 @@ end_toplevel_scan: #ifdef USE_ASSERT_CHECKING /* - * Verify that the scan's qual state matches 
what we expect at the point that - * _bt_start_prim_scan is about to start a just-scheduled new primitive scan. - * - * We enforce a rule against non-required array scan keys: they must start out - * with whatever element is the first for the scan's current scan direction. - * See _bt_rewind_nonrequired_arrays comments for an explanation. - */ -static bool -_bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir) -{ - BTScanOpaque so = (BTScanOpaque) scan->opaque; - int arrayidx = 0; - - for (int ikey = 0; ikey < so->numberOfKeys; ikey++) - { - ScanKey cur = so->keyData + ikey; - BTArrayKeyInfo *array = NULL; - int first_elem_dir; - - if (!(cur->sk_flags & SK_SEARCHARRAY) || - cur->sk_strategy != BTEqualStrategyNumber) - continue; - - array = &so->arrayKeys[arrayidx++]; - - if (((cur->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) || - ((cur->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir))) - continue; - - if (ScanDirectionIsForward(dir)) - first_elem_dir = 0; - else - first_elem_dir = array->num_elems - 1; - - if (array->cur_elem != first_elem_dir) - return false; - } - - return _bt_verify_keys_with_arraykeys(scan); -} - -/* * Verify that the scan's "so->keyData[]" scan keys are in agreement with * its array key state */ @@ -2194,6 +2072,7 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan) BTScanOpaque so = (BTScanOpaque) scan->opaque; int last_sk_attno = InvalidAttrNumber, arrayidx = 0; + bool nonrequiredseen = false; if (!so->qual_ok) return false; @@ -2217,8 +2096,16 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan) if (array->num_elems != -1 && cur->sk_argument != array->elem_values[array->cur_elem]) return false; - if (last_sk_attno > cur->sk_attno) - return false; + if (cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) + { + if (last_sk_attno > cur->sk_attno) + return false; + if (nonrequiredseen) + return false; + } + else + nonrequiredseen = true; + last_sk_attno = cur->sk_attno; } @@ -2551,37 +2438,12 @@ 
_bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate) if (!(key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))) { /* Scan key isn't marked required (corner case) */ - Assert(!(key->sk_flags & SK_ROW_HEADER)); break; /* unsafe */ } if (key->sk_flags & SK_ROW_HEADER) { - /* - * RowCompare inequality. - * - * Only the first subkey from a RowCompare can ever be marked - * required (that happens when the row header is marked required). - * There is no simple, general way for us to transitively deduce - * whether or not every tuple on the page satisfies a RowCompare - * key based only on firsttup and lasttup -- so we just give up. - */ - if (!start_past_saop_eq && !so->skipScan) - break; /* unsafe to go further */ - - /* - * We have to be even more careful with RowCompares that come - * after an array: we assume it's unsafe to even bypass the array. - * Calling _bt_start_array_keys to recover the scan's arrays - * following use of forcenonrequired mode isn't compatible with - * _bt_check_rowcompare's continuescan=false behavior with NULL - * row compare members. _bt_advance_array_keys must not make a - * decision on the basis of a key not being satisfied in the - * opposite-to-scan direction until the scan reaches a leaf page - * where the same key begins to be satisfied in scan direction. - * The _bt_first !used_all_subkeys behavior makes this limitation - * hard to work around some other way. - */ - return; /* completely unsafe to set pstate.startikey */ + /* RowCompare inequalities currently aren't supported */ + break; /* "unsafe" */ } if (key->sk_strategy != BTEqualStrategyNumber) { @@ -3078,6 +2940,31 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, Assert(subkey->sk_flags & SK_ROW_MEMBER); + /* When a NULL row member is compared, the row never matches */ + if (subkey->sk_flags & SK_ISNULL) + { + /* + * Unlike the simple-scankey case, this isn't a disallowed case + * (except when it's the first row element that has the NULL arg). 
+ * But it can never match. If all the earlier row comparison + * columns are required for the scan direction, we can stop the + * scan, because there can't be another tuple that will succeed. + */ + Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument)); + subkey--; + if (forcenonrequired) + { + /* treating scan's keys as non-required */ + } + else if ((subkey->sk_flags & SK_BT_REQFWD) && + ScanDirectionIsForward(dir)) + *continuescan = false; + else if ((subkey->sk_flags & SK_BT_REQBKWD) && + ScanDirectionIsBackward(dir)) + *continuescan = false; + return false; + } + if (subkey->sk_attno > tupnatts) { /* @@ -3087,11 +2974,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, * attribute passes the qual. */ Assert(BTreeTupleIsPivot(tuple)); - cmpresult = 0; - if (subkey->sk_flags & SK_ROW_END) - break; - subkey++; - continue; + return true; } datum = index_getattr(tuple, @@ -3101,6 +2984,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, if (isNull) { + int reqflags; + if (forcenonrequired) { /* treating scan's keys as non-required */ @@ -3111,15 +2996,35 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, * Since NULLs are sorted before non-NULLs, we know we have * reached the lower limit of the range of values for this * index attr. On a backward scan, we can stop if this qual - * is one of the "must match" subset. We can stop regardless - * of whether the qual is > or <, so long as it's required, - * because it's not possible for any future tuples to pass. On - * a forward scan, however, we must keep going, because we may - * have initially positioned to the start of the index. - * (_bt_advance_array_keys also relies on this behavior during - * forward scans.) + * is one of the "must match" subset. However, on a forwards + * scan, we must keep going, because we may have initially + * positioned to the start of the index. 
+ * + * All required NULLS FIRST > row members can use NULL tuple + * values to end backwards scans, just like with other values. + * A qual "WHERE (a, b, c) > (9, 42, 'foo')" can terminate a + * backwards scan upon reaching the index's rightmost "a = 9" + * tuple whose "b" column contains a NULL (if not sooner). + * Since "b" is NULLS FIRST, we can treat its NULLs as "<" 42. */ - if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) && + reqflags = SK_BT_REQBKWD; + + /* + * When a most significant required NULLS FIRST < row compare + * member sees NULL tuple values during a backwards scan, it + * signals the end of matches for the whole row compare/scan. + * A qual "WHERE (a, b, c) < (9, 42, 'foo')" will terminate a + * backwards scan upon reaching the rightmost tuple whose "a" + * column has a NULL. The "a" NULL value is "<" 9, and yet + * our < row compare will still end the scan. (This isn't + * safe with later/lower-order row members. Notice that it + * can only happen with an "a" NULL some time after the scan + * completely stops needing to use its "b" and "c" members.) + */ + if (subkey == (ScanKey) DatumGetPointer(skey->sk_argument)) + reqflags |= SK_BT_REQFWD; /* safe, first row member */ + + if ((subkey->sk_flags & reqflags) && ScanDirectionIsBackward(dir)) *continuescan = false; } @@ -3129,15 +3034,35 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, * Since NULLs are sorted after non-NULLs, we know we have * reached the upper limit of the range of values for this * index attr. On a forward scan, we can stop if this qual is - * one of the "must match" subset. We can stop regardless of - * whether the qual is > or <, so long as it's required, - * because it's not possible for any future tuples to pass. On - * a backward scan, however, we must keep going, because we - * may have initially positioned to the end of the index. - * (_bt_advance_array_keys also relies on this behavior during - * backward scans.) 
+ * one of the "must match" subset. However, on a backward + * scan, we must keep going, because we may have initially + * positioned to the end of the index. + * + * All required NULLS LAST < row members can use NULL tuple + * values to end forwards scans, just like with other values. + * A qual "WHERE (a, b, c) < (9, 42, 'foo')" can terminate a + * forwards scan upon reaching the index's leftmost "a = 9" + * tuple whose "b" column contains a NULL (if not sooner). + * Since "b" is NULLS LAST, we can treat its NULLs as ">" 42. + */ + reqflags = SK_BT_REQFWD; + + /* + * When a most significant required NULLS LAST > row compare + * member sees NULL tuple values during a forwards scan, it + * signals the end of matches for the whole row compare/scan. + * A qual "WHERE (a, b, c) > (9, 42, 'foo')" will terminate a + * forwards scan upon reaching the leftmost tuple whose "a" + * column has a NULL. The "a" NULL value is ">" 9, and yet + * our > row compare will end the scan. (This isn't safe with + * later/lower-order row members. Notice that it can only + * happen with an "a" NULL some time after the scan completely + * stops needing to use its "b" and "c" members.) */ - if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) && + if (subkey == (ScanKey) DatumGetPointer(skey->sk_argument)) + reqflags |= SK_BT_REQBKWD; /* safe, first row member */ + + if ((subkey->sk_flags & reqflags) && ScanDirectionIsForward(dir)) *continuescan = false; } @@ -3148,30 +3073,6 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, return false; } - if (subkey->sk_flags & SK_ISNULL) - { - /* - * Unlike the simple-scankey case, this isn't a disallowed case - * (except when it's the first row element that has the NULL arg). - * But it can never match. If all the earlier row comparison - * columns are required for the scan direction, we can stop the - * scan, because there can't be another tuple that will succeed. 
- */ - Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument)); - subkey--; - if (forcenonrequired) - { - /* treating scan's keys as non-required */ - } - else if ((subkey->sk_flags & SK_BT_REQFWD) && - ScanDirectionIsForward(dir)) - *continuescan = false; - else if ((subkey->sk_flags & SK_BT_REQBKWD) && - ScanDirectionIsBackward(dir)) - *continuescan = false; - return false; - } - /* Perform the test --- three-way comparison not bool operator */ cmpresult = DatumGetInt32(FunctionCall2Coll(&subkey->sk_func, subkey->sk_collation, @@ -3330,87 +3231,85 @@ _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate, * current page and killed tuples thereon (generally, this should only be * called if so->numKilled > 0). * - * The caller does not have a lock on the page and may or may not have the - * page pinned in a buffer. Note that read-lock is sufficient for setting - * LP_DEAD status (which is only a hint). - * - * We match items by heap TID before assuming they are the right ones to - * delete. We cope with cases where items have moved right due to insertions. - * If an item has moved off the current page due to a split, we'll fail to - * find it and do nothing (this is not an error case --- we assume the item - * will eventually get marked in a future indexscan). + * Caller should not have a lock on the so->currPos page, but must hold a + * buffer pin when !so->dropPin. When we return, it still won't be locked. + * It'll continue to hold whatever pins were held before calling here. * - * Note that if we hold a pin on the target page continuously from initially - * reading the items until applying this function, VACUUM cannot have deleted - * any items from the page, and so there is no need to search left from the - * recorded offset. (This observation also guarantees that the item is still - * the right one to delete, which might otherwise be questionable since heap - * TIDs can get recycled.) 
This holds true even if the page has been modified - * by inserts and page splits, so there is no need to consult the LSN. + * We match items by heap TID before assuming they are the right ones to set + * LP_DEAD. If the scan is one that holds a buffer pin on the target page + * continuously from initially reading the items until applying this function + * (if it is a !so->dropPin scan), VACUUM cannot have deleted any items on the + * page, so the page's TIDs can't have been recycled by now. There's no risk + * that we'll confuse a new index tuple that happens to use a recycled TID + * with a now-removed tuple with the same TID (that used to be on this same + * page). We can't rely on that during scans that drop buffer pins eagerly + * (so->dropPin scans), though, so we must condition setting LP_DEAD bits on + * the page LSN having not changed since back when _bt_readpage saw the page. + * We totally give up on setting LP_DEAD bits when the page LSN changed. * - * If the pin was released after reading the page, then we re-read it. If it - * has been modified since we read it (as determined by the LSN), we dare not - * flag any entries because it is possible that the old entry was vacuumed - * away and the TID was re-used by a completely different heap tuple. + * We give up much less often during !so->dropPin scans, but it still happens. + * We cope with cases where items have moved right due to insertions. If an + * item has moved off the current page due to a split, we'll fail to find it + * and just give up on it. 
*/ void _bt_killitems(IndexScanDesc scan) { + Relation rel = scan->indexRelation; BTScanOpaque so = (BTScanOpaque) scan->opaque; Page page; BTPageOpaque opaque; OffsetNumber minoff; OffsetNumber maxoff; - int i; int numKilled = so->numKilled; bool killedsomething = false; - bool droppedpin PG_USED_FOR_ASSERTS_ONLY; + Buffer buf; + Assert(numKilled > 0); Assert(BTScanPosIsValid(so->currPos)); + Assert(scan->heapRelation != NULL); /* can't be a bitmap index scan */ - /* - * Always reset the scan state, so we don't look for same items on other - * pages. - */ + /* Always invalidate so->killedItems[] before leaving so->currPos */ so->numKilled = 0; - if (BTScanPosIsPinned(so->currPos)) + if (!so->dropPin) { /* * We have held the pin on this page since we read the index tuples, * so all we need to do is lock it. The pin will have prevented - * re-use of any TID on the page, so there is no need to check the - * LSN. + * concurrent VACUUMs from recycling any of the TIDs on the page. */ - droppedpin = false; - _bt_lockbuf(scan->indexRelation, so->currPos.buf, BT_READ); - - page = BufferGetPage(so->currPos.buf); + Assert(BTScanPosIsPinned(so->currPos)); + buf = so->currPos.buf; + _bt_lockbuf(rel, buf, BT_READ); } else { - Buffer buf; + XLogRecPtr latestlsn; - droppedpin = true; - /* Attempt to re-read the buffer, getting pin and lock. */ - buf = _bt_getbuf(scan->indexRelation, so->currPos.currPage, BT_READ); + Assert(!BTScanPosIsPinned(so->currPos)); + Assert(RelationNeedsWAL(rel)); + buf = _bt_getbuf(rel, so->currPos.currPage, BT_READ); - page = BufferGetPage(buf); - if (BufferGetLSNAtomic(buf) == so->currPos.lsn) - so->currPos.buf = buf; - else + latestlsn = BufferGetLSNAtomic(buf); + Assert(!XLogRecPtrIsInvalid(so->currPos.lsn)); + Assert(so->currPos.lsn <= latestlsn); + if (so->currPos.lsn != latestlsn) { - /* Modified while not pinned means hinting is not safe. 
*/ - _bt_relbuf(scan->indexRelation, buf); + /* Modified, give up on hinting */ + _bt_relbuf(rel, buf); return; } + + /* Unmodified, hinting is safe */ } + page = BufferGetPage(buf); opaque = BTPageGetOpaque(page); minoff = P_FIRSTDATAKEY(opaque); maxoff = PageGetMaxOffsetNumber(page); - for (i = 0; i < numKilled; i++) + for (int i = 0; i < numKilled; i++) { int itemIndex = so->killedItems[i]; BTScanPosItem *kitem = &so->currPos.items[itemIndex]; @@ -3442,7 +3341,7 @@ _bt_killitems(IndexScanDesc scan) * correctness. * * Note that the page may have been modified in almost any way - * since we first read it (in the !droppedpin case), so it's + * since we first read it (in the !so->dropPin case), so it's * possible that this posting list tuple wasn't a posting list * tuple when we first encountered its heap TIDs. */ @@ -3458,7 +3357,7 @@ _bt_killitems(IndexScanDesc scan) * though only in the common case where the page can't * have been concurrently modified */ - Assert(kitem->indexOffset == offnum || !droppedpin); + Assert(kitem->indexOffset == offnum || !so->dropPin); /* * Read-ahead to later kitems here. 
@@ -3522,10 +3421,13 @@ _bt_killitems(IndexScanDesc scan) if (killedsomething) { opaque->btpo_flags |= BTP_HAS_GARBAGE; - MarkBufferDirtyHint(so->currPos.buf, true); + MarkBufferDirtyHint(buf, true); } - _bt_unlockbuf(scan->indexRelation, so->currPos.buf); + if (!so->dropPin) + _bt_unlockbuf(rel, buf); + else + _bt_relbuf(rel, buf); } diff --git a/src/backend/access/rmgrdesc/replorigindesc.c b/src/backend/access/rmgrdesc/replorigindesc.c index 5dd74233996..35e3af2903e 100644 --- a/src/backend/access/rmgrdesc/replorigindesc.c +++ b/src/backend/access/rmgrdesc/replorigindesc.c @@ -29,7 +29,7 @@ replorigin_desc(StringInfo buf, XLogReaderState *record) xlrec = (xl_replorigin_set *) rec; - appendStringInfo(buf, "set %u; lsn %X/%X; force: %d", + appendStringInfo(buf, "set %u; lsn %X/%08X; force: %d", xlrec->node_id, LSN_FORMAT_ARGS(xlrec->remote_lsn), xlrec->force); diff --git a/src/backend/access/rmgrdesc/xactdesc.c b/src/backend/access/rmgrdesc/xactdesc.c index 305598e2865..f0f696855b9 100644 --- a/src/backend/access/rmgrdesc/xactdesc.c +++ b/src/backend/access/rmgrdesc/xactdesc.c @@ -359,7 +359,7 @@ xact_desc_commit(StringInfo buf, uint8 info, xl_xact_commit *xlrec, RepOriginId if (parsed.xinfo & XACT_XINFO_HAS_ORIGIN) { - appendStringInfo(buf, "; origin: node %u, lsn %X/%X, at %s", + appendStringInfo(buf, "; origin: node %u, lsn %X/%08X, at %s", origin_id, LSN_FORMAT_ARGS(parsed.origin_lsn), timestamptz_to_str(parsed.origin_timestamp)); @@ -384,7 +384,7 @@ xact_desc_abort(StringInfo buf, uint8 info, xl_xact_abort *xlrec, RepOriginId or if (parsed.xinfo & XACT_XINFO_HAS_ORIGIN) { - appendStringInfo(buf, "; origin: node %u, lsn %X/%X, at %s", + appendStringInfo(buf, "; origin: node %u, lsn %X/%08X, at %s", origin_id, LSN_FORMAT_ARGS(parsed.origin_lsn), timestamptz_to_str(parsed.origin_timestamp)); @@ -418,7 +418,7 @@ xact_desc_prepare(StringInfo buf, uint8 info, xl_xact_prepare *xlrec, RepOriginI * way as PrepareRedoAdd(). 
*/ if (origin_id != InvalidRepOriginId) - appendStringInfo(buf, "; origin: node %u, lsn %X/%X, at %s", + appendStringInfo(buf, "; origin: node %u, lsn %X/%08X, at %s", origin_id, LSN_FORMAT_ARGS(parsed.origin_lsn), timestamptz_to_str(parsed.origin_timestamp)); diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index 58040f28656..cd6c2a2f650 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -65,7 +65,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record) { CheckPoint *checkpoint = (CheckPoint *) rec; - appendStringInfo(buf, "redo %X/%X; " + appendStringInfo(buf, "redo %X/%08X; " "tli %u; prev tli %u; fpw %s; wal_level %s; xid %u:%u; oid %u; multi %u; offset %u; " "oldest xid %u in DB %u; oldest multi %u in DB %u; " "oldest/newest commit timestamp xid: %u/%u; " @@ -111,7 +111,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record) XLogRecPtr startpoint; memcpy(&startpoint, rec, sizeof(XLogRecPtr)); - appendStringInfo(buf, "%X/%X", LSN_FORMAT_ARGS(startpoint)); + appendStringInfo(buf, "%X/%08X", LSN_FORMAT_ARGS(startpoint)); } else if (info == XLOG_PARAMETER_CHANGE) { @@ -156,7 +156,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record) xl_overwrite_contrecord xlrec; memcpy(&xlrec, rec, sizeof(xl_overwrite_contrecord)); - appendStringInfo(buf, "lsn %X/%X; time %s", + appendStringInfo(buf, "lsn %X/%08X; time %s", LSN_FORMAT_ARGS(xlrec.overwritten_lsn), timestamptz_to_str(xlrec.overwrite_time)); } diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 48f10bec91e..e80fbe109cf 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -110,9 +110,7 @@ static SlruCtlData XactCtlData; #define XactCtl (&XactCtlData) -static int ZeroCLOGPage(int64 pageno, bool writeXlog); static bool CLOGPagePrecedes(int64 page1, int64 page2); -static void WriteZeroPageXlogRec(int64 pageno); static void WriteTruncateXlogRec(int64 
pageno, TransactionId oldestXact, Oid oldestXactDb); static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids, @@ -832,41 +830,8 @@ check_transaction_buffers(int *newval, void **extra, GucSource source) void BootStrapCLOG(void) { - int slotno; - LWLock *lock = SimpleLruGetBankLock(XactCtl, 0); - - LWLockAcquire(lock, LW_EXCLUSIVE); - - /* Create and zero the first page of the commit log */ - slotno = ZeroCLOGPage(0, false); - - /* Make sure it's written out */ - SimpleLruWritePage(XactCtl, slotno); - Assert(!XactCtl->shared->page_dirty[slotno]); - - LWLockRelease(lock); -} - -/* - * Initialize (or reinitialize) a page of CLOG to zeroes. - * If writeXlog is true, also emit an XLOG record saying we did this. - * - * The page is not actually written, just set up in shared memory. - * The slot number of the new page is returned. - * - * Control lock must be held at entry, and will be held at exit. - */ -static int -ZeroCLOGPage(int64 pageno, bool writeXlog) -{ - int slotno; - - slotno = SimpleLruZeroPage(XactCtl, pageno); - - if (writeXlog) - WriteZeroPageXlogRec(pageno); - - return slotno; + /* Zero the initial page and flush it to disk */ + SimpleLruZeroAndWritePage(XactCtl, 0); } /* @@ -974,8 +939,9 @@ ExtendCLOG(TransactionId newestXact) LWLockAcquire(lock, LW_EXCLUSIVE); - /* Zero the page and make an XLOG entry about it */ - ZeroCLOGPage(pageno, true); + /* Zero the page and make a WAL entry about it */ + SimpleLruZeroPage(XactCtl, pageno); + XLogSimpleInsertInt64(RM_CLOG_ID, CLOG_ZEROPAGE, pageno); LWLockRelease(lock); } @@ -1068,17 +1034,6 @@ CLOGPagePrecedes(int64 page1, int64 page2) /* - * Write a ZEROPAGE xlog record - */ -static void -WriteZeroPageXlogRec(int64 pageno) -{ - XLogBeginInsert(); - XLogRegisterData(&pageno, sizeof(pageno)); - (void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE); -} - -/* * Write a TRUNCATE xlog record * * We must flush the xlog record to disk before returning --- see notes @@ -1114,19 +1069,9 @@ clog_redo(XLogReaderState 
*record) if (info == CLOG_ZEROPAGE) { int64 pageno; - int slotno; - LWLock *lock; memcpy(&pageno, XLogRecGetData(record), sizeof(pageno)); - - lock = SimpleLruGetBankLock(XactCtl, pageno); - LWLockAcquire(lock, LW_EXCLUSIVE); - - slotno = ZeroCLOGPage(pageno, false); - SimpleLruWritePage(XactCtl, slotno); - Assert(!XactCtl->shared->page_dirty[slotno]); - - LWLockRelease(lock); + SimpleLruZeroAndWritePage(XactCtl, pageno); } else if (info == CLOG_TRUNCATE) { diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index 113fae1437a..370b38e048b 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -114,11 +114,9 @@ static void SetXidCommitTsInPage(TransactionId xid, int nsubxids, static void TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts, RepOriginId nodeid, int slotno); static void error_commit_ts_disabled(void); -static int ZeroCommitTsPage(int64 pageno, bool writeXlog); static bool CommitTsPagePrecedes(int64 page1, int64 page2); static void ActivateCommitTs(void); static void DeactivateCommitTs(void); -static void WriteZeroPageXlogRec(int64 pageno); static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXid); /* @@ -603,28 +601,6 @@ BootStrapCommitTs(void) } /* - * Initialize (or reinitialize) a page of CommitTs to zeroes. - * If writeXlog is true, also emit an XLOG record saying we did this. - * - * The page is not actually written, just set up in shared memory. - * The slot number of the new page is returned. - * - * Control lock must be held at entry, and will be held at exit. - */ -static int -ZeroCommitTsPage(int64 pageno, bool writeXlog) -{ - int slotno; - - slotno = SimpleLruZeroPage(CommitTsCtl, pageno); - - if (writeXlog) - WriteZeroPageXlogRec(pageno); - - return slotno; -} - -/* * This must be called ONCE during postmaster or standalone-backend startup, * after StartupXLOG has initialized TransamVariables->nextXid. 
*/ @@ -707,6 +683,13 @@ ActivateCommitTs(void) TransactionId xid; int64 pageno; + /* + * During bootstrap, we should not register commit timestamps so skip the + * activation in this case. + */ + if (IsBootstrapProcessingMode()) + return; + /* If we've done this already, there's nothing to do */ LWLockAcquire(CommitTsLock, LW_EXCLUSIVE); if (commitTsShared->commitTsActive) @@ -747,16 +730,7 @@ ActivateCommitTs(void) /* Create the current segment file, if necessary */ if (!SimpleLruDoesPhysicalPageExist(CommitTsCtl, pageno)) - { - LWLock *lock = SimpleLruGetBankLock(CommitTsCtl, pageno); - int slotno; - - LWLockAcquire(lock, LW_EXCLUSIVE); - slotno = ZeroCommitTsPage(pageno, false); - SimpleLruWritePage(CommitTsCtl, slotno); - Assert(!CommitTsCtl->shared->page_dirty[slotno]); - LWLockRelease(lock); - } + SimpleLruZeroAndWritePage(CommitTsCtl, pageno); /* Change the activation status in shared memory. */ LWLockAcquire(CommitTsLock, LW_EXCLUSIVE); @@ -867,8 +841,12 @@ ExtendCommitTs(TransactionId newestXact) LWLockAcquire(lock, LW_EXCLUSIVE); - /* Zero the page and make an XLOG entry about it */ - ZeroCommitTsPage(pageno, !InRecovery); + /* Zero the page ... 
*/ + SimpleLruZeroPage(CommitTsCtl, pageno); + + /* and make a WAL entry about that, unless we're in REDO */ + if (!InRecovery) + XLogSimpleInsertInt64(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE, pageno); LWLockRelease(lock); } @@ -983,17 +961,6 @@ CommitTsPagePrecedes(int64 page1, int64 page2) /* - * Write a ZEROPAGE xlog record - */ -static void -WriteZeroPageXlogRec(int64 pageno) -{ - XLogBeginInsert(); - XLogRegisterData(&pageno, sizeof(pageno)); - (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_ZEROPAGE); -} - -/* * Write a TRUNCATE xlog record */ static void @@ -1023,19 +990,9 @@ commit_ts_redo(XLogReaderState *record) if (info == COMMIT_TS_ZEROPAGE) { int64 pageno; - int slotno; - LWLock *lock; memcpy(&pageno, XLogRecGetData(record), sizeof(pageno)); - - lock = SimpleLruGetBankLock(CommitTsCtl, pageno); - LWLockAcquire(lock, LW_EXCLUSIVE); - - slotno = ZeroCommitTsPage(pageno, false); - SimpleLruWritePage(CommitTsCtl, slotno); - Assert(!CommitTsCtl->shared->page_dirty[slotno]); - - LWLockRelease(lock); + SimpleLruZeroAndWritePage(CommitTsCtl, pageno); } else if (info == COMMIT_TS_TRUNCATE) { diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 3c06ac45532..3cb09c3d598 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -401,8 +401,6 @@ static void mXactCachePut(MultiXactId multi, int nmembers, static char *mxstatus_to_string(MultiXactStatus status); /* management of SLRU infrastructure */ -static int ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog); -static int ZeroMultiXactMemberPage(int64 pageno, bool writeXlog); static bool MultiXactOffsetPagePrecedes(int64 page1, int64 page2); static bool MultiXactMemberPagePrecedes(int64 page1, int64 page2); static bool MultiXactOffsetPrecedes(MultiXactOffset offset1, @@ -413,7 +411,6 @@ static bool MultiXactOffsetWouldWrap(MultiXactOffset boundary, MultiXactOffset start, uint32 distance); static bool SetOffsetVacuumLimit(bool 
is_startup); static bool find_multixact_start(MultiXactId multi, MultiXactOffset *result); -static void WriteMZeroPageXlogRec(int64 pageno, uint8 info); static void WriteMTruncateXlogRec(Oid oldestMultiDB, MultiXactId startTruncOff, MultiXactId endTruncOff, @@ -1847,7 +1844,7 @@ AtPrepare_MultiXact(void) * Clean up after successful PREPARE TRANSACTION */ void -PostPrepare_MultiXact(TransactionId xid) +PostPrepare_MultiXact(FullTransactionId fxid) { MultiXactId myOldestMember; @@ -1858,7 +1855,7 @@ PostPrepare_MultiXact(TransactionId xid) myOldestMember = OldestMemberMXactId[MyProcNumber]; if (MultiXactIdIsValid(myOldestMember)) { - ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, false); + ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false); /* * Even though storing MultiXactId is atomic, acquire lock to make @@ -1896,10 +1893,10 @@ PostPrepare_MultiXact(TransactionId xid) * Recover the state of a prepared transaction at startup */ void -multixact_twophase_recover(TransactionId xid, uint16 info, +multixact_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { - ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, false); + ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, false); MultiXactId oldestMember; /* @@ -1917,10 +1914,10 @@ multixact_twophase_recover(TransactionId xid, uint16 info, * Similar to AtEOXact_MultiXact but for COMMIT PREPARED */ void -multixact_twophase_postcommit(TransactionId xid, uint16 info, +multixact_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { - ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(xid, true); + ProcNumber dummyProcNumber = TwoPhaseGetDummyProcNumber(fxid, true); Assert(len == sizeof(MultiXactId)); @@ -1932,10 +1929,10 @@ multixact_twophase_postcommit(TransactionId xid, uint16 info, * This is actually just the same as the COMMIT case. 
*/ void -multixact_twophase_postabort(TransactionId xid, uint16 info, +multixact_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { - multixact_twophase_postcommit(xid, info, recdata, len); + multixact_twophase_postcommit(fxid, info, recdata, len); } /* @@ -2033,70 +2030,9 @@ check_multixact_member_buffers(int *newval, void **extra, GucSource source) void BootStrapMultiXact(void) { - int slotno; - LWLock *lock; - - lock = SimpleLruGetBankLock(MultiXactOffsetCtl, 0); - LWLockAcquire(lock, LW_EXCLUSIVE); - - /* Create and zero the first page of the offsets log */ - slotno = ZeroMultiXactOffsetPage(0, false); - - /* Make sure it's written out */ - SimpleLruWritePage(MultiXactOffsetCtl, slotno); - Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]); - - LWLockRelease(lock); - - lock = SimpleLruGetBankLock(MultiXactMemberCtl, 0); - LWLockAcquire(lock, LW_EXCLUSIVE); - - /* Create and zero the first page of the members log */ - slotno = ZeroMultiXactMemberPage(0, false); - - /* Make sure it's written out */ - SimpleLruWritePage(MultiXactMemberCtl, slotno); - Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]); - - LWLockRelease(lock); -} - -/* - * Initialize (or reinitialize) a page of MultiXactOffset to zeroes. - * If writeXlog is true, also emit an XLOG record saying we did this. - * - * The page is not actually written, just set up in shared memory. - * The slot number of the new page is returned. - * - * Control lock must be held at entry, and will be held at exit. 
- */ -static int -ZeroMultiXactOffsetPage(int64 pageno, bool writeXlog) -{ - int slotno; - - slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno); - - if (writeXlog) - WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_OFF_PAGE); - - return slotno; -} - -/* - * Ditto, for MultiXactMember - */ -static int -ZeroMultiXactMemberPage(int64 pageno, bool writeXlog) -{ - int slotno; - - slotno = SimpleLruZeroPage(MultiXactMemberCtl, pageno); - - if (writeXlog) - WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_MEM_PAGE); - - return slotno; + /* Zero the initial pages and flush them to disk */ + SimpleLruZeroAndWritePage(MultiXactOffsetCtl, 0); + SimpleLruZeroAndWritePage(MultiXactMemberCtl, 0); } /* @@ -2134,7 +2070,7 @@ MaybeExtendOffsetSlru(void) * with creating a new segment file even if the page we're writing is * not the first in it, so this is enough. */ - slotno = ZeroMultiXactOffsetPage(pageno, false); + slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno); SimpleLruWritePage(MultiXactOffsetCtl, slotno); } @@ -2568,8 +2504,10 @@ ExtendMultiXactOffset(MultiXactId multi) LWLockAcquire(lock, LW_EXCLUSIVE); - /* Zero the page and make an XLOG entry about it */ - ZeroMultiXactOffsetPage(pageno, true); + /* Zero the page and make a WAL entry about it */ + SimpleLruZeroPage(MultiXactOffsetCtl, pageno); + XLogSimpleInsertInt64(RM_MULTIXACT_ID, XLOG_MULTIXACT_ZERO_OFF_PAGE, + pageno); LWLockRelease(lock); } @@ -2611,8 +2549,10 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers) LWLockAcquire(lock, LW_EXCLUSIVE); - /* Zero the page and make an XLOG entry about it */ - ZeroMultiXactMemberPage(pageno, true); + /* Zero the page and make a WAL entry about it */ + SimpleLruZeroPage(MultiXactMemberCtl, pageno); + XLogSimpleInsertInt64(RM_MULTIXACT_ID, + XLOG_MULTIXACT_ZERO_MEM_PAGE, pageno); LWLockRelease(lock); } @@ -3348,18 +3288,6 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2) } /* - * Write an xlog record reflecting the zeroing of 
either a MEMBERs or - * OFFSETs page (info shows which) - */ -static void -WriteMZeroPageXlogRec(int64 pageno, uint8 info) -{ - XLogBeginInsert(); - XLogRegisterData(&pageno, sizeof(pageno)); - (void) XLogInsert(RM_MULTIXACT_ID, info); -} - -/* * Write a TRUNCATE xlog record * * We must flush the xlog record to disk before returning --- see notes in @@ -3401,36 +3329,16 @@ multixact_redo(XLogReaderState *record) if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE) { int64 pageno; - int slotno; - LWLock *lock; memcpy(&pageno, XLogRecGetData(record), sizeof(pageno)); - - lock = SimpleLruGetBankLock(MultiXactOffsetCtl, pageno); - LWLockAcquire(lock, LW_EXCLUSIVE); - - slotno = ZeroMultiXactOffsetPage(pageno, false); - SimpleLruWritePage(MultiXactOffsetCtl, slotno); - Assert(!MultiXactOffsetCtl->shared->page_dirty[slotno]); - - LWLockRelease(lock); + SimpleLruZeroAndWritePage(MultiXactOffsetCtl, pageno); } else if (info == XLOG_MULTIXACT_ZERO_MEM_PAGE) { int64 pageno; - int slotno; - LWLock *lock; memcpy(&pageno, XLogRecGetData(record), sizeof(pageno)); - - lock = SimpleLruGetBankLock(MultiXactMemberCtl, pageno); - LWLockAcquire(lock, LW_EXCLUSIVE); - - slotno = ZeroMultiXactMemberPage(pageno, false); - SimpleLruWritePage(MultiXactMemberCtl, slotno); - Assert(!MultiXactMemberCtl->shared->page_dirty[slotno]); - - LWLockRelease(lock); + SimpleLruZeroAndWritePage(MultiXactMemberCtl, pageno); } else if (info == XLOG_MULTIXACT_CREATE_ID) { diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index fe56286d9a9..10ec259f382 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -434,6 +434,31 @@ SimpleLruZeroLSNs(SlruCtl ctl, int slotno) } /* + * This is a convenience wrapper for the common case of zeroing a page and + * immediately flushing it to disk. + * + * Control lock is acquired and released here. 
+ */ +void +SimpleLruZeroAndWritePage(SlruCtl ctl, int64 pageno) +{ + int slotno; + LWLock *lock; + + lock = SimpleLruGetBankLock(ctl, pageno); + LWLockAcquire(lock, LW_EXCLUSIVE); + + /* Create and zero the page */ + slotno = SimpleLruZeroPage(ctl, pageno); + + /* Make sure it's written out */ + SimpleLruWritePage(ctl, slotno); + Assert(!ctl->shared->page_dirty[slotno]); + + LWLockRelease(lock); +} + +/* * Wait for any active I/O on a page slot to finish. (This does not * guarantee that new I/O hasn't been started before we return, though. * In fact the slot might not even contain the same page anymore.) diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index 15153618fad..09aace9e09f 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -74,7 +74,6 @@ static SlruCtlData SubTransCtlData; #define SubTransCtl (&SubTransCtlData) -static int ZeroSUBTRANSPage(int64 pageno); static bool SubTransPagePrecedes(int64 page1, int64 page2); @@ -269,33 +268,8 @@ check_subtrans_buffers(int *newval, void **extra, GucSource source) void BootStrapSUBTRANS(void) { - int slotno; - LWLock *lock = SimpleLruGetBankLock(SubTransCtl, 0); - - LWLockAcquire(lock, LW_EXCLUSIVE); - - /* Create and zero the first page of the subtrans log */ - slotno = ZeroSUBTRANSPage(0); - - /* Make sure it's written out */ - SimpleLruWritePage(SubTransCtl, slotno); - Assert(!SubTransCtl->shared->page_dirty[slotno]); - - LWLockRelease(lock); -} - -/* - * Initialize (or reinitialize) a page of SUBTRANS to zeroes. - * - * The page is not actually written, just set up in shared memory. - * The slot number of the new page is returned. - * - * Control lock must be held at entry, and will be held at exit. 
- */ -static int -ZeroSUBTRANSPage(int64 pageno) -{ - return SimpleLruZeroPage(SubTransCtl, pageno); + /* Zero the initial page and flush it to disk */ + SimpleLruZeroAndWritePage(SubTransCtl, 0); } /* @@ -335,7 +309,7 @@ StartupSUBTRANS(TransactionId oldestActiveXID) prevlock = lock; } - (void) ZeroSUBTRANSPage(startPage); + (void) SimpleLruZeroPage(SubTransCtl, startPage); if (startPage == endPage) break; @@ -395,7 +369,7 @@ ExtendSUBTRANS(TransactionId newestXact) LWLockAcquire(lock, LW_EXCLUSIVE); /* Zero the page */ - ZeroSUBTRANSPage(pageno); + SimpleLruZeroPage(SubTransCtl, pageno); LWLockRelease(lock); } diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c index a27f27cc037..186eb91f609 100644 --- a/src/backend/access/transam/timeline.c +++ b/src/backend/access/transam/timeline.c @@ -154,7 +154,7 @@ readTimeLineHistory(TimeLineID targetTLI) if (*ptr == '\0' || *ptr == '#') continue; - nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo); + nfields = sscanf(fline, "%u\t%X/%08X", &tli, &switchpoint_hi, &switchpoint_lo); if (nfields < 1) { @@ -399,7 +399,7 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI, * parent file failed to end with one. */ snprintf(buffer, sizeof(buffer), - "%s%u\t%X/%X\t%s\n", + "%s%u\t%X/%08X\t%s\n", (srcfd < 0) ? 
"" : "\n", parentTLI, LSN_FORMAT_ARGS(switchpoint), diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c index 73a80559194..85cbe397cb2 100644 --- a/src/backend/access/transam/twophase.c +++ b/src/backend/access/transam/twophase.c @@ -159,7 +159,7 @@ typedef struct GlobalTransactionData */ XLogRecPtr prepare_start_lsn; /* XLOG offset of prepare record start */ XLogRecPtr prepare_end_lsn; /* XLOG offset of prepare record end */ - TransactionId xid; /* The GXACT id */ + FullTransactionId fxid; /* The GXACT full xid */ Oid owner; /* ID of user that executed the xact */ ProcNumber locking_backend; /* backend currently working on the xact */ @@ -197,6 +197,7 @@ static GlobalTransaction MyLockedGxact = NULL; static bool twophaseExitRegistered = false; +static void PrepareRedoRemoveFull(FullTransactionId fxid, bool giveWarning); static void RecordTransactionCommitPrepared(TransactionId xid, int nchildren, TransactionId *children, @@ -216,19 +217,19 @@ static void RecordTransactionAbortPrepared(TransactionId xid, int nstats, xl_xact_stats_item *stats, const char *gid); -static void ProcessRecords(char *bufptr, TransactionId xid, +static void ProcessRecords(char *bufptr, FullTransactionId fxid, const TwoPhaseCallback callbacks[]); static void RemoveGXact(GlobalTransaction gxact); static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len); -static char *ProcessTwoPhaseBuffer(TransactionId xid, +static char *ProcessTwoPhaseBuffer(FullTransactionId fxid, XLogRecPtr prepare_start_lsn, bool fromdisk, bool setParent, bool setNextXid); -static void MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, +static void MarkAsPreparingGuts(GlobalTransaction gxact, FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid); -static void RemoveTwoPhaseFile(TransactionId xid, bool giveWarning); -static void RecreateTwoPhaseFile(TransactionId xid, void *content, int len); +static void 
RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning); +static void RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len); /* * Initialization of shared memory @@ -356,7 +357,7 @@ PostPrepare_Twophase(void) * Reserve the GID for the given transaction. */ GlobalTransaction -MarkAsPreparing(TransactionId xid, const char *gid, +MarkAsPreparing(FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid) { GlobalTransaction gxact; @@ -407,7 +408,7 @@ MarkAsPreparing(TransactionId xid, const char *gid, gxact = TwoPhaseState->freeGXacts; TwoPhaseState->freeGXacts = gxact->next; - MarkAsPreparingGuts(gxact, xid, gid, prepared_at, owner, databaseid); + MarkAsPreparingGuts(gxact, fxid, gid, prepared_at, owner, databaseid); gxact->ondisk = false; @@ -430,11 +431,13 @@ MarkAsPreparing(TransactionId xid, const char *gid, * Note: This function should be called with appropriate locks held. */ static void -MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid, - TimestampTz prepared_at, Oid owner, Oid databaseid) +MarkAsPreparingGuts(GlobalTransaction gxact, FullTransactionId fxid, + const char *gid, TimestampTz prepared_at, Oid owner, + Oid databaseid) { PGPROC *proc; int i; + TransactionId xid = XidFromFullTransactionId(fxid); Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE)); @@ -479,7 +482,7 @@ MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, const char *gid, proc->subxidStatus.count = 0; gxact->prepared_at = prepared_at; - gxact->xid = xid; + gxact->fxid = fxid; gxact->owner = owner; gxact->locking_backend = MyProcNumber; gxact->valid = false; @@ -797,12 +800,12 @@ pg_prepared_xact(PG_FUNCTION_ARGS) * caller had better hold it. 
*/ static GlobalTransaction -TwoPhaseGetGXact(TransactionId xid, bool lock_held) +TwoPhaseGetGXact(FullTransactionId fxid, bool lock_held) { GlobalTransaction result = NULL; int i; - static TransactionId cached_xid = InvalidTransactionId; + static FullTransactionId cached_fxid = {InvalidTransactionId}; static GlobalTransaction cached_gxact = NULL; Assert(!lock_held || LWLockHeldByMe(TwoPhaseStateLock)); @@ -811,7 +814,7 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held) * During a recovery, COMMIT PREPARED, or ABORT PREPARED, we'll be called * repeatedly for the same XID. We can save work with a simple cache. */ - if (xid == cached_xid) + if (FullTransactionIdEquals(fxid, cached_fxid)) return cached_gxact; if (!lock_held) @@ -821,7 +824,7 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held) { GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; - if (gxact->xid == xid) + if (FullTransactionIdEquals(gxact->fxid, fxid)) { result = gxact; break; @@ -832,9 +835,10 @@ TwoPhaseGetGXact(TransactionId xid, bool lock_held) LWLockRelease(TwoPhaseStateLock); if (result == NULL) /* should not happen */ - elog(ERROR, "failed to find GlobalTransaction for xid %u", xid); + elog(ERROR, "failed to find GlobalTransaction for xid %u", + XidFromFullTransactionId(fxid)); - cached_xid = xid; + cached_fxid = fxid; cached_gxact = result; return result; @@ -881,7 +885,7 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, *have_more = true; break; } - result = gxact->xid; + result = XidFromFullTransactionId(gxact->fxid); } } @@ -892,7 +896,7 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, /* * TwoPhaseGetDummyProcNumber - * Get the dummy proc number for prepared transaction specified by XID + * Get the dummy proc number for prepared transaction * * Dummy proc numbers are similar to proc numbers of real backends. 
They * start at MaxBackends, and are unique across all currently active real @@ -900,24 +904,24 @@ TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, * TwoPhaseStateLock will not be taken, so the caller had better hold it. */ ProcNumber -TwoPhaseGetDummyProcNumber(TransactionId xid, bool lock_held) +TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held) { - GlobalTransaction gxact = TwoPhaseGetGXact(xid, lock_held); + GlobalTransaction gxact = TwoPhaseGetGXact(fxid, lock_held); return gxact->pgprocno; } /* * TwoPhaseGetDummyProc - * Get the PGPROC that represents a prepared transaction specified by XID + * Get the PGPROC that represents a prepared transaction * * If lock_held is set to true, TwoPhaseStateLock will not be taken, so the * caller had better hold it. */ PGPROC * -TwoPhaseGetDummyProc(TransactionId xid, bool lock_held) +TwoPhaseGetDummyProc(FullTransactionId fxid, bool lock_held) { - GlobalTransaction gxact = TwoPhaseGetGXact(xid, lock_held); + GlobalTransaction gxact = TwoPhaseGetGXact(fxid, lock_held); return GetPGProcByNumber(gxact->pgprocno); } @@ -942,10 +946,8 @@ AdjustToFullTransactionId(TransactionId xid) } static inline int -TwoPhaseFilePath(char *path, TransactionId xid) +TwoPhaseFilePath(char *path, FullTransactionId fxid) { - FullTransactionId fxid = AdjustToFullTransactionId(xid); - return snprintf(path, MAXPGPATH, TWOPHASE_DIR "/%08X%08X", EpochFromFullTransactionId(fxid), XidFromFullTransactionId(fxid)); @@ -1049,7 +1051,7 @@ void StartPrepare(GlobalTransaction gxact) { PGPROC *proc = GetPGProcByNumber(gxact->pgprocno); - TransactionId xid = gxact->xid; + TransactionId xid = XidFromFullTransactionId(gxact->fxid); TwoPhaseFileHeader hdr; TransactionId *children; RelFileLocator *commitrels; @@ -1281,10 +1283,11 @@ RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info, * If it looks OK (has a valid magic number and CRC), return the palloc'd * contents of the file, issuing an error when finding corrupted data. 
If * missing_ok is true, which indicates that missing files can be safely - * ignored, then return NULL. This state can be reached when doing recovery. + * ignored, then return NULL. This state can be reached when doing recovery + * after discarding two-phase files from frozen epochs. */ static char * -ReadTwoPhaseFile(TransactionId xid, bool missing_ok) +ReadTwoPhaseFile(FullTransactionId fxid, bool missing_ok) { char path[MAXPGPATH]; char *buf; @@ -1296,7 +1299,7 @@ ReadTwoPhaseFile(TransactionId xid, bool missing_ok) file_crc; int r; - TwoPhaseFilePath(path, xid); + TwoPhaseFilePath(path, fxid); fd = OpenTransientFile(path, O_RDONLY | PG_BINARY); if (fd < 0) @@ -1426,12 +1429,12 @@ XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len) if (errormsg) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not read two-phase state from WAL at %X/%X: %s", + errmsg("could not read two-phase state from WAL at %X/%08X: %s", LSN_FORMAT_ARGS(lsn), errormsg))); else ereport(ERROR, (errcode_for_file_access(), - errmsg("could not read two-phase state from WAL at %X/%X", + errmsg("could not read two-phase state from WAL at %X/%08X", LSN_FORMAT_ARGS(lsn)))); } @@ -1439,7 +1442,7 @@ XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len) (XLogRecGetInfo(xlogreader) & XLOG_XACT_OPMASK) != XLOG_XACT_PREPARE) ereport(ERROR, (errcode_for_file_access(), - errmsg("expected two-phase state data is not present in WAL at %X/%X", + errmsg("expected two-phase state data is not present in WAL at %X/%08X", LSN_FORMAT_ARGS(lsn)))); if (len != NULL) @@ -1461,6 +1464,7 @@ StandbyTransactionIdIsPrepared(TransactionId xid) char *buf; TwoPhaseFileHeader *hdr; bool result; + FullTransactionId fxid; Assert(TransactionIdIsValid(xid)); @@ -1468,7 +1472,8 @@ StandbyTransactionIdIsPrepared(TransactionId xid) return false; /* nothing to do */ /* Read and validate file */ - buf = ReadTwoPhaseFile(xid, true); + fxid = AdjustToFullTransactionId(xid); + buf = ReadTwoPhaseFile(fxid, true); if 
(buf == NULL) return false; @@ -1488,6 +1493,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) { GlobalTransaction gxact; PGPROC *proc; + FullTransactionId fxid; TransactionId xid; bool ondisk; char *buf; @@ -1509,7 +1515,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit) */ gxact = LockGXact(gid, GetUserId()); proc = GetPGProcByNumber(gxact->pgprocno); - xid = gxact->xid; + fxid = gxact->fxid; + xid = XidFromFullTransactionId(fxid); /* * Read and validate 2PC state data. State data will typically be stored @@ -1517,7 +1524,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) * to disk if for some reason they have lived for a long time. */ if (gxact->ondisk) - buf = ReadTwoPhaseFile(xid, false); + buf = ReadTwoPhaseFile(fxid, false); else XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL); @@ -1636,11 +1643,11 @@ FinishPreparedTransaction(const char *gid, bool isCommit) /* And now do the callbacks */ if (isCommit) - ProcessRecords(bufptr, xid, twophase_postcommit_callbacks); + ProcessRecords(bufptr, fxid, twophase_postcommit_callbacks); else - ProcessRecords(bufptr, xid, twophase_postabort_callbacks); + ProcessRecords(bufptr, fxid, twophase_postabort_callbacks); - PredicateLockTwoPhaseFinish(xid, isCommit); + PredicateLockTwoPhaseFinish(fxid, isCommit); /* * Read this value while holding the two-phase lock, as the on-disk 2PC @@ -1664,7 +1671,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) * And now we can clean up any files we may have left. */ if (ondisk) - RemoveTwoPhaseFile(xid, true); + RemoveTwoPhaseFile(fxid, true); MyLockedGxact = NULL; @@ -1677,7 +1684,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit) * Scan 2PC state data in memory and call the indicated callbacks for each 2PC record. 
*/ static void -ProcessRecords(char *bufptr, TransactionId xid, +ProcessRecords(char *bufptr, FullTransactionId fxid, const TwoPhaseCallback callbacks[]) { for (;;) @@ -1691,24 +1698,28 @@ ProcessRecords(char *bufptr, TransactionId xid, bufptr += MAXALIGN(sizeof(TwoPhaseRecordOnDisk)); if (callbacks[record->rmid] != NULL) - callbacks[record->rmid] (xid, record->info, bufptr, record->len); + callbacks[record->rmid] (fxid, record->info, bufptr, record->len); bufptr += MAXALIGN(record->len); } } /* - * Remove the 2PC file for the specified XID. + * Remove the 2PC file. * * If giveWarning is false, do not complain about file-not-present; * this is an expected case during WAL replay. + * + * This routine is used at early stages at recovery where future and + * past orphaned files are checked, hence the FullTransactionId to build + * a complete file name fit for the removal. */ static void -RemoveTwoPhaseFile(TransactionId xid, bool giveWarning) +RemoveTwoPhaseFile(FullTransactionId fxid, bool giveWarning) { char path[MAXPGPATH]; - TwoPhaseFilePath(path, xid); + TwoPhaseFilePath(path, fxid); if (unlink(path)) if (errno != ENOENT || giveWarning) ereport(WARNING, @@ -1723,7 +1734,7 @@ RemoveTwoPhaseFile(TransactionId xid, bool giveWarning) * Note: content and len don't include CRC. 
*/ static void -RecreateTwoPhaseFile(TransactionId xid, void *content, int len) +RecreateTwoPhaseFile(FullTransactionId fxid, void *content, int len) { char path[MAXPGPATH]; pg_crc32c statefile_crc; @@ -1734,7 +1745,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len) COMP_CRC32C(statefile_crc, content, len); FIN_CRC32C(statefile_crc); - TwoPhaseFilePath(path, xid); + TwoPhaseFilePath(path, fxid); fd = OpenTransientFile(path, O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY); @@ -1846,7 +1857,7 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon) int len; XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, &len); - RecreateTwoPhaseFile(gxact->xid, buf, len); + RecreateTwoPhaseFile(gxact->fxid, buf, len); gxact->ondisk = true; gxact->prepare_start_lsn = InvalidXLogRecPtr; gxact->prepare_end_lsn = InvalidXLogRecPtr; @@ -1897,19 +1908,17 @@ restoreTwoPhaseData(void) if (strlen(clde->d_name) == 16 && strspn(clde->d_name, "0123456789ABCDEF") == 16) { - TransactionId xid; FullTransactionId fxid; char *buf; fxid = FullTransactionIdFromU64(strtou64(clde->d_name, NULL, 16)); - xid = XidFromFullTransactionId(fxid); - buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr, + buf = ProcessTwoPhaseBuffer(fxid, InvalidXLogRecPtr, true, false, false); if (buf == NULL) continue; - PrepareRedoAdd(buf, InvalidXLogRecPtr, + PrepareRedoAdd(fxid, buf, InvalidXLogRecPtr, InvalidXLogRecPtr, InvalidRepOriginId); } } @@ -1968,9 +1977,7 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) Assert(gxact->inredo); - xid = gxact->xid; - - buf = ProcessTwoPhaseBuffer(xid, + buf = ProcessTwoPhaseBuffer(gxact->fxid, gxact->prepare_start_lsn, gxact->ondisk, false, true); @@ -1981,6 +1988,7 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) * OK, we think this file is valid. Incorporate xid into the * running-minimum result. 
*/ + xid = XidFromFullTransactionId(gxact->fxid); if (TransactionIdPrecedes(xid, result)) result = xid; @@ -2036,15 +2044,12 @@ StandbyRecoverPreparedTransactions(void) LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { - TransactionId xid; char *buf; GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; Assert(gxact->inredo); - xid = gxact->xid; - - buf = ProcessTwoPhaseBuffer(xid, + buf = ProcessTwoPhaseBuffer(gxact->fxid, gxact->prepare_start_lsn, gxact->ondisk, true, false); if (buf != NULL) @@ -2077,16 +2082,14 @@ RecoverPreparedTransactions(void) LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); for (i = 0; i < TwoPhaseState->numPrepXacts; i++) { - TransactionId xid; char *buf; GlobalTransaction gxact = TwoPhaseState->prepXacts[i]; + FullTransactionId fxid = gxact->fxid; char *bufptr; TwoPhaseFileHeader *hdr; TransactionId *subxids; const char *gid; - xid = gxact->xid; - /* * Reconstruct subtrans state for the transaction --- needed because * pg_subtrans is not preserved over a restart. Note that we are @@ -2096,17 +2099,20 @@ RecoverPreparedTransactions(void) * SubTransSetParent has been set before, if the prepared transaction * generated xid assignment records. 
*/ - buf = ProcessTwoPhaseBuffer(xid, + buf = ProcessTwoPhaseBuffer(gxact->fxid, gxact->prepare_start_lsn, gxact->ondisk, true, false); if (buf == NULL) continue; ereport(LOG, - (errmsg("recovering prepared transaction %u from shared memory", xid))); + (errmsg("recovering prepared transaction %u of epoch %u from shared memory", + XidFromFullTransactionId(gxact->fxid), + EpochFromFullTransactionId(gxact->fxid)))); hdr = (TwoPhaseFileHeader *) buf; - Assert(TransactionIdEquals(hdr->xid, xid)); + Assert(TransactionIdEquals(hdr->xid, + XidFromFullTransactionId(gxact->fxid))); bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader)); gid = (const char *) bufptr; bufptr += MAXALIGN(hdr->gidlen); @@ -2122,7 +2128,7 @@ RecoverPreparedTransactions(void) * Recreate its GXACT and dummy PGPROC. But, check whether it was * added in redo and already has a shmem entry for it. */ - MarkAsPreparingGuts(gxact, xid, gid, + MarkAsPreparingGuts(gxact, gxact->fxid, gid, hdr->prepared_at, hdr->owner, hdr->database); @@ -2137,7 +2143,7 @@ RecoverPreparedTransactions(void) /* * Recover other state (notably locks) using resource managers. */ - ProcessRecords(bufptr, xid, twophase_recover_callbacks); + ProcessRecords(bufptr, fxid, twophase_recover_callbacks); /* * Release locks held by the standby process after we process each @@ -2145,7 +2151,7 @@ RecoverPreparedTransactions(void) * additional locks at any one time. */ if (InHotStandby) - StandbyReleaseLockTree(xid, hdr->nsubxacts, subxids); + StandbyReleaseLockTree(hdr->xid, hdr->nsubxacts, subxids); /* * We're done with recovering this transaction. Clear MyLockedGxact, @@ -2164,7 +2170,7 @@ RecoverPreparedTransactions(void) /* * ProcessTwoPhaseBuffer * - * Given a transaction id, read it either from disk or read it directly + * Given a FullTransactionId, read it either from disk or read it directly * via shmem xlog record pointer using the provided "prepare_start_lsn". * * If setParent is true, set up subtransaction parent linkages. 
@@ -2173,13 +2179,12 @@ RecoverPreparedTransactions(void) * value scanned. */ static char * -ProcessTwoPhaseBuffer(TransactionId xid, +ProcessTwoPhaseBuffer(FullTransactionId fxid, XLogRecPtr prepare_start_lsn, bool fromdisk, bool setParent, bool setNextXid) { FullTransactionId nextXid = TransamVariables->nextXid; - TransactionId origNextXid = XidFromFullTransactionId(nextXid); TransactionId *subxids; char *buf; TwoPhaseFileHeader *hdr; @@ -2191,41 +2196,46 @@ ProcessTwoPhaseBuffer(TransactionId xid, Assert(prepare_start_lsn != InvalidXLogRecPtr); /* Already processed? */ - if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid)) + if (TransactionIdDidCommit(XidFromFullTransactionId(fxid)) || + TransactionIdDidAbort(XidFromFullTransactionId(fxid))) { if (fromdisk) { ereport(WARNING, - (errmsg("removing stale two-phase state file for transaction %u", - xid))); - RemoveTwoPhaseFile(xid, true); + (errmsg("removing stale two-phase state file for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); + RemoveTwoPhaseFile(fxid, true); } else { ereport(WARNING, - (errmsg("removing stale two-phase state from memory for transaction %u", - xid))); - PrepareRedoRemove(xid, true); + (errmsg("removing stale two-phase state from memory for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); + PrepareRedoRemoveFull(fxid, true); } return NULL; } /* Reject XID if too new */ - if (TransactionIdFollowsOrEquals(xid, origNextXid)) + if (FullTransactionIdFollowsOrEquals(fxid, nextXid)) { if (fromdisk) { ereport(WARNING, - (errmsg("removing future two-phase state file for transaction %u", - xid))); - RemoveTwoPhaseFile(xid, true); + (errmsg("removing future two-phase state file for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); + RemoveTwoPhaseFile(fxid, true); } else { ereport(WARNING, - (errmsg("removing future two-phase state 
from memory for transaction %u", - xid))); - PrepareRedoRemove(xid, true); + (errmsg("removing future two-phase state from memory for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); + PrepareRedoRemoveFull(fxid, true); } return NULL; } @@ -2233,7 +2243,7 @@ ProcessTwoPhaseBuffer(TransactionId xid, if (fromdisk) { /* Read and validate file */ - buf = ReadTwoPhaseFile(xid, false); + buf = ReadTwoPhaseFile(fxid, false); } else { @@ -2243,18 +2253,20 @@ ProcessTwoPhaseBuffer(TransactionId xid, /* Deconstruct header */ hdr = (TwoPhaseFileHeader *) buf; - if (!TransactionIdEquals(hdr->xid, xid)) + if (!TransactionIdEquals(hdr->xid, XidFromFullTransactionId(fxid))) { if (fromdisk) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("corrupted two-phase state file for transaction %u", - xid))); + errmsg("corrupted two-phase state file for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); else ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("corrupted two-phase state in memory for transaction %u", - xid))); + errmsg("corrupted two-phase state in memory for transaction %u of epoch %u", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)))); } /* @@ -2268,14 +2280,14 @@ ProcessTwoPhaseBuffer(TransactionId xid, { TransactionId subxid = subxids[i]; - Assert(TransactionIdFollows(subxid, xid)); + Assert(TransactionIdFollows(subxid, XidFromFullTransactionId(fxid))); /* update nextXid if needed */ if (setNextXid) AdvanceNextFullTransactionIdPastXid(subxid); if (setParent) - SubTransSetParent(subxid, xid); + SubTransSetParent(subxid, XidFromFullTransactionId(fxid)); } return buf; @@ -2466,8 +2478,9 @@ RecordTransactionAbortPrepared(TransactionId xid, * data, the entry is marked as located on disk. 
*/ void -PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, - XLogRecPtr end_lsn, RepOriginId origin_id) +PrepareRedoAdd(FullTransactionId fxid, char *buf, + XLogRecPtr start_lsn, XLogRecPtr end_lsn, + RepOriginId origin_id) { TwoPhaseFileHeader *hdr = (TwoPhaseFileHeader *) buf; char *bufptr; @@ -2477,6 +2490,13 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, Assert(LWLockHeldByMeInMode(TwoPhaseStateLock, LW_EXCLUSIVE)); Assert(RecoveryInProgress()); + if (!FullTransactionIdIsValid(fxid)) + { + Assert(InRecovery); + fxid = FullTransactionIdFromAllowableAt(TransamVariables->nextXid, + hdr->xid); + } + bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader)); gid = (const char *) bufptr; @@ -2505,14 +2525,15 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, { char path[MAXPGPATH]; - TwoPhaseFilePath(path, hdr->xid); + Assert(InRecovery); + TwoPhaseFilePath(path, fxid); if (access(path, F_OK) == 0) { ereport(reachedConsistency ? ERROR : WARNING, (errmsg("could not recover two-phase state file for transaction %u", hdr->xid), - errdetail("Two-phase state file has been found in WAL record %X/%X, but this transaction has already been restored from disk.", + errdetail("Two-phase state file has been found in WAL record %X/%08X, but this transaction has already been restored from disk.", LSN_FORMAT_ARGS(start_lsn)))); return; } @@ -2536,7 +2557,7 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, gxact->prepared_at = hdr->prepared_at; gxact->prepare_start_lsn = start_lsn; gxact->prepare_end_lsn = end_lsn; - gxact->xid = hdr->xid; + gxact->fxid = fxid; gxact->owner = hdr->owner; gxact->locking_backend = INVALID_PROC_NUMBER; gxact->valid = false; @@ -2555,11 +2576,13 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, false /* backward */ , false /* WAL */ ); } - elog(DEBUG2, "added 2PC data in shared memory for transaction %u", gxact->xid); + elog(DEBUG2, "added 2PC data in shared memory for transaction %u of epoch %u", + XidFromFullTransactionId(gxact->fxid), + 
EpochFromFullTransactionId(gxact->fxid)); } /* - * PrepareRedoRemove + * PrepareRedoRemoveFull * * Remove the corresponding gxact entry from TwoPhaseState. Also remove * the 2PC file if a prepared transaction was saved via an earlier checkpoint. @@ -2567,8 +2590,8 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, * Caller must hold TwoPhaseStateLock in exclusive mode, because TwoPhaseState * is updated. */ -void -PrepareRedoRemove(TransactionId xid, bool giveWarning) +static void +PrepareRedoRemoveFull(FullTransactionId fxid, bool giveWarning) { GlobalTransaction gxact = NULL; int i; @@ -2581,7 +2604,7 @@ PrepareRedoRemove(TransactionId xid, bool giveWarning) { gxact = TwoPhaseState->prepXacts[i]; - if (gxact->xid == xid) + if (FullTransactionIdEquals(gxact->fxid, fxid)) { Assert(gxact->inredo); found = true; @@ -2598,13 +2621,29 @@ PrepareRedoRemove(TransactionId xid, bool giveWarning) /* * And now we can clean up any files we may have left. */ - elog(DEBUG2, "removing 2PC data for transaction %u", xid); + elog(DEBUG2, "removing 2PC data for transaction %u of epoch %u ", + XidFromFullTransactionId(fxid), + EpochFromFullTransactionId(fxid)); + if (gxact->ondisk) - RemoveTwoPhaseFile(xid, giveWarning); + RemoveTwoPhaseFile(fxid, giveWarning); + RemoveGXact(gxact); } /* + * Wrapper of PrepareRedoRemoveFull(), for TransactionIds. + */ +void +PrepareRedoRemove(TransactionId xid, bool giveWarning) +{ + FullTransactionId fxid = + FullTransactionIdFromAllowableAt(TransamVariables->nextXid, xid); + + PrepareRedoRemoveFull(fxid, giveWarning); +} + +/* * LookupGXact * Check if the prepared transaction with the given GID, lsn and timestamp * exists. @@ -2648,7 +2687,7 @@ LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn, * between publisher and subscriber. 
*/ if (gxact->ondisk) - buf = ReadTwoPhaseFile(gxact->xid, false); + buf = ReadTwoPhaseFile(gxact->fxid, false); else { Assert(gxact->prepare_start_lsn); diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index b885513f765..41601fcb280 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -2515,7 +2515,7 @@ static void PrepareTransaction(void) { TransactionState s = CurrentTransactionState; - TransactionId xid = GetCurrentTransactionId(); + FullTransactionId fxid = GetCurrentFullTransactionId(); GlobalTransaction gxact; TimestampTz prepared_at; @@ -2644,7 +2644,7 @@ PrepareTransaction(void) * Reserve the GID for this transaction. This could fail if the requested * GID is invalid or already in use. */ - gxact = MarkAsPreparing(xid, prepareGID, prepared_at, + gxact = MarkAsPreparing(fxid, prepareGID, prepared_at, GetUserId(), MyDatabaseId); prepareGID = NULL; @@ -2694,7 +2694,7 @@ PrepareTransaction(void) * ProcArrayClearTransaction(). Otherwise, a GetLockConflicts() would * conclude "xact already committed or aborted" for our locks. */ - PostPrepare_Locks(xid); + PostPrepare_Locks(fxid); /* * Let others know about no transaction in progress by me. This has to be @@ -2738,9 +2738,9 @@ PrepareTransaction(void) PostPrepare_smgr(); - PostPrepare_MultiXact(xid); + PostPrepare_MultiXact(fxid); - PostPrepare_PredicateLocks(xid); + PostPrepare_PredicateLocks(fxid); ResourceOwnerRelease(TopTransactionResourceOwner, RESOURCE_RELEASE_LOCKS, @@ -6420,7 +6420,8 @@ xact_redo(XLogReaderState *record) * gxact entry. 
*/ LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE); - PrepareRedoAdd(XLogRecGetData(record), + PrepareRedoAdd(InvalidFullTransactionId, + XLogRecGetData(record), record->ReadRecPtr, record->EndRecPtr, XLogRecGetOrigin(record)); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 1914859b2ee..a8cc6402d62 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -1028,7 +1028,7 @@ XLogInsertRecord(XLogRecData *rdata, oldCxt = MemoryContextSwitchTo(walDebugCxt); initStringInfo(&buf); - appendStringInfo(&buf, "INSERT @ %X/%X: ", LSN_FORMAT_ARGS(EndPos)); + appendStringInfo(&buf, "INSERT @ %X/%08X: ", LSN_FORMAT_ARGS(EndPos)); /* * We have to piece together the WAL record data from the XLogRecData @@ -1549,8 +1549,8 @@ WaitXLogInsertionsToFinish(XLogRecPtr upto) if (upto > reservedUpto) { ereport(LOG, - (errmsg("request to flush past end of generated WAL; request %X/%X, current position %X/%X", - LSN_FORMAT_ARGS(upto), LSN_FORMAT_ARGS(reservedUpto)))); + errmsg("request to flush past end of generated WAL; request %X/%08X, current position %X/%08X", + LSN_FORMAT_ARGS(upto), LSN_FORMAT_ARGS(reservedUpto))); upto = reservedUpto; } @@ -1716,7 +1716,7 @@ GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli) endptr = pg_atomic_read_u64(&XLogCtl->xlblocks[idx]); if (expectedEndPtr != endptr) - elog(PANIC, "could not find WAL buffer for %X/%X", + elog(PANIC, "could not find WAL buffer for %X/%08X", LSN_FORMAT_ARGS(ptr)); } else @@ -1776,7 +1776,7 @@ WALReadFromBuffers(char *dstbuf, XLogRecPtr startptr, Size count, inserted = pg_atomic_read_u64(&XLogCtl->logInsertResult); if (startptr + count > inserted) ereport(ERROR, - errmsg("cannot read past end of generated WAL: requested %X/%X, current position %X/%X", + errmsg("cannot read past end of generated WAL: requested %X/%08X, current position %X/%08X", LSN_FORMAT_ARGS(startptr + count), LSN_FORMAT_ARGS(inserted))); @@ -2281,7 +2281,7 @@ AdvanceXLInsertBuffer(XLogRecPtr 
upto, TimeLineID tli, bool opportunistic) #ifdef WAL_DEBUG if (XLOG_DEBUG && npages > 0) { - elog(DEBUG1, "initialized %d pages, up to %X/%X", + elog(DEBUG1, "initialized %d pages, up to %X/%08X", npages, LSN_FORMAT_ARGS(NewPageEndPtr)); } #endif @@ -2492,7 +2492,7 @@ XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible) XLogRecPtr EndPtr = pg_atomic_read_u64(&XLogCtl->xlblocks[curridx]); if (LogwrtResult.Write >= EndPtr) - elog(PANIC, "xlog write request %X/%X is past end of log %X/%X", + elog(PANIC, "xlog write request %X/%08X is past end of log %X/%08X", LSN_FORMAT_ARGS(LogwrtResult.Write), LSN_FORMAT_ARGS(EndPtr)); @@ -2892,7 +2892,7 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force) newMinRecoveryPoint = GetCurrentReplayRecPtr(&newMinRecoveryPointTLI); if (!force && newMinRecoveryPoint < lsn) elog(WARNING, - "xlog min recovery request %X/%X is past current point %X/%X", + "xlog min recovery request %X/%08X is past current point %X/%08X", LSN_FORMAT_ARGS(lsn), LSN_FORMAT_ARGS(newMinRecoveryPoint)); /* update control file */ @@ -2905,9 +2905,9 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force) LocalMinRecoveryPointTLI = newMinRecoveryPointTLI; ereport(DEBUG2, - (errmsg_internal("updated min recovery point to %X/%X on timeline %u", - LSN_FORMAT_ARGS(newMinRecoveryPoint), - newMinRecoveryPointTLI))); + errmsg_internal("updated min recovery point to %X/%08X on timeline %u", + LSN_FORMAT_ARGS(newMinRecoveryPoint), + newMinRecoveryPointTLI)); } } LWLockRelease(ControlFileLock); @@ -2945,7 +2945,7 @@ XLogFlush(XLogRecPtr record) #ifdef WAL_DEBUG if (XLOG_DEBUG) - elog(LOG, "xlog flush request %X/%X; write %X/%X; flush %X/%X", + elog(LOG, "xlog flush request %X/%08X; write %X/%08X; flush %X/%08X", LSN_FORMAT_ARGS(record), LSN_FORMAT_ARGS(LogwrtResult.Write), LSN_FORMAT_ARGS(LogwrtResult.Flush)); @@ -3078,7 +3078,7 @@ XLogFlush(XLogRecPtr record) */ if (LogwrtResult.Flush < record) elog(ERROR, - "xlog flush request %X/%X is not satisfied --- flushed only 
to %X/%X", + "xlog flush request %X/%08X is not satisfied --- flushed only to %X/%08X", LSN_FORMAT_ARGS(record), LSN_FORMAT_ARGS(LogwrtResult.Flush)); } @@ -3205,7 +3205,7 @@ XLogBackgroundFlush(void) #ifdef WAL_DEBUG if (XLOG_DEBUG) - elog(LOG, "xlog bg flush request write %X/%X; flush: %X/%X, current is write %X/%X; flush %X/%X", + elog(LOG, "xlog bg flush request write %X/%08X; flush: %X/%08X, current is write %X/%08X; flush %X/%08X", LSN_FORMAT_ARGS(WriteRqst.Write), LSN_FORMAT_ARGS(WriteRqst.Flush), LSN_FORMAT_ARGS(LogwrtResult.Write), @@ -6921,7 +6921,7 @@ LogCheckpointEnd(bool restartpoint) "%d removed, %d recycled; write=%ld.%03d s, " "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, " "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, " - "estimate=%d kB; lsn=%X/%X, redo lsn=%X/%X", + "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X", CheckpointStats.ckpt_bufs_written, (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers, CheckpointStats.ckpt_slru_written, @@ -6945,7 +6945,7 @@ LogCheckpointEnd(bool restartpoint) "%d removed, %d recycled; write=%ld.%03d s, " "sync=%ld.%03d s, total=%ld.%03d s; sync files=%d, " "longest=%ld.%03d s, average=%ld.%03d s; distance=%d kB, " - "estimate=%d kB; lsn=%X/%X, redo lsn=%X/%X", + "estimate=%d kB; lsn=%X/%08X, redo lsn=%X/%08X", CheckpointStats.ckpt_bufs_written, (double) CheckpointStats.ckpt_bufs_written * 100 / NBuffers, CheckpointStats.ckpt_slru_written, @@ -7498,6 +7498,10 @@ CreateCheckPoint(int flags) if (PriorRedoPtr != InvalidXLogRecPtr) UpdateCheckPointDistanceEstimate(RedoRecPtr - PriorRedoPtr); +#ifdef USE_INJECTION_POINTS + INJECTION_POINT("checkpoint-before-old-wal-removal", NULL); +#endif + /* * Delete old log files, those no longer needed for last checkpoint to * prevent the disk holding the xlog from growing full. 
@@ -7637,7 +7641,7 @@ CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, if (!RecoveryInProgress()) elog(ERROR, "can only be used at end of recovery"); if (pagePtr % XLOG_BLCKSZ != 0) - elog(ERROR, "invalid position for missing continuation record %X/%X", + elog(ERROR, "invalid position for missing continuation record %X/%08X", LSN_FORMAT_ARGS(pagePtr)); /* The current WAL insert position should be right after the page header */ @@ -7648,7 +7652,7 @@ CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, startPos += SizeOfXLogShortPHD; recptr = GetXLogInsertRecPtr(); if (recptr != startPos) - elog(ERROR, "invalid WAL insert position %X/%X for OVERWRITE_CONTRECORD", + elog(ERROR, "invalid WAL insert position %X/%08X for OVERWRITE_CONTRECORD", LSN_FORMAT_ARGS(recptr)); START_CRIT_SECTION(); @@ -7678,7 +7682,7 @@ CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn, XLogRecPtr pagePtr, /* check that the record was inserted to the right place */ if (ProcLastRecPtr != startPos) - elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%X", + elog(ERROR, "OVERWRITE_CONTRECORD was inserted to unexpected position %X/%08X", LSN_FORMAT_ARGS(ProcLastRecPtr)); XLogFlush(recptr); @@ -7747,8 +7751,7 @@ RecoveryRestartPoint(const CheckPoint *checkPoint, XLogReaderState *record) if (XLogHaveInvalidPages()) { elog(DEBUG2, - "could not record restart point at %X/%X because there " - "are unresolved references to invalid pages", + "could not record restart point at %X/%08X because there are unresolved references to invalid pages", LSN_FORMAT_ARGS(checkPoint->redo)); return; } @@ -7828,8 +7831,8 @@ CreateRestartPoint(int flags) lastCheckPoint.redo <= ControlFile->checkPointCopy.redo) { ereport(DEBUG2, - (errmsg_internal("skipping restartpoint, already performed at %X/%X", - LSN_FORMAT_ARGS(lastCheckPoint.redo)))); + errmsg_internal("skipping restartpoint, already performed at %X/%08X", + 
LSN_FORMAT_ARGS(lastCheckPoint.redo))); UpdateMinRecoveryPoint(InvalidXLogRecPtr, true); if (flags & CHECKPOINT_IS_SHUTDOWN) @@ -8013,10 +8016,10 @@ CreateRestartPoint(int flags) xtime = GetLatestXTime(); ereport((log_checkpoints ? LOG : DEBUG2), - (errmsg("recovery restart point at %X/%X", - LSN_FORMAT_ARGS(lastCheckPoint.redo)), - xtime ? errdetail("Last completed transaction was at log time %s.", - timestamptz_to_str(xtime)) : 0)); + errmsg("recovery restart point at %X/%08X", + LSN_FORMAT_ARGS(lastCheckPoint.redo)), + xtime ? errdetail("Last completed transaction was at log time %s.", + timestamptz_to_str(xtime)) : 0); /* * Finally, execute archive_cleanup_command, if any. @@ -8277,8 +8280,8 @@ XLogRestorePoint(const char *rpName) RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT); ereport(LOG, - (errmsg("restore point \"%s\" created at %X/%X", - rpName, LSN_FORMAT_ARGS(RecPtr)))); + errmsg("restore point \"%s\" created at %X/%08X", + rpName, LSN_FORMAT_ARGS(RecPtr))); return RecPtr; } diff --git a/src/backend/access/transam/xlogbackup.c b/src/backend/access/transam/xlogbackup.c index 342590e0a46..cda4b38b7d6 100644 --- a/src/backend/access/transam/xlogbackup.c +++ b/src/backend/access/transam/xlogbackup.c @@ -42,7 +42,7 @@ build_backup_content(BackupState *state, bool ishistoryfile) XLByteToSeg(state->startpoint, startsegno, wal_segment_size); XLogFileName(startxlogfile, state->starttli, startsegno, wal_segment_size); - appendStringInfo(result, "START WAL LOCATION: %X/%X (file %s)\n", + appendStringInfo(result, "START WAL LOCATION: %X/%08X (file %s)\n", LSN_FORMAT_ARGS(state->startpoint), startxlogfile); if (ishistoryfile) @@ -52,11 +52,11 @@ build_backup_content(BackupState *state, bool ishistoryfile) XLByteToSeg(state->stoppoint, stopsegno, wal_segment_size); XLogFileName(stopxlogfile, state->stoptli, stopsegno, wal_segment_size); - appendStringInfo(result, "STOP WAL LOCATION: %X/%X (file %s)\n", + appendStringInfo(result, "STOP WAL LOCATION: %X/%08X (file 
%s)\n", LSN_FORMAT_ARGS(state->stoppoint), stopxlogfile); } - appendStringInfo(result, "CHECKPOINT LOCATION: %X/%X\n", + appendStringInfo(result, "CHECKPOINT LOCATION: %X/%08X\n", LSN_FORMAT_ARGS(state->checkpointloc)); appendStringInfoString(result, "BACKUP METHOD: streamed\n"); appendStringInfo(result, "BACKUP FROM: %s\n", @@ -81,7 +81,7 @@ build_backup_content(BackupState *state, bool ishistoryfile) Assert(XLogRecPtrIsInvalid(state->istartpoint) == (state->istarttli == 0)); if (!XLogRecPtrIsInvalid(state->istartpoint)) { - appendStringInfo(result, "INCREMENTAL FROM LSN: %X/%X\n", + appendStringInfo(result, "INCREMENTAL FROM LSN: %X/%08X\n", LSN_FORMAT_ARGS(state->istartpoint)); appendStringInfo(result, "INCREMENTAL FROM TLI: %u\n", state->istarttli); diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index 5ee9d0b028e..c7571429e8e 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -530,6 +530,18 @@ XLogInsert(RmgrId rmid, uint8 info) } /* + * Simple wrapper to XLogInsert to insert a WAL record with elementary + * contents (only an int64 is supported as value currently). + */ +XLogRecPtr +XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value) +{ + XLogBeginInsert(); + XLogRegisterData(&value, sizeof(value)); + return XLogInsert(rmid, info); +} + +/* * Assemble a WAL record from the registered data and buffers into an * XLogRecData chain, ready for insertion with XLogInsertRecord(). 
* diff --git a/src/backend/access/transam/xlogprefetcher.c b/src/backend/access/transam/xlogprefetcher.c index 7735562db01..ed3aacabc98 100644 --- a/src/backend/access/transam/xlogprefetcher.c +++ b/src/backend/access/transam/xlogprefetcher.c @@ -546,7 +546,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "suppressing all readahead until %X/%X is replayed due to possible TLI change", + "suppressing all readahead until %X/%08X is replayed due to possible TLI change", LSN_FORMAT_ARGS(record->lsn)); #endif @@ -579,7 +579,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "suppressing prefetch in database %u until %X/%X is replayed due to raw file copy", + "suppressing prefetch in database %u until %X/%08X is replayed due to raw file copy", rlocator.dbOid, LSN_FORMAT_ARGS(record->lsn)); #endif @@ -607,7 +607,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "suppressing prefetch in relation %u/%u/%u until %X/%X is replayed, which creates the relation", + "suppressing prefetch in relation %u/%u/%u until %X/%08X is replayed, which creates the relation", xlrec->rlocator.spcOid, xlrec->rlocator.dbOid, xlrec->rlocator.relNumber, @@ -630,7 +630,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, which truncates the relation", + "suppressing prefetch in relation %u/%u/%u from block %u until %X/%08X is replayed, which truncates the relation", xlrec->rlocator.spcOid, xlrec->rlocator.dbOid, xlrec->rlocator.relNumber, @@ -729,7 +729,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) { #ifdef XLOGPREFETCHER_DEBUG_LEVEL 
elog(XLOGPREFETCHER_DEBUG_LEVEL, - "suppressing all prefetch in relation %u/%u/%u until %X/%X is replayed, because the relation does not exist on disk", + "suppressing all prefetch in relation %u/%u/%u until %X/%08X is replayed, because the relation does not exist on disk", reln->smgr_rlocator.locator.spcOid, reln->smgr_rlocator.locator.dbOid, reln->smgr_rlocator.locator.relNumber, @@ -750,7 +750,7 @@ XLogPrefetcherNextBlock(uintptr_t pgsr_private, XLogRecPtr *lsn) { #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "suppressing prefetch in relation %u/%u/%u from block %u until %X/%X is replayed, because the relation is too small", + "suppressing prefetch in relation %u/%u/%u from block %u until %X/%08X is replayed, because the relation is too small", reln->smgr_rlocator.locator.spcOid, reln->smgr_rlocator.locator.dbOid, reln->smgr_rlocator.locator.relNumber, @@ -928,7 +928,7 @@ XLogPrefetcherIsFiltered(XLogPrefetcher *prefetcher, RelFileLocator rlocator, { #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%X is replayed (blocks >= %u filtered)", + "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%08X is replayed (blocks >= %u filtered)", rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno, LSN_FORMAT_ARGS(filter->filter_until_replayed), filter->filter_from_block); @@ -944,7 +944,7 @@ XLogPrefetcherIsFiltered(XLogPrefetcher *prefetcher, RelFileLocator rlocator, { #ifdef XLOGPREFETCHER_DEBUG_LEVEL elog(XLOGPREFETCHER_DEBUG_LEVEL, - "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%X is replayed (whole database)", + "prefetch of %u/%u/%u block %u suppressed; filtering until LSN %X/%08X is replayed (whole database)", rlocator.spcOid, rlocator.dbOid, rlocator.relNumber, blockno, LSN_FORMAT_ARGS(filter->filter_until_replayed)); #endif diff --git a/src/backend/access/transam/xlogreader.c 
b/src/backend/access/transam/xlogreader.c index 2790ade1f91..ac1f801b1eb 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -617,7 +617,7 @@ restart: } else if (targetRecOff < pageHeaderSize) { - report_invalid_record(state, "invalid record offset at %X/%X: expected at least %u, got %u", + report_invalid_record(state, "invalid record offset at %X/%08X: expected at least %u, got %u", LSN_FORMAT_ARGS(RecPtr), pageHeaderSize, targetRecOff); goto err; @@ -626,7 +626,7 @@ restart: if ((((XLogPageHeader) state->readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) && targetRecOff == pageHeaderSize) { - report_invalid_record(state, "contrecord is requested by %X/%X", + report_invalid_record(state, "contrecord is requested by %X/%08X", LSN_FORMAT_ARGS(RecPtr)); goto err; } @@ -667,7 +667,7 @@ restart: if (total_len < SizeOfXLogRecord) { report_invalid_record(state, - "invalid record length at %X/%X: expected at least %u, got %u", + "invalid record length at %X/%08X: expected at least %u, got %u", LSN_FORMAT_ARGS(RecPtr), (uint32) SizeOfXLogRecord, total_len); goto err; @@ -756,7 +756,7 @@ restart: if (!(pageHeader->xlp_info & XLP_FIRST_IS_CONTRECORD)) { report_invalid_record(state, - "there is no contrecord flag at %X/%X", + "there is no contrecord flag at %X/%08X", LSN_FORMAT_ARGS(RecPtr)); goto err; } @@ -769,7 +769,7 @@ restart: total_len != (pageHeader->xlp_rem_len + gotlen)) { report_invalid_record(state, - "invalid contrecord length %u (expected %lld) at %X/%X", + "invalid contrecord length %u (expected %lld) at %X/%08X", pageHeader->xlp_rem_len, ((long long) total_len) - gotlen, LSN_FORMAT_ARGS(RecPtr)); @@ -1132,7 +1132,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr, if (record->xl_tot_len < SizeOfXLogRecord) { report_invalid_record(state, - "invalid record length at %X/%X: expected at least %u, got %u", + "invalid record length at %X/%08X: expected at least %u, got %u", LSN_FORMAT_ARGS(RecPtr), 
(uint32) SizeOfXLogRecord, record->xl_tot_len); return false; @@ -1140,7 +1140,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr, if (!RmgrIdIsValid(record->xl_rmid)) { report_invalid_record(state, - "invalid resource manager ID %u at %X/%X", + "invalid resource manager ID %u at %X/%08X", record->xl_rmid, LSN_FORMAT_ARGS(RecPtr)); return false; } @@ -1153,7 +1153,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr, if (!(record->xl_prev < RecPtr)) { report_invalid_record(state, - "record with incorrect prev-link %X/%X at %X/%X", + "record with incorrect prev-link %X/%08X at %X/%08X", LSN_FORMAT_ARGS(record->xl_prev), LSN_FORMAT_ARGS(RecPtr)); return false; @@ -1169,7 +1169,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr, if (record->xl_prev != PrevRecPtr) { report_invalid_record(state, - "record with incorrect prev-link %X/%X at %X/%X", + "record with incorrect prev-link %X/%08X at %X/%08X", LSN_FORMAT_ARGS(record->xl_prev), LSN_FORMAT_ARGS(RecPtr)); return false; @@ -1207,7 +1207,7 @@ ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr) if (!EQ_CRC32C(record->xl_crc, crc)) { report_invalid_record(state, - "incorrect resource manager data checksum in record at %X/%X", + "incorrect resource manager data checksum in record at %X/%08X", LSN_FORMAT_ARGS(recptr)); return false; } @@ -1241,7 +1241,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize); report_invalid_record(state, - "invalid magic number %04X in WAL segment %s, LSN %X/%X, offset %u", + "invalid magic number %04X in WAL segment %s, LSN %X/%08X, offset %u", hdr->xlp_magic, fname, LSN_FORMAT_ARGS(recptr), @@ -1256,7 +1256,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize); report_invalid_record(state, - "invalid info bits %04X in WAL 
segment %s, LSN %X/%X, offset %u", + "invalid info bits %04X in WAL segment %s, LSN %X/%08X, offset %u", hdr->xlp_info, fname, LSN_FORMAT_ARGS(recptr), @@ -1298,7 +1298,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, /* hmm, first page of file doesn't have a long header? */ report_invalid_record(state, - "invalid info bits %04X in WAL segment %s, LSN %X/%X, offset %u", + "invalid info bits %04X in WAL segment %s, LSN %X/%08X, offset %u", hdr->xlp_info, fname, LSN_FORMAT_ARGS(recptr), @@ -1318,7 +1318,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize); report_invalid_record(state, - "unexpected pageaddr %X/%X in WAL segment %s, LSN %X/%X, offset %u", + "unexpected pageaddr %X/%08X in WAL segment %s, LSN %X/%08X, offset %u", LSN_FORMAT_ARGS(hdr->xlp_pageaddr), fname, LSN_FORMAT_ARGS(recptr), @@ -1344,7 +1344,7 @@ XLogReaderValidatePageHeader(XLogReaderState *state, XLogRecPtr recptr, XLogFileName(fname, state->seg.ws_tli, segno, state->segcxt.ws_segsize); report_invalid_record(state, - "out-of-sequence timeline ID %u (after %u) in WAL segment %s, LSN %X/%X, offset %u", + "out-of-sequence timeline ID %u (after %u) in WAL segment %s, LSN %X/%08X, offset %u", hdr->xlp_tli, state->latestPageTLI, fname, @@ -1756,7 +1756,7 @@ DecodeXLogRecord(XLogReaderState *state, if (block_id <= decoded->max_block_id) { report_invalid_record(state, - "out-of-order block_id %u at %X/%X", + "out-of-order block_id %u at %X/%08X", block_id, LSN_FORMAT_ARGS(state->ReadRecPtr)); goto err; @@ -1780,14 +1780,14 @@ DecodeXLogRecord(XLogReaderState *state, if (blk->has_data && blk->data_len == 0) { report_invalid_record(state, - "BKPBLOCK_HAS_DATA set, but no data included at %X/%X", + "BKPBLOCK_HAS_DATA set, but no data included at %X/%08X", LSN_FORMAT_ARGS(state->ReadRecPtr)); goto err; } if (!blk->has_data && blk->data_len != 0) { report_invalid_record(state, - 
"BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%X", + "BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%08X", (unsigned int) blk->data_len, LSN_FORMAT_ARGS(state->ReadRecPtr)); goto err; @@ -1823,7 +1823,7 @@ DecodeXLogRecord(XLogReaderState *state, blk->bimg_len == BLCKSZ)) { report_invalid_record(state, - "BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%X", + "BKPIMAGE_HAS_HOLE set, but hole offset %u length %u block image length %u at %X/%08X", (unsigned int) blk->hole_offset, (unsigned int) blk->hole_length, (unsigned int) blk->bimg_len, @@ -1839,7 +1839,7 @@ DecodeXLogRecord(XLogReaderState *state, (blk->hole_offset != 0 || blk->hole_length != 0)) { report_invalid_record(state, - "BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%X", + "BKPIMAGE_HAS_HOLE not set, but hole offset %u length %u at %X/%08X", (unsigned int) blk->hole_offset, (unsigned int) blk->hole_length, LSN_FORMAT_ARGS(state->ReadRecPtr)); @@ -1853,7 +1853,7 @@ DecodeXLogRecord(XLogReaderState *state, blk->bimg_len == BLCKSZ) { report_invalid_record(state, - "BKPIMAGE_COMPRESSED set, but block image length %u at %X/%X", + "BKPIMAGE_COMPRESSED set, but block image length %u at %X/%08X", (unsigned int) blk->bimg_len, LSN_FORMAT_ARGS(state->ReadRecPtr)); goto err; @@ -1868,7 +1868,7 @@ DecodeXLogRecord(XLogReaderState *state, blk->bimg_len != BLCKSZ) { report_invalid_record(state, - "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_COMPRESSED set, but block image length is %u at %X/%X", + "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_COMPRESSED set, but block image length is %u at %X/%08X", (unsigned int) blk->data_len, LSN_FORMAT_ARGS(state->ReadRecPtr)); goto err; @@ -1884,7 +1884,7 @@ DecodeXLogRecord(XLogReaderState *state, if (rlocator == NULL) { report_invalid_record(state, - "BKPBLOCK_SAME_REL set but no previous rel at %X/%X", + "BKPBLOCK_SAME_REL set but no previous rel at %X/%08X", LSN_FORMAT_ARGS(state->ReadRecPtr)); goto err; } @@ -1896,7 
+1896,7 @@ DecodeXLogRecord(XLogReaderState *state, else { report_invalid_record(state, - "invalid block_id %u at %X/%X", + "invalid block_id %u at %X/%08X", block_id, LSN_FORMAT_ARGS(state->ReadRecPtr)); goto err; } @@ -1963,7 +1963,7 @@ DecodeXLogRecord(XLogReaderState *state, shortdata_err: report_invalid_record(state, - "record with invalid length at %X/%X", + "record with invalid length at %X/%08X", LSN_FORMAT_ARGS(state->ReadRecPtr)); err: *errormsg = state->errormsg_buf; @@ -2073,14 +2073,14 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) !record->record->blocks[block_id].in_use) { report_invalid_record(record, - "could not restore image at %X/%X with invalid block %d specified", + "could not restore image at %X/%08X with invalid block %d specified", LSN_FORMAT_ARGS(record->ReadRecPtr), block_id); return false; } if (!record->record->blocks[block_id].has_image) { - report_invalid_record(record, "could not restore image at %X/%X with invalid state, block %d", + report_invalid_record(record, "could not restore image at %X/%08X with invalid state, block %d", LSN_FORMAT_ARGS(record->ReadRecPtr), block_id); return false; @@ -2107,7 +2107,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) bkpb->bimg_len, BLCKSZ - bkpb->hole_length) <= 0) decomp_success = false; #else - report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d", + report_invalid_record(record, "could not restore image at %X/%08X compressed with %s not supported by build, block %d", LSN_FORMAT_ARGS(record->ReadRecPtr), "LZ4", block_id); @@ -2124,7 +2124,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) if (ZSTD_isError(decomp_result)) decomp_success = false; #else - report_invalid_record(record, "could not restore image at %X/%X compressed with %s not supported by build, block %d", + report_invalid_record(record, "could not restore image at %X/%08X compressed with %s 
not supported by build, block %d", LSN_FORMAT_ARGS(record->ReadRecPtr), "zstd", block_id); @@ -2133,7 +2133,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) } else { - report_invalid_record(record, "could not restore image at %X/%X compressed with unknown method, block %d", + report_invalid_record(record, "could not restore image at %X/%08X compressed with unknown method, block %d", LSN_FORMAT_ARGS(record->ReadRecPtr), block_id); return false; @@ -2141,7 +2141,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) if (!decomp_success) { - report_invalid_record(record, "could not decompress image at %X/%X, block %d", + report_invalid_record(record, "could not decompress image at %X/%08X, block %d", LSN_FORMAT_ARGS(record->ReadRecPtr), block_id); return false; diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 6ce979f2d8b..23878b2dd91 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -620,10 +620,10 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, * than ControlFile->checkPoint is used. 
*/ ereport(LOG, - (errmsg("starting backup recovery with redo LSN %X/%X, checkpoint LSN %X/%X, on timeline ID %u", - LSN_FORMAT_ARGS(RedoStartLSN), - LSN_FORMAT_ARGS(CheckPointLoc), - CheckPointTLI))); + errmsg("starting backup recovery with redo LSN %X/%08X, checkpoint LSN %X/%08X, on timeline ID %u", + LSN_FORMAT_ARGS(RedoStartLSN), + LSN_FORMAT_ARGS(CheckPointLoc), + CheckPointTLI)); /* * When a backup_label file is present, we want to roll forward from @@ -636,8 +636,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint)); wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN); ereport(DEBUG1, - (errmsg_internal("checkpoint record is at %X/%X", - LSN_FORMAT_ARGS(CheckPointLoc)))); + errmsg_internal("checkpoint record is at %X/%08X", + LSN_FORMAT_ARGS(CheckPointLoc))); InRecovery = true; /* force recovery even if SHUTDOWNED */ /* @@ -652,23 +652,23 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, if (!ReadRecord(xlogprefetcher, LOG, false, checkPoint.ThisTimeLineID)) ereport(FATAL, - (errmsg("could not find redo location %X/%X referenced by checkpoint record at %X/%X", - LSN_FORMAT_ARGS(checkPoint.redo), LSN_FORMAT_ARGS(CheckPointLoc)), - errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n" - "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n" - "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.", - DataDir, DataDir, DataDir, DataDir))); + errmsg("could not find redo location %X/%08X referenced by checkpoint record at %X/%08X", + LSN_FORMAT_ARGS(checkPoint.redo), LSN_FORMAT_ARGS(CheckPointLoc)), + errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n" + "If you are not restoring from 
a backup, try removing the file \"%s/backup_label\".\n" + "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.", + DataDir, DataDir, DataDir, DataDir)); } } else { ereport(FATAL, - (errmsg("could not locate required checkpoint record at %X/%X", - LSN_FORMAT_ARGS(CheckPointLoc)), - errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n" - "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n" - "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.", - DataDir, DataDir, DataDir, DataDir))); + errmsg("could not locate required checkpoint record at %X/%08X", + LSN_FORMAT_ARGS(CheckPointLoc)), + errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" or \"%s/standby.signal\" and add required recovery options.\n" + "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n" + "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.", + DataDir, DataDir, DataDir, DataDir)); wasShutdown = false; /* keep compiler quiet */ } @@ -773,8 +773,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, */ if (!XLogRecPtrIsInvalid(ControlFile->backupStartPoint)) ereport(LOG, - (errmsg("restarting backup recovery with redo LSN %X/%X", - LSN_FORMAT_ARGS(ControlFile->backupStartPoint)))); + errmsg("restarting backup recovery with redo LSN %X/%08X", + LSN_FORMAT_ARGS(ControlFile->backupStartPoint))); /* Get the last valid checkpoint record. 
*/ CheckPointLoc = ControlFile->checkPoint; @@ -786,8 +786,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, if (record != NULL) { ereport(DEBUG1, - (errmsg_internal("checkpoint record is at %X/%X", - LSN_FORMAT_ARGS(CheckPointLoc)))); + errmsg_internal("checkpoint record is at %X/%08X", + LSN_FORMAT_ARGS(CheckPointLoc))); } else { @@ -798,8 +798,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, * simplify processing around checkpoints. */ ereport(PANIC, - (errmsg("could not locate a valid checkpoint record at %X/%X", - LSN_FORMAT_ARGS(CheckPointLoc)))); + errmsg("could not locate a valid checkpoint record at %X/%08X", + LSN_FORMAT_ARGS(CheckPointLoc))); } memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint)); wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN); @@ -824,8 +824,8 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, recoveryTargetName))); else if (recoveryTarget == RECOVERY_TARGET_LSN) ereport(LOG, - (errmsg("starting point-in-time recovery to WAL location (LSN) \"%X/%X\"", - LSN_FORMAT_ARGS(recoveryTargetLSN)))); + errmsg("starting point-in-time recovery to WAL location (LSN) \"%X/%08X\"", + LSN_FORMAT_ARGS(recoveryTargetLSN))); else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE) ereport(LOG, (errmsg("starting point-in-time recovery to earliest consistent point"))); @@ -855,7 +855,7 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, (errmsg("requested timeline %u is not a child of this server's history", recoveryTargetTLI), /* translator: %s is a backup_label file or a pg_control file */ - errdetail("Latest checkpoint in file \"%s\" is at %X/%X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%X.", + errdetail("Latest checkpoint in file \"%s\" is at %X/%08X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at 
%X/%08X.", haveBackupLabel ? "backup_label" : "pg_control", LSN_FORMAT_ARGS(CheckPointLoc), CheckPointTLI, @@ -870,15 +870,15 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr, tliOfPointInHistory(ControlFile->minRecoveryPoint - 1, expectedTLEs) != ControlFile->minRecoveryPointTLI) ereport(FATAL, - (errmsg("requested timeline %u does not contain minimum recovery point %X/%X on timeline %u", - recoveryTargetTLI, - LSN_FORMAT_ARGS(ControlFile->minRecoveryPoint), - ControlFile->minRecoveryPointTLI))); + errmsg("requested timeline %u does not contain minimum recovery point %X/%08X on timeline %u", + recoveryTargetTLI, + LSN_FORMAT_ARGS(ControlFile->minRecoveryPoint), + ControlFile->minRecoveryPointTLI)); ereport(DEBUG1, - (errmsg_internal("redo record is at %X/%X; shutdown %s", - LSN_FORMAT_ARGS(checkPoint.redo), - wasShutdown ? "true" : "false"))); + errmsg_internal("redo record is at %X/%08X; shutdown %s", + LSN_FORMAT_ARGS(checkPoint.redo), + wasShutdown ? "true" : "false")); ereport(DEBUG1, (errmsg_internal("next transaction ID: " UINT64_FORMAT "; next OID: %u", U64FromFullTransactionId(checkPoint.nextXid), @@ -1253,14 +1253,14 @@ read_backup_label(XLogRecPtr *checkPointLoc, TimeLineID *backupLabelTLI, * is pretty crude, but we are not expecting any variability in the file * format). 
*/ - if (fscanf(lfp, "START WAL LOCATION: %X/%X (file %08X%16s)%c", + if (fscanf(lfp, "START WAL LOCATION: %X/%08X (file %08X%16s)%c", &hi, &lo, &tli_from_walseg, startxlogfilename, &ch) != 5 || ch != '\n') ereport(FATAL, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE))); RedoStartLSN = ((uint64) hi) << 32 | lo; RedoStartTLI = tli_from_walseg; - if (fscanf(lfp, "CHECKPOINT LOCATION: %X/%X%c", + if (fscanf(lfp, "CHECKPOINT LOCATION: %X/%08X%c", &hi, &lo, &ch) != 3 || ch != '\n') ereport(FATAL, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), @@ -1332,7 +1332,7 @@ read_backup_label(XLogRecPtr *checkPointLoc, TimeLineID *backupLabelTLI, tli_from_file, BACKUP_LABEL_FILE))); } - if (fscanf(lfp, "INCREMENTAL FROM LSN: %X/%X\n", &hi, &lo) > 0) + if (fscanf(lfp, "INCREMENTAL FROM LSN: %X/%08X\n", &hi, &lo) > 0) ereport(FATAL, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("this is an incremental backup, not a data directory"), @@ -1722,8 +1722,8 @@ PerformWalRecovery(void) if (record->xl_rmid != RM_XLOG_ID || (record->xl_info & ~XLR_INFO_MASK) != XLOG_CHECKPOINT_REDO) ereport(FATAL, - (errmsg("unexpected record type found at redo point %X/%X", - LSN_FORMAT_ARGS(xlogreader->ReadRecPtr)))); + errmsg("unexpected record type found at redo point %X/%08X", + LSN_FORMAT_ARGS(xlogreader->ReadRecPtr))); } else { @@ -1745,8 +1745,8 @@ PerformWalRecovery(void) RmgrStartup(); ereport(LOG, - (errmsg("redo starts at %X/%X", - LSN_FORMAT_ARGS(xlogreader->ReadRecPtr)))); + errmsg("redo starts at %X/%08X", + LSN_FORMAT_ARGS(xlogreader->ReadRecPtr))); /* Prepare to report progress of the redo phase. 
*/ if (!StandbyMode) @@ -1758,7 +1758,7 @@ PerformWalRecovery(void) do { if (!StandbyMode) - ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X", + ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%08X", LSN_FORMAT_ARGS(xlogreader->ReadRecPtr)); #ifdef WAL_DEBUG @@ -1767,7 +1767,7 @@ PerformWalRecovery(void) StringInfoData buf; initStringInfo(&buf); - appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ", + appendStringInfo(&buf, "REDO @ %X/%08X; LSN %X/%08X: ", LSN_FORMAT_ARGS(xlogreader->ReadRecPtr), LSN_FORMAT_ARGS(xlogreader->EndRecPtr)); xlog_outrec(&buf, xlogreader); @@ -1880,9 +1880,9 @@ PerformWalRecovery(void) RmgrCleanup(); ereport(LOG, - (errmsg("redo done at %X/%X system usage: %s", - LSN_FORMAT_ARGS(xlogreader->ReadRecPtr), - pg_rusage_show(&ru0)))); + errmsg("redo done at %X/%08X system usage: %s", + LSN_FORMAT_ARGS(xlogreader->ReadRecPtr), + pg_rusage_show(&ru0))); xtime = GetLatestXTime(); if (xtime) ereport(LOG, @@ -2092,7 +2092,7 @@ xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI) memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_overwrite_contrecord)); if (xlrec.overwritten_lsn != record->overwrittenRecPtr) - elog(FATAL, "mismatching overwritten LSN %X/%X -> %X/%X", + elog(FATAL, "mismatching overwritten LSN %X/%08X -> %X/%08X", LSN_FORMAT_ARGS(xlrec.overwritten_lsn), LSN_FORMAT_ARGS(record->overwrittenRecPtr)); @@ -2101,9 +2101,9 @@ xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI) missingContrecPtr = InvalidXLogRecPtr; ereport(LOG, - (errmsg("successfully skipped missing contrecord at %X/%X, overwritten at %s", - LSN_FORMAT_ARGS(xlrec.overwritten_lsn), - timestamptz_to_str(xlrec.overwrite_time)))); + errmsg("successfully skipped missing contrecord at %X/%08X, overwritten at %s", + LSN_FORMAT_ARGS(xlrec.overwritten_lsn), + timestamptz_to_str(xlrec.overwrite_time))); /* Verifying the record should only happen once */ record->overwrittenRecPtr = 
InvalidXLogRecPtr; @@ -2129,7 +2129,7 @@ xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI) backupEndPoint = lsn; } else - elog(DEBUG1, "saw end-of-backup record for backup starting at %X/%X, waiting for %X/%X", + elog(DEBUG1, "saw end-of-backup record for backup starting at %X/%08X, waiting for %X/%08X", LSN_FORMAT_ARGS(startpoint), LSN_FORMAT_ARGS(backupStartPoint)); } } @@ -2224,9 +2224,9 @@ CheckRecoveryConsistency(void) backupEndRequired = false; ereport(LOG, - (errmsg("completed backup recovery with redo LSN %X/%X and end LSN %X/%X", - LSN_FORMAT_ARGS(saveBackupStartPoint), - LSN_FORMAT_ARGS(saveBackupEndPoint)))); + errmsg("completed backup recovery with redo LSN %X/%08X and end LSN %X/%08X", + LSN_FORMAT_ARGS(saveBackupStartPoint), + LSN_FORMAT_ARGS(saveBackupEndPoint))); } /* @@ -2255,8 +2255,8 @@ CheckRecoveryConsistency(void) reachedConsistency = true; SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT); ereport(LOG, - (errmsg("consistent recovery state reached at %X/%X", - LSN_FORMAT_ARGS(lastReplayedEndRecPtr)))); + errmsg("consistent recovery state reached at %X/%08X", + LSN_FORMAT_ARGS(lastReplayedEndRecPtr))); } /* @@ -2293,7 +2293,7 @@ rm_redo_error_callback(void *arg) xlog_block_info(&buf, record); /* translator: %s is a WAL record description */ - errcontext("WAL redo at %X/%X for %s", + errcontext("WAL redo at %X/%08X for %s", LSN_FORMAT_ARGS(record->ReadRecPtr), buf.data); @@ -2328,7 +2328,7 @@ xlog_outdesc(StringInfo buf, XLogReaderState *record) static void xlog_outrec(StringInfo buf, XLogReaderState *record) { - appendStringInfo(buf, "prev %X/%X; xid %u", + appendStringInfo(buf, "prev %X/%08X; xid %u", LSN_FORMAT_ARGS(XLogRecGetPrev(record)), XLogRecGetXid(record)); @@ -2416,10 +2416,10 @@ checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI, lsn < minRecoveryPoint && newTLI > minRecoveryPointTLI) ereport(PANIC, - (errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point 
%X/%X on timeline %u", - newTLI, - LSN_FORMAT_ARGS(minRecoveryPoint), - minRecoveryPointTLI))); + errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%08X on timeline %u", + newTLI, + LSN_FORMAT_ARGS(minRecoveryPoint), + minRecoveryPointTLI)); /* Looks good */ } @@ -2621,8 +2621,8 @@ recoveryStopsBefore(XLogReaderState *record) recoveryStopTime = 0; recoveryStopName[0] = '\0'; ereport(LOG, - (errmsg("recovery stopping before WAL location (LSN) \"%X/%X\"", - LSN_FORMAT_ARGS(recoveryStopLSN)))); + errmsg("recovery stopping before WAL location (LSN) \"%X/%08X\"", + LSN_FORMAT_ARGS(recoveryStopLSN))); return true; } @@ -2789,8 +2789,8 @@ recoveryStopsAfter(XLogReaderState *record) recoveryStopTime = 0; recoveryStopName[0] = '\0'; ereport(LOG, - (errmsg("recovery stopping after WAL location (LSN) \"%X/%X\"", - LSN_FORMAT_ARGS(recoveryStopLSN)))); + errmsg("recovery stopping after WAL location (LSN) \"%X/%08X\"", + LSN_FORMAT_ARGS(recoveryStopLSN))); return true; } @@ -2910,7 +2910,7 @@ getRecoveryStopReason(void) timestamptz_to_str(recoveryStopTime)); else if (recoveryTarget == RECOVERY_TARGET_LSN) snprintf(reason, sizeof(reason), - "%s LSN %X/%X\n", + "%s LSN %X/%08X\n", recoveryStopAfter ? 
"after" : "before", LSN_FORMAT_ARGS(recoveryStopLSN)); else if (recoveryTarget == RECOVERY_TARGET_NAME) @@ -3213,11 +3213,11 @@ ReadRecord(XLogPrefetcher *xlogprefetcher, int emode, XLogFileName(fname, xlogreader->seg.ws_tli, segno, wal_segment_size); ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr), - (errmsg("unexpected timeline ID %u in WAL segment %s, LSN %X/%X, offset %u", - xlogreader->latestPageTLI, - fname, - LSN_FORMAT_ARGS(xlogreader->latestPagePtr), - offset))); + errmsg("unexpected timeline ID %u in WAL segment %s, LSN %X/%08X, offset %u", + xlogreader->latestPageTLI, + fname, + LSN_FORMAT_ARGS(xlogreader->latestPagePtr), + offset)); record = NULL; } @@ -3429,14 +3429,14 @@ retry: errno = save_errno; ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen), (errcode_for_file_access(), - errmsg("could not read from WAL segment %s, LSN %X/%X, offset %u: %m", + errmsg("could not read from WAL segment %s, LSN %X/%08X, offset %u: %m", fname, LSN_FORMAT_ARGS(targetPagePtr), readOff))); } else ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen), (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("could not read from WAL segment %s, LSN %X/%X, offset %u: read %d of %zu", + errmsg("could not read from WAL segment %s, LSN %X/%08X, offset %u: read %d of %zu", fname, LSN_FORMAT_ARGS(targetPagePtr), readOff, r, (Size) XLOG_BLCKSZ))); goto next_record_is_invalid; @@ -3718,7 +3718,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, wait_time = wal_retrieve_retry_interval - TimestampDifferenceMilliseconds(last_fail_time, now); - elog(LOG, "waiting for WAL to become available at %X/%X", + elog(LOG, "waiting for WAL to become available at %X/%08X", LSN_FORMAT_ARGS(RecPtr)); /* Do background tasks that might benefit us later. 
*/ @@ -3864,7 +3864,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, tli = tliOfPointInHistory(tliRecPtr, expectedTLEs); if (curFileTLI > 0 && tli < curFileTLI) - elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u", + elog(ERROR, "according to history file, WAL location %X/%08X belongs to timeline %u, but previous recovered WAL file came from timeline %u", LSN_FORMAT_ARGS(tliRecPtr), tli, curFileTLI); } @@ -4177,10 +4177,10 @@ rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN) if (currentTle->end < replayLSN) { ereport(LOG, - (errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%X", - newtarget, - replayTLI, - LSN_FORMAT_ARGS(replayLSN)))); + errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%08X", + newtarget, + replayTLI, + LSN_FORMAT_ARGS(replayLSN))); return false; } @@ -4994,13 +4994,25 @@ check_recovery_target_timeline(char **newval, void **extra, GucSource source) rttg = RECOVERY_TARGET_TIMELINE_LATEST; else { + char *endp; + uint64 timeline; + rttg = RECOVERY_TARGET_TIMELINE_NUMERIC; errno = 0; - strtoul(*newval, NULL, 0); - if (errno == EINVAL || errno == ERANGE) + timeline = strtou64(*newval, &endp, 0); + + if (*endp != '\0' || errno == EINVAL || errno == ERANGE) + { + GUC_check_errdetail("\"%s\" is not a valid number.", + "recovery_target_timeline"); + return false; + } + + if (timeline < 1 || timeline > PG_UINT32_MAX) { - GUC_check_errdetail("\"recovery_target_timeline\" is not a valid number."); + GUC_check_errdetail("\"%s\" must be between %u and %u.", + "recovery_target_timeline", 1, UINT_MAX); return false; } } diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index c389b27f77d..27ea52fdfee 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c 
@@ -795,7 +795,7 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, list_free_deep(timelineHistory); - elog(DEBUG3, "switched to timeline %u valid until %X/%X", + elog(DEBUG3, "switched to timeline %u valid until %X/%08X", state->currTLI, LSN_FORMAT_ARGS(state->currTLIValidUntil)); } diff --git a/src/backend/backup/backup_manifest.c b/src/backend/backup/backup_manifest.c index 22e2be37c95..d05252f383c 100644 --- a/src/backend/backup/backup_manifest.c +++ b/src/backend/backup/backup_manifest.c @@ -281,7 +281,7 @@ AddWALInfoToBackupManifest(backup_manifest_info *manifest, XLogRecPtr startptr, } AppendToManifest(manifest, - "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }", + "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%08X\", \"End-LSN\": \"%X/%08X\" }", first_wal_range ? "" : ",\n", entry->tli, LSN_FORMAT_ARGS(tl_beginptr), diff --git a/src/backend/backup/basebackup_copy.c b/src/backend/backup/basebackup_copy.c index a284ce318ff..18b0b5a52d3 100644 --- a/src/backend/backup/basebackup_copy.c +++ b/src/backend/backup/basebackup_copy.c @@ -361,7 +361,7 @@ SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli) tstate = begin_tup_output_tupdesc(dest, tupdesc, &TTSOpsVirtual); /* Data row */ - values[0] = CStringGetTextDatum(psprintf("%X/%X", LSN_FORMAT_ARGS(ptr))); + values[0] = CStringGetTextDatum(psprintf("%X/%08X", LSN_FORMAT_ARGS(ptr))); values[1] = Int64GetDatum(tli); do_tup_output(tstate, values, nulls); diff --git a/src/backend/backup/basebackup_incremental.c b/src/backend/backup/basebackup_incremental.c index 28491b1e0ab..a0d48ff0fef 100644 --- a/src/backend/backup/basebackup_incremental.c +++ b/src/backend/backup/basebackup_incremental.c @@ -409,7 +409,7 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib, if (range->start_lsn < tlep[i]->begin) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("manifest requires WAL from initial timeline %u starting at %X/%X, but that timeline begins at 
%X/%X", + errmsg("manifest requires WAL from initial timeline %u starting at %X/%08X, but that timeline begins at %X/%08X", range->tli, LSN_FORMAT_ARGS(range->start_lsn), LSN_FORMAT_ARGS(tlep[i]->begin)))); @@ -419,7 +419,7 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib, if (range->start_lsn != tlep[i]->begin) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("manifest requires WAL from continuation timeline %u starting at %X/%X, but that timeline begins at %X/%X", + errmsg("manifest requires WAL from continuation timeline %u starting at %X/%08X, but that timeline begins at %X/%08X", range->tli, LSN_FORMAT_ARGS(range->start_lsn), LSN_FORMAT_ARGS(tlep[i]->begin)))); @@ -430,7 +430,7 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib, if (range->end_lsn > backup_state->startpoint) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("manifest requires WAL from final timeline %u ending at %X/%X, but this backup starts at %X/%X", + errmsg("manifest requires WAL from final timeline %u ending at %X/%08X, but this backup starts at %X/%08X", range->tli, LSN_FORMAT_ARGS(range->end_lsn), LSN_FORMAT_ARGS(backup_state->startpoint)), @@ -441,7 +441,7 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib, if (range->end_lsn != tlep[i]->end) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("manifest requires WAL from non-final timeline %u ending at %X/%X, but this server switched timelines at %X/%X", + errmsg("manifest requires WAL from non-final timeline %u ending at %X/%08X, but this server switched timelines at %X/%08X", range->tli, LSN_FORMAT_ARGS(range->end_lsn), LSN_FORMAT_ARGS(tlep[i]->end)))); @@ -522,18 +522,18 @@ PrepareForIncrementalBackup(IncrementalBackupInfo *ib, if (XLogRecPtrIsInvalid(tli_missing_lsn)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("WAL summaries are required on timeline %u from %X/%X to %X/%X, but no summaries for that 
timeline and LSN range exist", + errmsg("WAL summaries are required on timeline %u from %X/%08X to %X/%08X, but no summaries for that timeline and LSN range exist", tle->tli, LSN_FORMAT_ARGS(tli_start_lsn), LSN_FORMAT_ARGS(tli_end_lsn)))); else ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("WAL summaries are required on timeline %u from %X/%X to %X/%X, but the summaries for that timeline and LSN range are incomplete", + errmsg("WAL summaries are required on timeline %u from %X/%08X to %X/%08X, but the summaries for that timeline and LSN range are incomplete", tle->tli, LSN_FORMAT_ARGS(tli_start_lsn), LSN_FORMAT_ARGS(tli_end_lsn)), - errdetail("The first unsummarized LSN in this range is %X/%X.", + errdetail("The first unsummarized LSN in this range is %X/%08X.", LSN_FORMAT_ARGS(tli_missing_lsn)))); } diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 6db864892d0..fc8638c1b61 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -109,6 +109,8 @@ static const struct typinfo TypInfo[] = { F_REGROLEIN, F_REGROLEOUT}, {"regnamespace", REGNAMESPACEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, F_REGNAMESPACEIN, F_REGNAMESPACEOUT}, + {"regdatabase", REGDATABASEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, + F_REGDATABASEIN, F_REGDATABASEOUT}, {"text", TEXTOID, 0, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, DEFAULT_COLLATION_OID, F_TEXTIN, F_TEXTOUT}, {"oid", OIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 18316a3968b..7dded634eb8 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -1850,6 +1850,17 @@ find_expr_references_walker(Node *node, errmsg("constant of the type %s cannot be used here", "regrole"))); break; + + /* + * Dependencies for regdatabase should be shared among all + * databases, so 
explicitly inhibit to have dependencies. + */ + case REGDATABASEOID: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("constant of the type %s cannot be used here", + "regdatabase"))); + break; } } return false; diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index fbaed5359ad..fd6537567ea 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -665,6 +665,15 @@ CheckAttributeType(const char *attname, } /* + * For consistency with check_virtual_generated_security(). + */ + if ((flags & CHKATYPE_IS_VIRTUAL) && atttypid >= FirstUnpinnedObjectId) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("virtual generated column \"%s\" cannot have a user-defined type", attname), + errdetail("Virtual generated columns that make use of user-defined types are not yet supported.")); + + /* * This might not be strictly invalid per SQL standard, but it is pretty * useless, and it cannot be dumped, so we must disallow it. */ @@ -1100,6 +1109,7 @@ AddNewRelationType(const char *typeName, * if false, relacl is always set NULL * allow_system_table_mods: true to allow creation in system namespaces * is_internal: is this a system-generated catalog? + * relrewrite: link to original relation during a table rewrite * * Output parameters: * typaddress: if not null, gets the object address of the new pg_type entry @@ -2996,7 +3006,7 @@ AddRelationNotNullConstraints(Relation rel, List *constraints, if (constr->is_no_inherit) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("cannot define not-null constraint on column \"%s\" with NO INHERIT", + errmsg("cannot define not-null constraint with NO INHERIT on column \"%s\"", strVal(linitial(constr->keys))), errdetail("The column has an inherited not-null constraint."))); @@ -3215,6 +3225,86 @@ check_nested_generated(ParseState *pstate, Node *node) } /* + * Check security of virtual generated column expression. 
+ * + * Just like selecting from a view is exploitable (CVE-2024-7348), selecting + * from a table with virtual generated columns is exploitable. Users who are + * concerned about this can avoid selecting from views, but telling them to + * avoid selecting from tables is less practical. + * + * To address this, generation expressions for virtual + * generated columns are restricted to using built-in functions and types. We + * assume that built-in functions and types cannot be exploited for this + * purpose. Note the overall security also requires that all functions in use + * are immutable. (For example, there are some built-in non-immutable functions + * that can run arbitrary SQL.) The immutability is checked elsewhere, since + * that is a property that needs to hold independent of security + * considerations. + * + * In the future, this could be expanded by some new mechanism to declare + * other functions and types as safe or trusted for this purpose, but that is + * to be designed. + */ + +/* + * Callback for check_functions_in_node() that determines whether a function + * is user-defined. + */ +static bool +contains_user_functions_checker(Oid func_id, void *context) +{ + return (func_id >= FirstUnpinnedObjectId); +} + +/* + * Checks for all the things we don't want in the generation expressions of + * virtual generated columns for security reasons. Errors out if it finds + * one. 
+ */ +static bool +check_virtual_generated_security_walker(Node *node, void *context) +{ + ParseState *pstate = context; + + if (node == NULL) + return false; + + if (!IsA(node, List)) + { + if (check_functions_in_node(node, contains_user_functions_checker, NULL)) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("generation expression uses user-defined function"), + errdetail("Virtual generated columns that make use of user-defined functions are not yet supported."), + parser_errposition(pstate, exprLocation(node))); + + /* + * check_functions_in_node() doesn't check some node types (see + * comment there). We handle CoerceToDomain and MinMaxExpr by + * checking for built-in types. The other listed node types cannot + * call user-definable SQL-visible functions. + * + * We furthermore need this type check to handle built-in, immutable + * polymorphic functions such as array_eq(). + */ + if (exprType(node) >= FirstUnpinnedObjectId) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("generation expression uses user-defined type"), + errdetail("Virtual generated columns that make use of user-defined types are not yet supported."), + parser_errposition(pstate, exprLocation(node))); + } + + return expression_tree_walker(node, check_virtual_generated_security_walker, context); +} + +static void +check_virtual_generated_security(ParseState *pstate, Node *node) +{ + check_virtual_generated_security_walker(node, pstate); +} + +/* * Take a raw default and convert it to a cooked format ready for * storage. 
* @@ -3253,6 +3343,10 @@ cookDefault(ParseState *pstate, ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("generation expression is not immutable"))); + + /* Check security of expressions for virtual generated column */ + if (attgenerated == ATTRIBUTE_GENERATED_VIRTUAL) + check_virtual_generated_security(pstate, expr); } else { diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 739a92bdcc1..aa216683b74 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -3020,7 +3020,7 @@ index_build(Relation heapRelation, /* * Determine worker process details for parallel CREATE INDEX. Currently, - * only btree and BRIN have support for parallel builds. + * only btree, GIN, and BRIN have support for parallel builds. * * Note that planner considers parallel safety for us. */ diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 15efb02badb..e5dbbe61b81 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -674,11 +674,6 @@ GRANT SELECT ON pg_backend_memory_contexts TO pg_read_all_stats; REVOKE EXECUTE ON FUNCTION pg_get_backend_memory_contexts() FROM PUBLIC; GRANT EXECUTE ON FUNCTION pg_get_backend_memory_contexts() TO pg_read_all_stats; -REVOKE EXECUTE ON FUNCTION - pg_get_process_memory_contexts(integer, boolean, float) FROM PUBLIC; -GRANT EXECUTE ON FUNCTION - pg_get_process_memory_contexts(integer, boolean, float) TO pg_read_all_stats; - -- Statistics views CREATE VIEW pg_stat_all_tables AS @@ -900,7 +895,7 @@ CREATE VIEW pg_stat_activity AS S.wait_event, S.state, S.backend_xid, - s.backend_xmin, + S.backend_xmin, S.query_id, S.query, S.backend_type diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 4fffb76e557..7111d5d5334 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -76,7 +76,7 @@ static BufferAccessStrategy vac_strategy; static void 
do_analyze_rel(Relation onerel, - VacuumParams *params, List *va_cols, + const VacuumParams params, List *va_cols, AcquireSampleRowsFunc acquirefunc, BlockNumber relpages, bool inh, bool in_outer_xact, int elevel); static void compute_index_stats(Relation onerel, double totalrows, @@ -107,7 +107,7 @@ static Datum ind_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull); */ void analyze_rel(Oid relid, RangeVar *relation, - VacuumParams *params, List *va_cols, bool in_outer_xact, + const VacuumParams params, List *va_cols, bool in_outer_xact, BufferAccessStrategy bstrategy) { Relation onerel; @@ -116,7 +116,7 @@ analyze_rel(Oid relid, RangeVar *relation, BlockNumber relpages = 0; /* Select logging level */ - if (params->options & VACOPT_VERBOSE) + if (params.options & VACOPT_VERBOSE) elevel = INFO; else elevel = DEBUG2; @@ -138,8 +138,8 @@ analyze_rel(Oid relid, RangeVar *relation, * * Make sure to generate only logs for ANALYZE in this case. */ - onerel = vacuum_open_relation(relid, relation, params->options & ~(VACOPT_VACUUM), - params->log_min_duration >= 0, + onerel = vacuum_open_relation(relid, relation, params.options & ~(VACOPT_VACUUM), + params.log_min_duration >= 0, ShareUpdateExclusiveLock); /* leave if relation could not be opened or locked */ @@ -155,7 +155,7 @@ analyze_rel(Oid relid, RangeVar *relation, */ if (!vacuum_is_permitted_for_relation(RelationGetRelid(onerel), onerel->rd_rel, - params->options & ~VACOPT_VACUUM)) + params.options & ~VACOPT_VACUUM)) { relation_close(onerel, ShareUpdateExclusiveLock); return; @@ -227,7 +227,7 @@ analyze_rel(Oid relid, RangeVar *relation, else { /* No need for a WARNING if we already complained during VACUUM */ - if (!(params->options & VACOPT_VACUUM)) + if (!(params.options & VACOPT_VACUUM)) ereport(WARNING, (errmsg("skipping \"%s\" --- cannot analyze non-tables or special system tables", RelationGetRelationName(onerel)))); @@ -275,7 +275,7 @@ analyze_rel(Oid relid, RangeVar *relation, * appropriate 
acquirefunc for each child table. */ static void -do_analyze_rel(Relation onerel, VacuumParams *params, +do_analyze_rel(Relation onerel, const VacuumParams params, List *va_cols, AcquireSampleRowsFunc acquirefunc, BlockNumber relpages, bool inh, bool in_outer_xact, int elevel) @@ -309,9 +309,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params, PgStat_Counter startreadtime = 0; PgStat_Counter startwritetime = 0; - verbose = (params->options & VACOPT_VERBOSE) != 0; + verbose = (params.options & VACOPT_VERBOSE) != 0; instrument = (verbose || (AmAutoVacuumWorkerProcess() && - params->log_min_duration >= 0)); + params.log_min_duration >= 0)); if (inh) ereport(elevel, (errmsg("analyzing \"%s.%s\" inheritance tree", @@ -706,7 +706,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params, * amvacuumcleanup() when called in ANALYZE-only mode. The only exception * among core index AMs is GIN/ginvacuumcleanup(). */ - if (!(params->options & VACOPT_VACUUM)) + if (!(params.options & VACOPT_VACUUM)) { for (ind = 0; ind < nindexes; ind++) { @@ -736,9 +736,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params, { TimestampTz endtime = GetCurrentTimestamp(); - if (verbose || params->log_min_duration == 0 || + if (verbose || params.log_min_duration == 0 || TimestampDifferenceExceeds(starttime, endtime, - params->log_min_duration)) + params.log_min_duration)) { long delay_in_ms; WalUsage walusage; diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 54a08e4102e..b55221d44cd 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -917,7 +917,7 @@ copy_table_data(Relation NewHeap, Relation OldHeap, Relation OldIndex, bool verb * not to be aggressive about this. 
*/ memset(&params, 0, sizeof(VacuumParams)); - vacuum_get_cutoffs(OldHeap, &params, &cutoffs); + vacuum_get_cutoffs(OldHeap, params, &cutoffs); /* * FreezeXid will become the table's new relfrozenxid, and that mustn't go diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 74ae42b19a7..fae9c41db65 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt, } /* - * Extract a CopyHeaderChoice value from a DefElem. This is like - * defGetBoolean() but also accepts the special value "match". + * Extract the CopyFormatOptions.header_line value from a DefElem. + * + * Parses the HEADER option for COPY, which can be a boolean, a non-negative + * integer (number of lines to skip), or the special value "match". */ -static CopyHeaderChoice -defGetCopyHeaderChoice(DefElem *def, bool is_from) +static int +defGetCopyHeaderOption(DefElem *def, bool is_from) { /* * If no parameter value given, assume "true" is meant. @@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from) return COPY_HEADER_TRUE; /* - * Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or + * "match". 
*/ switch (nodeTag(def->arg)) { case T_Integer: - switch (intVal(def->arg)) { - case 0: - return COPY_HEADER_FALSE; - case 1: - return COPY_HEADER_TRUE; - default: - /* otherwise, error out below */ - break; + int ival = intVal(def->arg); + + if (ival < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("a negative integer value cannot be " + "specified for %s", def->defname))); + + if (!is_from && ival > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use multi-line header in COPY TO"))); + + return ival; } break; default: @@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from) } ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("%s requires a Boolean value or \"match\"", + errmsg("%s requires a Boolean value, a non-negative integer, " + "or the string \"match\"", def->defname))); return COPY_HEADER_FALSE; /* keep compiler quiet */ } @@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate, if (header_specified) errorConflictingDefElem(defel, pstate); header_specified = true; - opts_out->header_line = defGetCopyHeaderChoice(defel, is_from); + opts_out->header_line = defGetCopyHeaderOption(defel, is_from); } else if (strcmp(defel->defname, "quote") == 0) { @@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate, errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim))); /* Check header */ - if (opts_out->binary && opts_out->header_line) + if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), /*- translator: %s is the name of a COPY option, e.g. 
ON_ERROR */ diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index f5fc346e201..b1ae97b833d 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -771,21 +771,30 @@ static pg_attribute_always_inline bool NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv) { int fldct; - bool done; + bool done = false; /* only available for text or csv input */ Assert(!cstate->opts.binary); /* on input check that the header line is correct if needed */ - if (cstate->cur_lineno == 0 && cstate->opts.header_line) + if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE) { ListCell *cur; TupleDesc tupDesc; + int lines_to_skip = cstate->opts.header_line; + + /* If set to "match", one header line is skipped */ + if (cstate->opts.header_line == COPY_HEADER_MATCH) + lines_to_skip = 1; tupDesc = RelationGetDescr(cstate->rel); - cstate->cur_lineno++; - done = CopyReadLine(cstate, is_csv); + for (int i = 0; i < lines_to_skip; i++) + { + cstate->cur_lineno++; + if ((done = CopyReadLine(cstate, is_csv))) + break; + } if (cstate->opts.header_line == COPY_HEADER_MATCH) { @@ -1538,7 +1547,7 @@ GetDecimalFromHex(char hex) if (isdigit((unsigned char) hex)) return hex - '0'; else - return tolower((unsigned char) hex) - 'a' + 10; + return pg_ascii_tolower((unsigned char) hex) - 'a' + 10; } /* diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index ea6f18f2c80..67b94b91cae 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) cstate->file_encoding); /* if a header has been requested send the line */ - if (cstate->opts.header_line) + if (cstate->opts.header_line == COPY_HEADER_TRUE) { ListCell *cur; bool hdr_delim = false; diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 
5fbbcdaabb1..c95eb945016 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -1065,16 +1065,41 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) /* Check that the chosen locales are valid, and get canonical spellings */ if (!check_locale(LC_COLLATE, dbcollate, &canonname)) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate), - errhint("If the locale name is specific to ICU, use ICU_LOCALE."))); + { + if (dblocprovider == COLLPROVIDER_BUILTIN) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate), + errhint("If the locale name is specific to the builtin provider, use BUILTIN_LOCALE."))); + else if (dblocprovider == COLLPROVIDER_ICU) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate), + errhint("If the locale name is specific to the ICU provider, use ICU_LOCALE."))); + else + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate))); + } dbcollate = canonname; if (!check_locale(LC_CTYPE, dbctype, &canonname)) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype), - errhint("If the locale name is specific to ICU, use ICU_LOCALE."))); + { + if (dblocprovider == COLLPROVIDER_BUILTIN) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype), + errhint("If the locale name is specific to the builtin provider, use BUILTIN_LOCALE."))); + else if (dblocprovider == COLLPROVIDER_ICU) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype), + errhint("If the locale name is specific to the ICU provider, use ICU_LOCALE."))); + else + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("invalid 
LC_CTYPE locale name: \"%s\"", dbctype))); + } + dbctype = canonname; check_encoding_locale_matches(encoding, dbcollate, dbctype); diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index bfa83fbc3fe..7e2792ead71 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -811,14 +811,10 @@ ExplainPrintPlan(ExplainState *es, QueryDesc *queryDesc) * the queryid in any of the EXPLAIN plans to keep stable the results * generated by regression test suites. */ - if (es->verbose && queryDesc->plannedstmt->queryId != UINT64CONST(0) && + if (es->verbose && queryDesc->plannedstmt->queryId != INT64CONST(0) && compute_query_id != COMPUTE_QUERY_ID_REGRESS) { - /* - * Output the queryid as an int64 rather than a uint64 so we match - * what would be seen in the BIGINT pg_stat_statements.queryid column. - */ - ExplainPropertyInteger("Query Identifier", NULL, (int64) + ExplainPropertyInteger("Query Identifier", NULL, queryDesc->plannedstmt->queryId, es); } } diff --git a/src/backend/commands/foreigncmds.c b/src/backend/commands/foreigncmds.c index c14e038d54f..8d2d7431544 100644 --- a/src/backend/commands/foreigncmds.c +++ b/src/backend/commands/foreigncmds.c @@ -71,15 +71,26 @@ optionListToArray(List *options) foreach(cell, options) { DefElem *def = lfirst(cell); + const char *name; const char *value; Size len; text *t; + name = def->defname; value = defGetString(def); - len = VARHDRSZ + strlen(def->defname) + 1 + strlen(value); + + /* Insist that name not contain "=", else "a=b=c" is ambiguous */ + if (strchr(name, '=') != NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid option name \"%s\": must not contain \"=\"", + name))); + + len = VARHDRSZ + strlen(name) + 1 + strlen(value); + /* +1 leaves room for sprintf's trailing null */ t = palloc(len + 1); SET_VARSIZE(t, len); - sprintf(VARDATA(t), "%s=%s", def->defname, value); + sprintf(VARDATA(t), "%s=%s", name, value); astate = 
accumArrayResult(astate, PointerGetDatum(t), false, TEXTOID, diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index d962fe392cd..6f753ab6d7a 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -2469,8 +2469,8 @@ GetOperatorFromCompareType(Oid opclass, Oid rhstype, CompareType cmptype, cmptype == COMPARE_EQ ? errmsg("could not identify an equality operator for type %s", format_type_be(opcintype)) : cmptype == COMPARE_OVERLAP ? errmsg("could not identify an overlaps operator for type %s", format_type_be(opcintype)) : cmptype == COMPARE_CONTAINED_BY ? errmsg("could not identify a contained-by operator for type %s", format_type_be(opcintype)) : 0, - errdetail("Could not translate compare type %d for operator family \"%s\", input type %s, access method \"%s\".", - cmptype, get_opfamily_name(opfamily, false), format_type_be(opcintype), get_am_name(amid))); + errdetail("Could not translate compare type %d for operator family \"%s\" of access method \"%s\".", + cmptype, get_opfamily_name(opfamily, false), get_am_name(amid))); /* * We parameterize rhstype so foreign keys can ask for a <@ operator @@ -2592,7 +2592,9 @@ makeObjectName(const char *name1, const char *name2, const char *label) * constraint names.) * * Note: it is theoretically possible to get a collision anyway, if someone - * else chooses the same name concurrently. This is fairly unlikely to be + * else chooses the same name concurrently. We shorten the race condition + * window by checking for conflicting relations using SnapshotDirty, but + * that doesn't close the window entirely. This is fairly unlikely to be * a problem in practice, especially if one is holding an exclusive lock on * the relation identified by name1. 
However, if choosing multiple names * within a single command, you'd better create the new object and do @@ -2608,15 +2610,45 @@ ChooseRelationName(const char *name1, const char *name2, int pass = 0; char *relname = NULL; char modlabel[NAMEDATALEN]; + SnapshotData SnapshotDirty; + Relation pgclassrel; + + /* prepare to search pg_class with a dirty snapshot */ + InitDirtySnapshot(SnapshotDirty); + pgclassrel = table_open(RelationRelationId, AccessShareLock); /* try the unmodified label first */ strlcpy(modlabel, label, sizeof(modlabel)); for (;;) { + ScanKeyData key[2]; + SysScanDesc scan; + bool collides; + relname = makeObjectName(name1, name2, modlabel); - if (!OidIsValid(get_relname_relid(relname, namespaceid))) + /* is there any conflicting relation name? */ + ScanKeyInit(&key[0], + Anum_pg_class_relname, + BTEqualStrategyNumber, F_NAMEEQ, + CStringGetDatum(relname)); + ScanKeyInit(&key[1], + Anum_pg_class_relnamespace, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(namespaceid)); + + scan = systable_beginscan(pgclassrel, ClassNameNspIndexId, + true /* indexOK */ , + &SnapshotDirty, + 2, key); + + collides = HeapTupleIsValid(systable_getnext(scan)); + + systable_endscan(scan); + + /* break out of loop if no conflict */ + if (!collides) { if (!isconstraint || !ConstraintNameExists(relname, namespaceid)) @@ -2628,6 +2660,8 @@ ChooseRelationName(const char *name1, const char *name2, snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass); } + table_close(pgclassrel, AccessShareLock); + return relname; } @@ -4226,7 +4260,7 @@ ReindexRelationConcurrently(const ReindexStmt *stmt, Oid relationOid, const Rein false); /* - * Updating pg_index might involve TOAST table access, so ensure we + * Swapping the indexes might involve TOAST table access, so ensure we * have a valid snapshot. 
*/ PushActiveSnapshot(GetTransactionSnapshot()); diff --git a/src/backend/commands/matview.c b/src/backend/commands/matview.c index 27c2cb26ef5..188e26f0e6e 100644 --- a/src/backend/commands/matview.c +++ b/src/backend/commands/matview.c @@ -835,7 +835,8 @@ refresh_by_match_merge(Oid matviewOid, Oid tempOid, Oid relowner, if (!foundUniqueIndex) ereport(ERROR, errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("could not find suitable unique index on materialized view")); + errmsg("could not find suitable unique index on materialized view \"%s\"", + RelationGetRelationName(matviewRel))); appendStringInfoString(&querybuf, " AND newdata.* OPERATOR(pg_catalog.*=) mv.*) " diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c index 0b23d94c38e..1bf7eaae5b3 100644 --- a/src/backend/commands/publicationcmds.c +++ b/src/backend/commands/publicationcmds.c @@ -2130,8 +2130,8 @@ defGetGeneratedColsOption(DefElem *def) ereport(ERROR, errcode(ERRCODE_SYNTAX_ERROR), - errmsg("%s requires a \"none\" or \"stored\" value", - def->defname)); + errmsg("invalid value for publication parameter \"%s\": \"%s\"", def->defname, sval), + errdetail("Valid values are \"%s\" and \"%s\".", "none", "stored")); return PUBLISH_GENCOLS_NONE; /* keep compiler quiet */ } diff --git a/src/backend/commands/subscriptioncmds.c b/src/backend/commands/subscriptioncmds.c index 4aec73bcc6b..e23b0de7242 100644 --- a/src/backend/commands/subscriptioncmds.c +++ b/src/backend/commands/subscriptioncmds.c @@ -1267,7 +1267,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, IsSet(opts.specified_opts, SUBOPT_SLOT_NAME)) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("slot_name and two_phase cannot be altered at the same time"))); + errmsg("\"slot_name\" and \"two_phase\" cannot be altered at the same time"))); /* * Note that workers may still survive even if the @@ -1283,7 +1283,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, if 
(logicalrep_workers_find(subid, true, true)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("cannot alter two_phase when logical replication worker is still running"), + errmsg("cannot alter \"two_phase\" when logical replication worker is still running"), errhint("Try again after some time."))); /* @@ -1297,7 +1297,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, LookupGXactBySubid(subid)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("cannot disable two_phase when prepared transactions are present"), + errmsg("cannot disable \"two_phase\" when prepared transactions exist"), errhint("Resolve these transactions and try again."))); /* Change system catalog accordingly */ @@ -1539,7 +1539,7 @@ AlterSubscription(ParseState *pstate, AlterSubscriptionStmt *stmt, if (!XLogRecPtrIsInvalid(remote_lsn) && opts.lsn < remote_lsn) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("skip WAL location (LSN %X/%X) must be greater than origin LSN %X/%X", + errmsg("skip WAL location (LSN %X/%08X) must be greater than origin LSN %X/%08X", LSN_FORMAT_ARGS(opts.lsn), LSN_FORMAT_ARGS(remote_lsn)))); } diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 54ad38247aa..cb811520c29 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -430,8 +430,8 @@ static void AlterConstrUpdateConstraintEntry(ATAlterConstraint *cmdcon, Relation static ObjectAddress ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName, bool recurse, bool recursing, LOCKMODE lockmode); -static void QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel, - HeapTuple contuple, LOCKMODE lockmode); +static void QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation fkrel, + Oid pkrelid, HeapTuple contuple, LOCKMODE lockmode); static void QueueCheckConstraintValidation(List **wqueue, Relation conrel, Relation rel, 
char *constrName, HeapTuple contuple, bool recurse, bool recursing, LOCKMODE lockmode); @@ -2711,8 +2711,7 @@ MergeAttributes(List *columns, const List *supers, char relpersistence, RelationGetRelationName(relation)))); /* If existing rel is temp, it must belong to this session */ - if (relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP && - !relation->rd_islocaltemp) + if (RELATION_IS_OTHER_TEMP(relation)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg(!is_partition @@ -7374,7 +7373,7 @@ ATExecAddColumn(List **wqueue, AlteredTableInfo *tab, Relation rel, /* make sure datatype is legal for a column */ CheckAttributeType(NameStr(attribute->attname), attribute->atttypid, attribute->attcollation, list_make1_oid(rel->rd_rel->reltype), - 0); + (attribute->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL ? CHKATYPE_IS_VIRTUAL : 0)); InsertPgAttributeTuples(attrdesc, tupdesc, myrelid, NULL, NULL); @@ -8609,7 +8608,7 @@ ATExecSetExpression(AlteredTableInfo *tab, Relation rel, const char *colName, rel->rd_att->constr && rel->rd_att->constr->num_check > 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables with check constraints"), + errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables with check constraints"), errdetail("Column \"%s\" of relation \"%s\" is a virtual generated column.", colName, RelationGetRelationName(rel)))); @@ -8627,7 +8626,7 @@ ATExecSetExpression(AlteredTableInfo *tab, Relation rel, const char *colName, GetRelationPublications(RelationGetRelid(rel)) != NIL) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables that are part of a publication"), + errmsg("ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables that are part of a publication"), errdetail("Column \"%s\" of 
relation \"%s\" is a virtual generated column.", colName, RelationGetRelationName(rel)))); @@ -10189,7 +10188,7 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel, if (pk_has_without_overlaps && !with_period) ereport(ERROR, errcode(ERRCODE_INVALID_FOREIGN_KEY), - errmsg("foreign key must use PERIOD when referencing a primary using WITHOUT OVERLAPS")); + errmsg("foreign key must use PERIOD when referencing a primary key using WITHOUT OVERLAPS")); /* * Now we can check permissions. @@ -10330,8 +10329,8 @@ ATAddForeignKeyConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel, for_overlaps ? errmsg("could not identify an overlaps operator for foreign key") : errmsg("could not identify an equality operator for foreign key"), - errdetail("Could not translate compare type %d for operator family \"%s\", input type %s, access method \"%s\".", - cmptype, get_opfamily_name(opfamily, false), format_type_be(opcintype), get_am_name(amid))); + errdetail("Could not translate compare type %d for operator family \"%s\" of access method \"%s\".", + cmptype, get_opfamily_name(opfamily, false), get_am_name(amid))); /* * There had better be a primary equality operator for the index. 
@@ -11858,6 +11857,7 @@ AttachPartitionForeignKey(List **wqueue, if (queueValidation) { Relation conrel; + Oid confrelid; conrel = table_open(ConstraintRelationId, RowExclusiveLock); @@ -11865,9 +11865,11 @@ AttachPartitionForeignKey(List **wqueue, if (!HeapTupleIsValid(partcontup)) elog(ERROR, "cache lookup failed for constraint %u", partConstrOid); + confrelid = ((Form_pg_constraint) GETSTRUCT(partcontup))->confrelid; + /* Use the same lock as for AT_ValidateConstraint */ - QueueFKConstraintValidation(wqueue, conrel, partition, partcontup, - ShareUpdateExclusiveLock); + QueueFKConstraintValidation(wqueue, conrel, partition, confrelid, + partcontup, ShareUpdateExclusiveLock); ReleaseSysCache(partcontup); table_close(conrel, RowExclusiveLock); } @@ -12463,9 +12465,12 @@ ATExecAlterConstrEnforceability(List **wqueue, ATAlterConstraint *cmdcon, /* * Tell Phase 3 to check that the constraint is satisfied by existing - * rows. + * rows. Only applies to leaf partitions, and (for constraints that + * reference a partitioned table) only if this is not one of the + * pg_constraint rows that exist solely to support action triggers. 
*/ - if (rel->rd_rel->relkind == RELKIND_RELATION) + if (rel->rd_rel->relkind == RELKIND_RELATION && + currcon->confrelid == pkrelid) { AlteredTableInfo *tab; NewConstraint *newcon; @@ -12907,8 +12912,9 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName, con->contype != CONSTRAINT_NOTNULL) ereport(ERROR, errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("constraint \"%s\" of relation \"%s\" is not a foreign key, check, or not-null constraint", - constrName, RelationGetRelationName(rel))); + errmsg("cannot validate constraint \"%s\" of relation \"%s\"", + constrName, RelationGetRelationName(rel)), + errdetail("This operation is not supported for this type of constraint.")); if (!con->conenforced) ereport(ERROR, @@ -12919,7 +12925,8 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName, { if (con->contype == CONSTRAINT_FOREIGN) { - QueueFKConstraintValidation(wqueue, conrel, rel, tuple, lockmode); + QueueFKConstraintValidation(wqueue, conrel, rel, con->confrelid, + tuple, lockmode); } else if (con->contype == CONSTRAINT_CHECK) { @@ -12952,8 +12959,8 @@ ATExecValidateConstraint(List **wqueue, Relation rel, char *constrName, * for the specified relation and all its children. */ static void -QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel, - HeapTuple contuple, LOCKMODE lockmode) +QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation fkrel, + Oid pkrelid, HeapTuple contuple, LOCKMODE lockmode) { Form_pg_constraint con; AlteredTableInfo *tab; @@ -12964,7 +12971,17 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel, Assert(con->contype == CONSTRAINT_FOREIGN); Assert(!con->convalidated); - if (rel->rd_rel->relkind == RELKIND_RELATION) + /* + * Add the validation to phase 3's queue; not needed for partitioned + * tables themselves, only for their partitions. 
+ * + * When the referenced table (pkrelid) is partitioned, the referencing + * table (fkrel) has one pg_constraint row pointing to each partition + * thereof. These rows are there only to support action triggers and no + * table scan is needed, therefore skip this for them as well. + */ + if (fkrel->rd_rel->relkind == RELKIND_RELATION && + con->confrelid == pkrelid) { NewConstraint *newcon; Constraint *fkconstraint; @@ -12983,15 +13000,16 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel, newcon->qual = (Node *) fkconstraint; /* Find or create work queue entry for this table */ - tab = ATGetQueueEntry(wqueue, rel); + tab = ATGetQueueEntry(wqueue, fkrel); tab->constraints = lappend(tab->constraints, newcon); } /* * If the table at either end of the constraint is partitioned, we need to - * recurse and handle every constraint that is a child of this constraint. + * recurse and handle every unvalidated constraint that is a child of this + * constraint. */ - if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE || + if (fkrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE || get_rel_relkind(con->confrelid) == RELKIND_PARTITIONED_TABLE) { ScanKeyData pkey; @@ -13023,8 +13041,12 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel, childrel = table_open(childcon->conrelid, lockmode); - QueueFKConstraintValidation(wqueue, conrel, childrel, childtup, - lockmode); + /* + * NB: Note that pkrelid should be passed as-is during recursion, + * as it is required to identify the root referenced table. + */ + QueueFKConstraintValidation(wqueue, conrel, childrel, pkrelid, + childtup, lockmode); table_close(childrel, NoLock); } @@ -13032,7 +13054,11 @@ QueueFKConstraintValidation(List **wqueue, Relation conrel, Relation rel, } /* - * Now update the catalog, while we have the door open. + * Now mark the pg_constraint row as validated (even if we didn't check, + * notably the ones for partitions on the referenced side).
+ * + * We rely on transaction abort to roll back this change if phase 3 + * ultimately finds violating rows. This is a bit ugly. */ copyTuple = heap_copytuple(contuple); copy_con = (Form_pg_constraint) GETSTRUCT(copyTuple); @@ -14400,7 +14426,7 @@ ATPrepAlterColumnType(List **wqueue, /* make sure datatype is legal for a column */ CheckAttributeType(colName, targettype, targetcollid, list_make1_oid(rel->rd_rel->reltype), - 0); + (attTup->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL ? CHKATYPE_IS_VIRTUAL : 0)); if (attTup->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL) { @@ -14458,6 +14484,9 @@ ATPrepAlterColumnType(List **wqueue, /* Fix collations after all else */ assign_expr_collations(pstate, transform); + /* Expand virtual generated columns in the expr. */ + transform = expand_generated_columns_in_expr(transform, rel, 1); + /* Plan the expr now so we can accurately assess the need to rewrite. */ transform = (Node *) expression_planner((Expr *) transform); @@ -15385,9 +15414,12 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode) /* * Re-parse the index and constraint definitions, and attach them to the * appropriate work queue entries. We do this before dropping because in - * the case of a FOREIGN KEY constraint, we might not yet have exclusive - * lock on the table the constraint is attached to, and we need to get - * that before reparsing/dropping. + * the case of a constraint on another table, we might not yet have + * exclusive lock on the table the constraint is attached to, and we need + * to get that before reparsing/dropping. (That's possible at least for + * FOREIGN KEY, CHECK, and EXCLUSION constraints; in non-FK cases it + * requires a dependency on the target table's composite type in the other + * table's constraint expressions.) 
* * We can't rely on the output of deparsing to tell us which relation to * operate on, because concurrent activity might have made the name @@ -15403,7 +15435,6 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode) Form_pg_constraint con; Oid relid; Oid confrelid; - char contype; bool conislocal; tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(oldId)); @@ -15420,7 +15451,6 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode) elog(ERROR, "could not identify relation associated with constraint %u", oldId); } confrelid = con->confrelid; - contype = con->contype; conislocal = con->conislocal; ReleaseSysCache(tup); @@ -15438,12 +15468,12 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode) continue; /* - * When rebuilding an FK constraint that references the table we're - * modifying, we might not yet have any lock on the FK's table, so get - * one now. We'll need AccessExclusiveLock for the DROP CONSTRAINT - * step, so there's no value in asking for anything weaker. + * When rebuilding another table's constraint that references the + * table we're modifying, we might not yet have any lock on the other + * table, so get one now. We'll need AccessExclusiveLock for the DROP + * CONSTRAINT step, so there's no value in asking for anything weaker. */ - if (relid != tab->relid && contype == CONSTRAINT_FOREIGN) + if (relid != tab->relid) LockRelationOid(relid, AccessExclusiveLock); ATPostAlterTypeParse(oldId, relid, confrelid, @@ -15457,6 +15487,14 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode) Oid relid; relid = IndexGetRelation(oldId, false); + + /* + * As above, make sure we have lock on the index's table if it's not + * the same table. 
+ */ + if (relid != tab->relid) + LockRelationOid(relid, AccessExclusiveLock); + ATPostAlterTypeParse(oldId, relid, InvalidOid, (char *) lfirst(def_item), wqueue, lockmode, tab->rewrite); @@ -15473,6 +15511,20 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode) Oid relid; relid = StatisticsGetRelation(oldId, false); + + /* + * As above, make sure we have lock on the statistics object's table + * if it's not the same table. However, we take + * ShareUpdateExclusiveLock here, aligning with the lock level used in + * CreateStatistics and RemoveStatisticsById. + * + * CAUTION: this should be done after all cases that grab + * AccessExclusiveLock, else we risk causing deadlock due to needing + * to promote our table lock. + */ + if (relid != tab->relid) + LockRelationOid(relid, ShareUpdateExclusiveLock); + ATPostAlterTypeParse(oldId, relid, InvalidOid, (char *) lfirst(def_item), wqueue, lockmode, tab->rewrite); @@ -15696,7 +15748,7 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd, { AlterDomainStmt *stmt = (AlterDomainStmt *) stm; - if (stmt->subtype == 'C') /* ADD CONSTRAINT */ + if (stmt->subtype == AD_AddConstraint) { Constraint *con = castNode(Constraint, stmt->def); AlterTableCmd *cmd = makeNode(AlterTableCmd); @@ -17199,15 +17251,13 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode) RelationGetRelationName(parent_rel)))); /* If parent rel is temp, it must belong to this session */ - if (parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && - !parent_rel->rd_islocaltemp) + if (RELATION_IS_OTHER_TEMP(parent_rel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot inherit from temporary relation of another session"))); /* Ditto for the child */ - if (child_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && - !child_rel->rd_islocaltemp) + if (RELATION_IS_OTHER_TEMP(child_rel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot inherit to temporary 
relation of another session"))); @@ -20278,15 +20328,13 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd, RelationGetRelationName(rel)))); /* If the parent is temp, it must belong to this session */ - if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && - !rel->rd_islocaltemp) + if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot attach as partition of temporary relation of another session"))); /* Ditto for the partition */ - if (attachrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && - !attachrel->rd_islocaltemp) + if (RELATION_IS_OTHER_TEMP(attachrel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot attach temporary relation of another session as partition"))); @@ -20964,9 +21012,17 @@ ATExecDetachPartition(List **wqueue, AlteredTableInfo *tab, Relation rel, tab->rel = rel; } + /* + * Detaching the partition might involve TOAST table access, so ensure we + * have a valid snapshot. + */ + PushActiveSnapshot(GetTransactionSnapshot()); + /* Do the final part of detaching */ DetachPartitionFinalize(rel, partRel, concurrent, defaultPartOid); + PopActiveSnapshot(); + ObjectAddressSet(address, RelationRelationId, RelationGetRelid(partRel)); /* keep our lock until commit */ diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 45ae7472ab5..26d985193ae 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -939,11 +939,19 @@ DefineDomain(ParseState *pstate, CreateDomainStmt *stmt) break; case CONSTR_NOTNULL: - if (nullDefined && !typNotNull) + if (nullDefined) + { + if (!typNotNull) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting NULL/NOT NULL constraints"), + parser_errposition(pstate, constr->location)); + ereport(ERROR, - errcode(ERRCODE_SYNTAX_ERROR), - errmsg("conflicting NULL/NOT NULL constraints"), + errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("redundant NOT NULL constraint 
definition"), parser_errposition(pstate, constr->location)); + } if (constr->is_no_inherit) ereport(ERROR, errcode(ERRCODE_INVALID_OBJECT_DEFINITION), diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 33a33bf6b1c..733ef40ae7c 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -56,6 +56,7 @@ #include "utils/fmgroids.h" #include "utils/guc.h" #include "utils/guc_hooks.h" +#include "utils/injection_point.h" #include "utils/memutils.h" #include "utils/snapmgr.h" #include "utils/syscache.h" @@ -123,7 +124,7 @@ static void vac_truncate_clog(TransactionId frozenXID, MultiXactId minMulti, TransactionId lastSaneFrozenXid, MultiXactId lastSaneMinMulti); -static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, +static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params, BufferAccessStrategy bstrategy); static double compute_parallel_delay(void); static VacOptValue get_vacoptval_from_boolean(DefElem *def); @@ -464,7 +465,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel) } /* Now go through the common routine */ - vacuum(vacstmt->rels, &params, bstrategy, vac_context, isTopLevel); + vacuum(vacstmt->rels, params, bstrategy, vac_context, isTopLevel); /* Finally, clean up the vacuum memory context */ MemoryContextDelete(vac_context); @@ -493,7 +494,7 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel) * memory context that will not disappear at transaction commit. */ void -vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, +vacuum(List *relations, const VacuumParams params, BufferAccessStrategy bstrategy, MemoryContext vac_context, bool isTopLevel) { static bool in_vacuum = false; @@ -502,9 +503,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, volatile bool in_outer_xact, use_own_xacts; - Assert(params != NULL); - - stmttype = (params->options & VACOPT_VACUUM) ?
"VACUUM" : "ANALYZE"; + stmttype = (params.options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE"; /* * We cannot run VACUUM inside a user transaction block; if we were inside @@ -514,7 +513,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, * * ANALYZE (without VACUUM) can run either way. */ - if (params->options & VACOPT_VACUUM) + if (params.options & VACOPT_VACUUM) { PreventInTransactionBlock(isTopLevel, stmttype); in_outer_xact = false; @@ -537,7 +536,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, * Build list of relation(s) to process, putting any new data in * vac_context for safekeeping. */ - if (params->options & VACOPT_ONLY_DATABASE_STATS) + if (params.options & VACOPT_ONLY_DATABASE_STATS) { /* We don't process any tables in this case */ Assert(relations == NIL); @@ -553,7 +552,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, List *sublist; MemoryContext old_context; - sublist = expand_vacuum_rel(vrel, vac_context, params->options); + sublist = expand_vacuum_rel(vrel, vac_context, params.options); old_context = MemoryContextSwitchTo(vac_context); newrels = list_concat(newrels, sublist); MemoryContextSwitchTo(old_context); @@ -561,7 +560,7 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, relations = newrels; } else - relations = get_all_vacuum_rels(vac_context, params->options); + relations = get_all_vacuum_rels(vac_context, params.options); /* * Decide whether we need to start/commit our own transactions. @@ -577,11 +576,11 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, * transaction block, and also in an autovacuum worker, use own * transactions so we can release locks sooner. 
*/ - if (params->options & VACOPT_VACUUM) + if (params.options & VACOPT_VACUUM) use_own_xacts = true; else { - Assert(params->options & VACOPT_ANALYZE); + Assert(params.options & VACOPT_ANALYZE); if (AmAutoVacuumWorkerProcess()) use_own_xacts = true; else if (in_outer_xact) @@ -632,13 +631,13 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, { VacuumRelation *vrel = lfirst_node(VacuumRelation, cur); - if (params->options & VACOPT_VACUUM) + if (params.options & VACOPT_VACUUM) { if (!vacuum_rel(vrel->oid, vrel->relation, params, bstrategy)) continue; } - if (params->options & VACOPT_ANALYZE) + if (params.options & VACOPT_ANALYZE) { /* * If using separate xacts, start one for analyze. Otherwise, @@ -702,8 +701,8 @@ vacuum(List *relations, VacuumParams *params, BufferAccessStrategy bstrategy, StartTransactionCommand(); } - if ((params->options & VACOPT_VACUUM) && - !(params->options & VACOPT_SKIP_DATABASE_STATS)) + if ((params.options & VACOPT_VACUUM) && + !(params.options & VACOPT_SKIP_DATABASE_STATS)) { /* * Update pg_database.datfrozenxid, and truncate pg_xact if possible. @@ -1101,7 +1100,7 @@ get_all_vacuum_rels(MemoryContext vac_context, int options) * minimum). 
*/ bool -vacuum_get_cutoffs(Relation rel, const VacuumParams *params, +vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs) { int freeze_min_age, @@ -1117,10 +1116,10 @@ vacuum_get_cutoffs(Relation rel, const VacuumParams *params, aggressiveMXIDCutoff; /* Use mutable copies of freeze age parameters */ - freeze_min_age = params->freeze_min_age; - multixact_freeze_min_age = params->multixact_freeze_min_age; - freeze_table_age = params->freeze_table_age; - multixact_freeze_table_age = params->multixact_freeze_table_age; + freeze_min_age = params.freeze_min_age; + multixact_freeze_min_age = params.multixact_freeze_min_age; + freeze_table_age = params.freeze_table_age; + multixact_freeze_table_age = params.multixact_freeze_table_age; /* Set pg_class fields in cutoffs */ cutoffs->relfrozenxid = rel->rd_rel->relfrozenxid; @@ -1997,7 +1996,7 @@ vac_truncate_clog(TransactionId frozenXID, * At entry and exit, we are not inside a transaction. */ static bool -vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, +vacuum_rel(Oid relid, RangeVar *relation, VacuumParams params, BufferAccessStrategy bstrategy) { LOCKMODE lmode; @@ -2008,13 +2007,18 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, Oid save_userid; int save_sec_context; int save_nestlevel; + VacuumParams toast_vacuum_params; - Assert(params != NULL); + /* + * This function scribbles on the parameters, so make a copy early to + * avoid affecting the TOAST table (if we do end up recursing to it). 
+ */ + memcpy(&toast_vacuum_params, &params, sizeof(VacuumParams)); /* Begin a transaction for vacuuming this relation */ StartTransactionCommand(); - if (!(params->options & VACOPT_FULL)) + if (!(params.options & VACOPT_FULL)) { /* * In lazy vacuum, we can set the PROC_IN_VACUUM flag, which lets @@ -2040,7 +2044,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, */ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); MyProc->statusFlags |= PROC_IN_VACUUM; - if (params->is_wraparound) + if (params.is_wraparound) MyProc->statusFlags |= PROC_VACUUM_FOR_WRAPAROUND; ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; LWLockRelease(ProcArrayLock); @@ -2064,12 +2068,12 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, * vacuum, but just ShareUpdateExclusiveLock for concurrent vacuum. Either * way, we can be sure that no other backend is vacuuming the same table. */ - lmode = (params->options & VACOPT_FULL) ? + lmode = (params.options & VACOPT_FULL) ? AccessExclusiveLock : ShareUpdateExclusiveLock; /* open the relation and get the appropriate lock on it */ - rel = vacuum_open_relation(relid, relation, params->options, - params->log_min_duration >= 0, lmode); + rel = vacuum_open_relation(relid, relation, params.options, + params.log_min_duration >= 0, lmode); /* leave if relation could not be opened or locked */ if (!rel) @@ -2084,8 +2088,8 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, * This is only safe to do because we hold a session lock on the main * relation that prevents concurrent deletion.
*/ - if (OidIsValid(params->toast_parent)) - priv_relid = params->toast_parent; + if (OidIsValid(params.toast_parent)) + priv_relid = params.toast_parent; else priv_relid = RelationGetRelid(rel); @@ -2098,7 +2102,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, */ if (!vacuum_is_permitted_for_relation(priv_relid, rel->rd_rel, - params->options & ~VACOPT_ANALYZE)) + params.options & ~VACOPT_ANALYZE)) { relation_close(rel, lmode); PopActiveSnapshot(); @@ -2169,7 +2173,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, * Set index_cleanup option based on index_cleanup reloption if it wasn't * specified in VACUUM command, or when running in an autovacuum worker */ - if (params->index_cleanup == VACOPTVALUE_UNSPECIFIED) + if (params.index_cleanup == VACOPTVALUE_UNSPECIFIED) { StdRdOptIndexCleanup vacuum_index_cleanup; @@ -2180,56 +2184,74 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, ((StdRdOptions *) rel->rd_options)->vacuum_index_cleanup; if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_AUTO) - params->index_cleanup = VACOPTVALUE_AUTO; + params.index_cleanup = VACOPTVALUE_AUTO; else if (vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_ON) - params->index_cleanup = VACOPTVALUE_ENABLED; + params.index_cleanup = VACOPTVALUE_ENABLED; else { Assert(vacuum_index_cleanup == STDRD_OPTION_VACUUM_INDEX_CLEANUP_OFF); - params->index_cleanup = VACOPTVALUE_DISABLED; + params.index_cleanup = VACOPTVALUE_DISABLED; } } +#ifdef USE_INJECTION_POINTS + if (params.index_cleanup == VACOPTVALUE_AUTO) + INJECTION_POINT("vacuum-index-cleanup-auto", NULL); + else if (params.index_cleanup == VACOPTVALUE_DISABLED) + INJECTION_POINT("vacuum-index-cleanup-disabled", NULL); + else if (params.index_cleanup == VACOPTVALUE_ENABLED) + INJECTION_POINT("vacuum-index-cleanup-enabled", NULL); +#endif + /* * Check if the vacuum_max_eager_freeze_failure_rate table storage * parameter was specified. This overrides the GUC value. 
*/ if (rel->rd_options != NULL && ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0) - params->max_eager_freeze_failure_rate = + params.max_eager_freeze_failure_rate = ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate; /* * Set truncate option based on truncate reloption or GUC if it wasn't * specified in VACUUM command, or when running in an autovacuum worker */ - if (params->truncate == VACOPTVALUE_UNSPECIFIED) + if (params.truncate == VACOPTVALUE_UNSPECIFIED) { StdRdOptions *opts = (StdRdOptions *) rel->rd_options; if (opts && opts->vacuum_truncate_set) { if (opts->vacuum_truncate) - params->truncate = VACOPTVALUE_ENABLED; + params.truncate = VACOPTVALUE_ENABLED; else - params->truncate = VACOPTVALUE_DISABLED; + params.truncate = VACOPTVALUE_DISABLED; } else if (vacuum_truncate) - params->truncate = VACOPTVALUE_ENABLED; + params.truncate = VACOPTVALUE_ENABLED; else - params->truncate = VACOPTVALUE_DISABLED; + params.truncate = VACOPTVALUE_DISABLED; } +#ifdef USE_INJECTION_POINTS + if (params.truncate == VACOPTVALUE_AUTO) + INJECTION_POINT("vacuum-truncate-auto", NULL); + else if (params.truncate == VACOPTVALUE_DISABLED) + INJECTION_POINT("vacuum-truncate-disabled", NULL); + else if (params.truncate == VACOPTVALUE_ENABLED) + INJECTION_POINT("vacuum-truncate-enabled", NULL); +#endif + /* * Remember the relation's TOAST relation for later, if the caller asked * us to process it. In VACUUM FULL, though, the toast table is * automatically rebuilt by cluster_rel so we shouldn't recurse to it, * unless PROCESS_MAIN is disabled. 
*/ - if ((params->options & VACOPT_PROCESS_TOAST) != 0 && - ((params->options & VACOPT_FULL) == 0 || - (params->options & VACOPT_PROCESS_MAIN) == 0)) + if ((params.options & VACOPT_PROCESS_TOAST) != 0 && + ((params.options & VACOPT_FULL) == 0 || + (params.options & VACOPT_PROCESS_MAIN) == 0)) toast_relid = rel->rd_rel->reltoastrelid; else toast_relid = InvalidOid; @@ -2252,16 +2274,16 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, * table is required (e.g., PROCESS_TOAST is set), we force PROCESS_MAIN * to be set when we recurse to the TOAST table. */ - if (params->options & VACOPT_PROCESS_MAIN) + if (params.options & VACOPT_PROCESS_MAIN) { /* * Do the actual work --- either FULL or "lazy" vacuum */ - if (params->options & VACOPT_FULL) + if (params.options & VACOPT_FULL) { ClusterParams cluster_params = {0}; - if ((params->options & VACOPT_VERBOSE) != 0) + if ((params.options & VACOPT_VERBOSE) != 0) cluster_params.options |= CLUOPT_VERBOSE; /* VACUUM FULL is now a variant of CLUSTER; see cluster.c */ @@ -2299,19 +2321,16 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params, */ if (toast_relid != InvalidOid) { - VacuumParams toast_vacuum_params; - /* * Force VACOPT_PROCESS_MAIN so vacuum_rel() processes it. Likewise, * set toast_parent so that the privilege checks are done on the main * relation. NB: This is only safe to do because we hold a session * lock on the main relation that prevents concurrent deletion. 
*/ - memcpy(&toast_vacuum_params, params, sizeof(VacuumParams)); toast_vacuum_params.options |= VACOPT_PROCESS_MAIN; toast_vacuum_params.toast_parent = relid; - vacuum_rel(toast_relid, NULL, &toast_vacuum_params, bstrategy); + vacuum_rel(toast_relid, NULL, toast_vacuum_params, bstrategy); } /* diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c index 2b9d548cdeb..0feea1d30ec 100644 --- a/src/backend/commands/vacuumparallel.c +++ b/src/backend/commands/vacuumparallel.c @@ -63,7 +63,7 @@ typedef struct PVShared */ Oid relid; int elevel; - uint64 queryid; + int64 queryid; /* * Fields for both index vacuum and cleanup. diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index 255bd795361..b5400749353 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -144,7 +144,7 @@ execTuplesHashPrepare(int numCols, * hashfunctions: FmgrInfos of datatype-specific hashing functions to use * collations: collations to use in comparisons * nbuckets: initial estimate of hashtable size - * additionalsize: size of data stored in ->additional + * additionalsize: size of data that may be stored along with the hash entry * metacxt: memory context for long-lived allocation, but not per-entry data * tablecxt: memory context in which to store table entries * tempcxt: short-lived context for evaluation hash and comparison functions @@ -288,7 +288,7 @@ ResetTupleHashTable(TupleHashTable hashtable) * * If isnew isn't NULL, then a new entry is created if no existing entry * matches. On return, *isnew is true if the entry is newly created, - * false if it existed already. ->additional_data in the new entry has + * false if it existed already. The additional data in the new entry has * been zeroed. 
*/ TupleHashEntry diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index bdf862b2406..ca33a854278 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -279,7 +279,7 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) * executor is performing an UPDATE that could not use an * optimization like heapam's HOT (in more general terms a * call to table_tuple_update() took place and set - * 'update_indexes' to TUUI_All). Receiving this hint makes + * 'update_indexes' to TU_All). Receiving this hint makes * us consider if we should pass down the 'indexUnchanged' * hint in turn. That's something that we figure out for * each index_insert() call iff 'update' is true. @@ -290,7 +290,7 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) * HOT has been applied and any updated columns are indexed * only by summarizing indexes (or in more general terms a * call to table_tuple_update() took place and set - * 'update_indexes' to TUUI_Summarizing). We can (and must) + * 'update_indexes' to TU_Summarizing). We can (and must) * therefore only update the indexes that have * 'amsummarizing' = true. * diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 2bc89bf84dc..54da8e7995b 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -64,6 +64,7 @@ #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" #include "rewrite/rewriteHandler.h" +#include "rewrite/rewriteManip.h" #include "storage/lmgr.h" #include "utils/builtins.h" #include "utils/datum.h" @@ -3735,6 +3736,7 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate) switch (action->commandType) { case CMD_INSERT: + /* INSERT actions always use rootRelInfo */ ExecCheckPlanOutput(rootRelInfo->ri_RelationDesc, action->targetList); @@ -3774,9 +3776,23 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate) } else { - /* not partitioned? 
use the stock relation and slot */ - tgtslot = resultRelInfo->ri_newTupleSlot; - tgtdesc = RelationGetDescr(resultRelInfo->ri_RelationDesc); + /* + * If the MERGE targets an inherited table, we insert + * into the root table, so we must initialize its + * "new" tuple slot, if not already done, and use its + * relation descriptor for the projection. + * + * For non-inherited tables, rootRelInfo and + * resultRelInfo are the same, and the "new" tuple + * slot will already have been initialized. + */ + if (rootRelInfo->ri_newTupleSlot == NULL) + rootRelInfo->ri_newTupleSlot = + table_slot_create(rootRelInfo->ri_RelationDesc, + &estate->es_tupleTable); + + tgtslot = rootRelInfo->ri_newTupleSlot; + tgtdesc = RelationGetDescr(rootRelInfo->ri_RelationDesc); } action_state->mas_proj = @@ -3809,6 +3825,114 @@ ExecInitMerge(ModifyTableState *mtstate, EState *estate) } } } + + /* + * If the MERGE targets an inherited table, any INSERT actions will use + * rootRelInfo, and rootRelInfo will not be in the resultRelInfo array. + * Therefore we must initialize its WITH CHECK OPTION constraints and + * RETURNING projection, as ExecInitModifyTable did for the resultRelInfo + * entries. + * + * Note that the planner does not build a withCheckOptionList or + * returningList for the root relation, but as in ExecInitPartitionInfo, + * we can use the first resultRelInfo entry as a reference to calculate + * the attno's for the root table. 
+ */ + if (rootRelInfo != mtstate->resultRelInfo && + rootRelInfo->ri_RelationDesc->rd_rel->relkind != RELKIND_PARTITIONED_TABLE && + (mtstate->mt_merge_subcommands & MERGE_INSERT) != 0) + { + ModifyTable *node = (ModifyTable *) mtstate->ps.plan; + Relation rootRelation = rootRelInfo->ri_RelationDesc; + Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc; + int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex; + AttrMap *part_attmap = NULL; + bool found_whole_row; + + if (node->withCheckOptionLists != NIL) + { + List *wcoList; + List *wcoExprs = NIL; + + /* There should be as many WCO lists as result rels */ + Assert(list_length(node->withCheckOptionLists) == + list_length(node->resultRelations)); + + /* + * Use the first WCO list as a reference. In the most common case, + * this will be for the same relation as rootRelInfo, and so there + * will be no need to adjust its attno's. + */ + wcoList = linitial(node->withCheckOptionLists); + if (rootRelation != firstResultRel) + { + /* Convert any Vars in it to contain the root's attno's */ + part_attmap = + build_attrmap_by_name(RelationGetDescr(rootRelation), + RelationGetDescr(firstResultRel), + false); + + wcoList = (List *) + map_variable_attnos((Node *) wcoList, + firstVarno, 0, + part_attmap, + RelationGetForm(rootRelation)->reltype, + &found_whole_row); + } + + foreach(lc, wcoList) + { + WithCheckOption *wco = lfirst_node(WithCheckOption, lc); + ExprState *wcoExpr = ExecInitQual(castNode(List, wco->qual), + &mtstate->ps); + + wcoExprs = lappend(wcoExprs, wcoExpr); + } + + rootRelInfo->ri_WithCheckOptions = wcoList; + rootRelInfo->ri_WithCheckOptionExprs = wcoExprs; + } + + if (node->returningLists != NIL) + { + List *returningList; + + /* There should be as many returning lists as result rels */ + Assert(list_length(node->returningLists) == + list_length(node->resultRelations)); + + /* + * Use the first returning list as a reference. 
In the most common + * case, this will be for the same relation as rootRelInfo, and so + * there will be no need to adjust its attno's. + */ + returningList = linitial(node->returningLists); + if (rootRelation != firstResultRel) + { + /* Convert any Vars in it to contain the root's attno's */ + if (part_attmap == NULL) + part_attmap = + build_attrmap_by_name(RelationGetDescr(rootRelation), + RelationGetDescr(firstResultRel), + false); + + returningList = (List *) + map_variable_attnos((Node *) returningList, + firstVarno, 0, + part_attmap, + RelationGetForm(rootRelation)->reltype, + &found_whole_row); + } + rootRelInfo->ri_returningList = returningList; + + /* Initialize the RETURNING projection */ + rootRelInfo->ri_projectReturning = + ExecBuildProjectionInfo(returningList, econtext, + mtstate->ps.ps_ResultTupleSlot, + &mtstate->ps, + RelationGetDescr(rootRelation)); + } + } } /* diff --git a/src/backend/executor/nodeTidrangescan.c b/src/backend/executor/nodeTidrangescan.c index ab2eab9596e..26f7420b64b 100644 --- a/src/backend/executor/nodeTidrangescan.c +++ b/src/backend/executor/nodeTidrangescan.c @@ -128,9 +128,11 @@ TidExprListCreate(TidRangeScanState *tidrangestate) * TidRangeEval * * Compute and set node's block and offset range to scan by evaluating - * the trss_tidexprs. Returns false if we detect the range cannot + * node->trss_tidexprs. Returns false if we detect the range cannot * contain any tuples. Returns true if it's possible for the range to - * contain tuples. + * contain tuples. We don't bother validating that trss_mintid is less + * than or equal to trss_maxtid, as the scan_set_tidrange() table AM + * function will handle that. 
* ---------------------------------------------------------------- */ static bool diff --git a/src/backend/jit/README b/src/backend/jit/README index 5427bdf2153..a40950dfb03 100644 --- a/src/backend/jit/README +++ b/src/backend/jit/README @@ -205,7 +205,7 @@ The ability to do so allows us to get the LLVM IR for all operators bitcode files get installed into the server's $pkglibdir/bitcode/postgres/ Using existing LLVM functionality (for parallel LTO compilation), -additionally an index is over these is stored to +additionally an index over these is stored to $pkglibdir/bitcode/postgres.index.bc Similarly extensions can install code into diff --git a/src/backend/jit/llvm/meson.build b/src/backend/jit/llvm/meson.build index c8e06dfbe35..805fbd69006 100644 --- a/src/backend/jit/llvm/meson.build +++ b/src/backend/jit/llvm/meson.build @@ -53,7 +53,7 @@ llvm_irgen_args = [ if ccache.found() llvm_irgen_command = ccache - llvm_irgen_args = [clang.path()] + llvm_irgen_args + llvm_irgen_args = [clang.full_path()] + llvm_irgen_args else llvm_irgen_command = clang endif diff --git a/src/backend/lib/README b/src/backend/lib/README index f2fb591237d..c28cbe356f0 100644 --- a/src/backend/lib/README +++ b/src/backend/lib/README @@ -1,8 +1,6 @@ This directory contains a general purpose data structures, for use anywhere in the backend: -binaryheap.c - a binary heap - bipartite_match.c - Hopcroft-Karp maximum cardinality algorithm for bipartite graphs bloomfilter.c - probabilistic, space-efficient set membership testing @@ -21,8 +19,6 @@ pairingheap.c - a pairing heap rbtree.c - a red-black tree -stringinfo.c - an extensible string type - Aside from the inherent characteristics of the data structures, there are a few practical differences between the binary heap and the pairing heap. 
The diff --git a/src/backend/libpq/be-secure-gssapi.c b/src/backend/libpq/be-secure-gssapi.c index 717ba9824f9..5d98c58ffa8 100644 --- a/src/backend/libpq/be-secure-gssapi.c +++ b/src/backend/libpq/be-secure-gssapi.c @@ -46,11 +46,18 @@ * don't want the other side to send arbitrarily huge packets as we * would have to allocate memory for them to then pass them to GSSAPI. * - * Therefore, these two #define's are effectively part of the protocol + * Therefore, this #define is effectively part of the protocol * spec and can't ever be changed. */ -#define PQ_GSS_SEND_BUFFER_SIZE 16384 -#define PQ_GSS_RECV_BUFFER_SIZE 16384 +#define PQ_GSS_MAX_PACKET_SIZE 16384 /* includes uint32 header word */ + +/* + * However, during the authentication exchange we must cope with whatever + * message size the GSSAPI library wants to send (because our protocol + * doesn't support splitting those messages). Depending on configuration + * those messages might be as much as 64kB. + */ +#define PQ_GSS_AUTH_BUFFER_SIZE 65536 /* includes uint32 header word */ /* * Since we manage at most one GSS-encrypted connection per backend, @@ -114,9 +121,9 @@ be_gssapi_write(Port *port, const void *ptr, size_t len) * again, so if it offers a len less than that, something is wrong. * * Note: it may seem attractive to report partial write completion once - * we've successfully sent any encrypted packets. However, that can cause - * problems for callers; notably, pqPutMsgEnd's heuristic to send only - * full 8K blocks interacts badly with such a hack. We won't save much, + * we've successfully sent any encrypted packets. However, doing that + * expands the state space of this processing and has been responsible for + * bugs in the past (cf. commit d053a879b). We won't save much, * typically, by letting callers discard data early, so don't risk it. 
*/ if (len < PqGSSSendConsumed) @@ -210,12 +217,12 @@ be_gssapi_write(Port *port, const void *ptr, size_t len) errno = ECONNRESET; return -1; } - if (output.length > PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32)) + if (output.length > PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32)) { ereport(COMMERROR, (errmsg("server tried to send oversize GSSAPI packet (%zu > %zu)", (size_t) output.length, - PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32)))); + PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32)))); errno = ECONNRESET; return -1; } @@ -346,12 +353,12 @@ be_gssapi_read(Port *port, void *ptr, size_t len) /* Decode the packet length and check for overlength packet */ input.length = pg_ntoh32(*(uint32 *) PqGSSRecvBuffer); - if (input.length > PQ_GSS_RECV_BUFFER_SIZE - sizeof(uint32)) + if (input.length > PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32)) { ereport(COMMERROR, (errmsg("oversize GSSAPI packet sent by the client (%zu > %zu)", (size_t) input.length, - PQ_GSS_RECV_BUFFER_SIZE - sizeof(uint32)))); + PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32)))); errno = ECONNRESET; return -1; } @@ -517,10 +524,13 @@ secure_open_gssapi(Port *port) * that will never use them, and we ensure that the buffers are * sufficiently aligned for the length-word accesses that we do in some * places in this file. + * + * We'll use PQ_GSS_AUTH_BUFFER_SIZE-sized buffers until transport + * negotiation is complete, then switch to PQ_GSS_MAX_PACKET_SIZE. 
*/ - PqGSSSendBuffer = malloc(PQ_GSS_SEND_BUFFER_SIZE); - PqGSSRecvBuffer = malloc(PQ_GSS_RECV_BUFFER_SIZE); - PqGSSResultBuffer = malloc(PQ_GSS_RECV_BUFFER_SIZE); + PqGSSSendBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE); + PqGSSRecvBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE); + PqGSSResultBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE); if (!PqGSSSendBuffer || !PqGSSRecvBuffer || !PqGSSResultBuffer) ereport(FATAL, (errcode(ERRCODE_OUT_OF_MEMORY), @@ -568,16 +578,16 @@ secure_open_gssapi(Port *port) /* * During initialization, packets are always fully consumed and - * shouldn't ever be over PQ_GSS_RECV_BUFFER_SIZE in length. + * shouldn't ever be over PQ_GSS_AUTH_BUFFER_SIZE in total length. * * Verify on our side that the client doesn't do something funny. */ - if (input.length > PQ_GSS_RECV_BUFFER_SIZE) + if (input.length > PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32)) { ereport(COMMERROR, - (errmsg("oversize GSSAPI packet sent by the client (%zu > %d)", + (errmsg("oversize GSSAPI packet sent by the client (%zu > %zu)", (size_t) input.length, - PQ_GSS_RECV_BUFFER_SIZE))); + PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32)))); return -1; } @@ -631,12 +641,12 @@ secure_open_gssapi(Port *port) { uint32 netlen = pg_hton32(output.length); - if (output.length > PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32)) + if (output.length > PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32)) { ereport(COMMERROR, (errmsg("server tried to send oversize GSSAPI packet (%zu > %zu)", (size_t) output.length, - PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32)))); + PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32)))); gss_release_buffer(&minor, &output); return -1; } @@ -692,11 +702,28 @@ secure_open_gssapi(Port *port) } /* + * Release the large authentication buffers and allocate the ones we want + * for normal operation. 
+ */ + free(PqGSSSendBuffer); + free(PqGSSRecvBuffer); + free(PqGSSResultBuffer); + PqGSSSendBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE); + PqGSSRecvBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE); + PqGSSResultBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE); + if (!PqGSSSendBuffer || !PqGSSRecvBuffer || !PqGSSResultBuffer) + ereport(FATAL, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + PqGSSSendLength = PqGSSSendNext = PqGSSSendConsumed = 0; + PqGSSRecvLength = PqGSSResultLength = PqGSSResultNext = 0; + + /* * Determine the max packet size which will fit in our buffer, after * accounting for the length. be_gssapi_write will need this. */ major = gss_wrap_size_limit(&minor, port->gss->ctx, 1, GSS_C_QOP_DEFAULT, - PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32), + PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32), &PqGSSMaxPktSize); if (GSS_ERROR(major)) diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c index 64ff3ce3d6a..c8b63ef8249 100644 --- a/src/backend/libpq/be-secure-openssl.c +++ b/src/backend/libpq/be-secure-openssl.c @@ -1436,10 +1436,10 @@ initialize_ecdh(SSL_CTX *context, bool isServerStart) */ ereport(isServerStart ? 
FATAL : LOG, errcode(ERRCODE_CONFIG_FILE_ERROR), - errmsg("failed to set group names specified in ssl_groups: %s", + errmsg("could not set group names specified in ssl_groups: %s", SSLerrmessageExt(ERR_get_error(), _("No valid groups found"))), - errhint("Ensure that each group name is spelled correctly and supported by the installed version of OpenSSL")); + errhint("Ensure that each group name is spelled correctly and supported by the installed version of OpenSSL.")); return false; } #endif diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl index 77659b0f760..9ecddb14231 100644 --- a/src/backend/nodes/gen_node_support.pl +++ b/src/backend/nodes/gen_node_support.pl @@ -1039,6 +1039,11 @@ _read${n}(void) print $off "\tWRITE_UINT_FIELD($f);\n"; print $rff "\tREAD_UINT_FIELD($f);\n" unless $no_read; } + elsif ($t eq 'int64') + { + print $off "\tWRITE_INT64_FIELD($f);\n"; + print $rff "\tREAD_INT64_FIELD($f);\n" unless $no_read; + } elsif ($t eq 'uint64' || $t eq 'AclMode') { @@ -1324,7 +1329,7 @@ _jumble${n}(JumbleState *jstate, Node *node) # Node type. Squash constants if requested. 
if ($query_jumble_squash) { - print $jff "\tJUMBLE_ELEMENTS($f);\n" + print $jff "\tJUMBLE_ELEMENTS($f, node);\n" unless $query_jumble_ignore; } else diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index ceac3fd8620..eaf391fc2ab 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -51,6 +51,12 @@ static void outDouble(StringInfo str, double d); #define WRITE_UINT_FIELD(fldname) \ appendStringInfo(str, " :" CppAsString(fldname) " %u", node->fldname) +/* Write a signed integer field (anything written with INT64_FORMAT) */ +#define WRITE_INT64_FIELD(fldname) \ + appendStringInfo(str, \ + " :" CppAsString(fldname) " " INT64_FORMAT, \ + node->fldname) + /* Write an unsigned integer field (anything written with UINT64_FORMAT) */ #define WRITE_UINT64_FIELD(fldname) \ appendStringInfo(str, " :" CppAsString(fldname) " " UINT64_FORMAT, \ @@ -647,6 +653,8 @@ _outA_Expr(StringInfo str, const A_Expr *node) WRITE_NODE_FIELD(lexpr); WRITE_NODE_FIELD(rexpr); + WRITE_LOCATION_FIELD(rexpr_list_start); + WRITE_LOCATION_FIELD(rexpr_list_end); WRITE_LOCATION_FIELD(location); } diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index d1e82a63f09..31f97151977 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -21,6 +21,11 @@ * tree(s) generated from the query. The executor can then use this value * to blame query costs on the proper queryId. * + * Arrays of two or more constants and PARAM_EXTERN parameters are "squashed" + * and contribute only once to the jumble. This has the effect that queries + * that differ only on the length of such lists have the same queryId. 
+ * + * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * @@ -56,16 +61,18 @@ int compute_query_id = COMPUTE_QUERY_ID_AUTO; bool query_id_enabled = false; static JumbleState *InitJumble(void); -static uint64 DoJumble(JumbleState *jstate, Node *node); +static int64 DoJumble(JumbleState *jstate, Node *node); static void AppendJumble(JumbleState *jstate, const unsigned char *value, Size size); static void FlushPendingNulls(JumbleState *jstate); static void RecordConstLocation(JumbleState *jstate, - int location, bool squashed); + bool extern_param, + int location, int len); static void _jumbleNode(JumbleState *jstate, Node *node); -static void _jumbleElements(JumbleState *jstate, List *elements); -static void _jumbleA_Const(JumbleState *jstate, Node *node); static void _jumbleList(JumbleState *jstate, Node *node); +static void _jumbleElements(JumbleState *jstate, List *elements, Node *node); +static void _jumbleParam(JumbleState *jstate, Node *node); +static void _jumbleA_Const(JumbleState *jstate, Node *node); static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node); static void _jumbleRangeTblEntry_eref(JumbleState *jstate, RangeTblEntry *rte, @@ -141,12 +148,12 @@ JumbleQuery(Query *query) * If we are unlucky enough to get a hash of zero, use 1 instead for * normal statements and 2 for utility queries. 
*/ - if (query->queryId == UINT64CONST(0)) + if (query->queryId == INT64CONST(0)) { if (query->utilityStmt) - query->queryId = UINT64CONST(2); + query->queryId = INT64CONST(2); else - query->queryId = UINT64CONST(1); + query->queryId = INT64CONST(1); } return jstate; @@ -185,6 +192,7 @@ InitJumble(void) jstate->clocations_count = 0; jstate->highest_extern_param_id = 0; jstate->pending_nulls = 0; + jstate->has_squashed_lists = false; #ifdef USE_ASSERT_CHECKING jstate->total_jumble_len = 0; #endif @@ -197,7 +205,7 @@ InitJumble(void) * Jumble the given Node using the given JumbleState and return the resulting * jumble hash. */ -static uint64 +static int64 DoJumble(JumbleState *jstate, Node *node) { /* Jumble the given node */ @@ -207,10 +215,14 @@ DoJumble(JumbleState *jstate, Node *node) if (jstate->pending_nulls > 0) FlushPendingNulls(jstate); + /* Squashed list found, reset highest_extern_param_id */ + if (jstate->has_squashed_lists) + jstate->highest_extern_param_id = 0; + /* Process the jumble buffer and produce the hash value */ - return DatumGetUInt64(hash_any_extended(jstate->jumble, - jstate->jumble_len, - 0)); + return DatumGetInt64(hash_any_extended(jstate->jumble, + jstate->jumble_len, + 0)); } /* @@ -256,10 +268,10 @@ AppendJumbleInternal(JumbleState *jstate, const unsigned char *item, if (unlikely(jumble_len >= JUMBLE_SIZE)) { - uint64 start_hash; + int64 start_hash; - start_hash = DatumGetUInt64(hash_any_extended(jumble, - JUMBLE_SIZE, 0)); + start_hash = DatumGetInt64(hash_any_extended(jumble, + JUMBLE_SIZE, 0)); memcpy(jumble, &start_hash, sizeof(start_hash)); jumble_len = sizeof(start_hash); } @@ -373,15 +385,17 @@ FlushPendingNulls(JumbleState *jstate) /* - * Record location of constant within query string of query tree that is - * currently being walked. + * Record the location of some kind of constant within a query string. 
+ * These are not only bare constants but also expressions that ultimately + * constitute a constant, such as those inside casts and simple function + * calls; if extern_param, then it corresponds to a PARAM_EXTERN Param. * - * 'squashed' signals that the constant represents the first or the last - * element in a series of merged constants, and everything but the first/last - * element contributes nothing to the jumble hash. + * If length is -1, it indicates a single such constant element. If + * it's a positive integer, it indicates the length of a squashable + * list of them. */ static void -RecordConstLocation(JumbleState *jstate, int location, bool squashed) +RecordConstLocation(JumbleState *jstate, bool extern_param, int location, int len) { /* -1 indicates unknown or undefined location */ if (location >= 0) @@ -396,9 +410,15 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed) sizeof(LocationLen)); } jstate->clocations[jstate->clocations_count].location = location; - /* initialize lengths to -1 to simplify third-party module usage */ - jstate->clocations[jstate->clocations_count].squashed = squashed; - jstate->clocations[jstate->clocations_count].length = -1; + + /* + * Lengths are either positive integers (indicating a squashable + * list), or -1. + */ + Assert(len > -1 || len == -1); + jstate->clocations[jstate->clocations_count].length = len; + jstate->clocations[jstate->clocations_count].squashed = (len > -1); + jstate->clocations[jstate->clocations_count].extern_param = extern_param; jstate->clocations_count++; } } @@ -407,47 +427,74 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed) * Subroutine for _jumbleElements: Verify a few simple cases where we can * deduce that the expression is a constant: * - * - Ignore a possible wrapping RelabelType and CoerceViaIO. - * - If it's a FuncExpr, check that the function is an implicit + * - See through any wrapping RelabelType and CoerceViaIO layers. 
+ * - If it's a FuncExpr, check that the function is a builtin * cast and its arguments are Const. - * - Otherwise test if the expression is a simple Const. + * - Otherwise test if the expression is a simple Const or a + * PARAM_EXTERN param. */ static bool -IsSquashableConst(Node *element) +IsSquashableConstant(Node *element) { - if (IsA(element, RelabelType)) - element = (Node *) ((RelabelType *) element)->arg; - - if (IsA(element, CoerceViaIO)) - element = (Node *) ((CoerceViaIO *) element)->arg; - - if (IsA(element, FuncExpr)) +restart: + switch (nodeTag(element)) { - FuncExpr *func = (FuncExpr *) element; - ListCell *temp; + case T_RelabelType: + /* Unwrap RelabelType */ + element = (Node *) ((RelabelType *) element)->arg; + goto restart; - if (func->funcformat != COERCE_IMPLICIT_CAST && - func->funcformat != COERCE_EXPLICIT_CAST) - return false; + case T_CoerceViaIO: + /* Unwrap CoerceViaIO */ + element = (Node *) ((CoerceViaIO *) element)->arg; + goto restart; - if (func->funcid > FirstGenbkiObjectId) - return false; + case T_Const: + return true; - foreach(temp, func->args) - { - Node *arg = lfirst(temp); + case T_Param: + return castNode(Param, element)->paramkind == PARAM_EXTERN; - if (!IsA(arg, Const)) /* XXX we could recurse here instead */ - return false; - } + case T_FuncExpr: + { + FuncExpr *func = (FuncExpr *) element; + ListCell *temp; - return true; - } + if (func->funcformat != COERCE_IMPLICIT_CAST && + func->funcformat != COERCE_EXPLICIT_CAST) + return false; - if (!IsA(element, Const)) - return false; + if (func->funcid > FirstGenbkiObjectId) + return false; - return true; + /* + * We can check function arguments recursively, being careful + * about recursing too deep. At each recursion level it's + * enough to test the stack on the first element. (Note that + * I wasn't able to hit this without bloating the stack + * artificially in this function: the parser errors out before + * stack size becomes a problem here.) 
+ */ + foreach(temp, func->args) + { + Node *arg = lfirst(temp); + + if (!IsA(arg, Const)) + { + if (foreach_current_index(temp) == 0 && + stack_is_too_deep()) + return false; + else if (!IsSquashableConstant(arg)) + return false; + } + } + + return true; + } + + default: + return false; + } } /* @@ -457,39 +504,33 @@ IsSquashableConst(Node *element) * Return value indicates if squashing is possible. * * Note that this function searches only for explicit Const nodes with - * possibly very simple decorations on top, and does not try to simplify - * expressions. + * possibly very simple decorations on top and PARAM_EXTERN parameters, + * and does not try to simplify expressions. */ static bool -IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) +IsSquashableConstantList(List *elements) { ListCell *temp; - /* - * If squashing is disabled, or the list is too short, we don't try to - * squash it. - */ + /* If the list is too short, we don't try to squash it. */ if (list_length(elements) < 2) return false; foreach(temp, elements) { - if (!IsSquashableConst(lfirst(temp))) + if (!IsSquashableConstant(lfirst(temp))) return false; } - *firstExpr = linitial(elements); - *lastExpr = llast(elements); - return true; } #define JUMBLE_NODE(item) \ _jumbleNode(jstate, (Node *) expr->item) -#define JUMBLE_ELEMENTS(list) \ - _jumbleElements(jstate, (List *) expr->list) +#define JUMBLE_ELEMENTS(list, node) \ + _jumbleElements(jstate, (List *) expr->list, node) #define JUMBLE_LOCATION(location) \ - RecordConstLocation(jstate, expr->location, false) + RecordConstLocation(jstate, false, expr->location, -1) #define JUMBLE_FIELD(item) \ do { \ if (sizeof(expr->item) == 8) \ @@ -516,42 +557,6 @@ do { \ #include "queryjumblefuncs.funcs.c" -/* - * We jumble lists of constant elements as one individual item regardless - * of how many elements are in the list. 
This means different queries - * jumble to the same query_id, if the only difference is the number of - * elements in the list. - */ -static void -_jumbleElements(JumbleState *jstate, List *elements) -{ - Node *first, - *last; - - if (IsSquashableConstList(elements, &first, &last)) - { - /* - * If this list of elements is squashable, keep track of the location - * of its first and last elements. When reading back the locations - * array, we'll see two consecutive locations with ->squashed set to - * true, indicating the location of initial and final elements of this - * list. - * - * For the limited set of cases we support now (implicit coerce via - * FuncExpr, Const) it's fine to use exprLocation of the 'last' - * expression, but if more complex composite expressions are to be - * supported (e.g., OpExpr or FuncExpr as an explicit call), more - * sophisticated tracking will be needed. - */ - RecordConstLocation(jstate, exprLocation(first), true); - RecordConstLocation(jstate, exprLocation(last), true); - } - else - { - _jumbleNode(jstate, (Node *) elements); - } -} - static void _jumbleNode(JumbleState *jstate, Node *node) { @@ -593,26 +598,6 @@ _jumbleNode(JumbleState *jstate, Node *node) break; } - /* Special cases to handle outside the automated code */ - switch (nodeTag(expr)) - { - case T_Param: - { - Param *p = (Param *) node; - - /* - * Update the highest Param id seen, in order to start - * normalization correctly. - */ - if (p->paramkind == PARAM_EXTERN && - p->paramid > jstate->highest_extern_param_id) - jstate->highest_extern_param_id = p->paramid; - } - break; - default: - break; - } - /* Ensure we added something to the jumble buffer */ Assert(jstate->total_jumble_len > prev_jumble_len); } @@ -648,6 +633,79 @@ _jumbleList(JumbleState *jstate, Node *node) } } +/* + * We try to jumble lists of expressions as one individual item regardless + * of how many elements are in the list. 
This is known as squashing, which + * results in different queries jumbling to the same query_id, if the only + * difference is the number of elements in the list. + * + * We allow constants and PARAM_EXTERN parameters to be squashed. To normalize + * such queries, we use the start and end locations of the list of elements in + * a list. + */ +static void +_jumbleElements(JumbleState *jstate, List *elements, Node *node) +{ + bool normalize_list = false; + + if (IsSquashableConstantList(elements)) + { + if (IsA(node, ArrayExpr)) + { + ArrayExpr *aexpr = (ArrayExpr *) node; + + if (aexpr->list_start > 0 && aexpr->list_end > 0) + { + RecordConstLocation(jstate, + false, + aexpr->list_start + 1, + (aexpr->list_end - aexpr->list_start) - 1); + normalize_list = true; + jstate->has_squashed_lists = true; + } + } + } + + if (!normalize_list) + { + _jumbleNode(jstate, (Node *) elements); + } +} + +/* + * We store the highest param ID of extern params. This can later be used + * to start the numbering of the placeholder for squashed lists. + */ +static void +_jumbleParam(JumbleState *jstate, Node *node) +{ + Param *expr = (Param *) node; + + JUMBLE_FIELD(paramkind); + JUMBLE_FIELD(paramid); + JUMBLE_FIELD(paramtype); + /* paramtypmod and paramcollid are ignored */ + + if (expr->paramkind == PARAM_EXTERN) + { + /* + * At this point, only external parameter locations outside of + * squashable lists will be recorded. + */ + RecordConstLocation(jstate, true, expr->location, -1); + + /* + * Update the highest Param id seen, in order to start normalization + * correctly. + * + * Note: This value is reset at the end of jumbling if there exists a + * squashable list. See the comment in the definition of JumbleState.
+ */ + if (expr->paramid > jstate->highest_extern_param_id) + jstate->highest_extern_param_id = expr->paramid; + } +} + static void _jumbleA_Const(JumbleState *jstate, Node *node) { diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c index 64d3a09f765..48b5d13b9b6 100644 --- a/src/backend/nodes/readfuncs.c +++ b/src/backend/nodes/readfuncs.c @@ -68,6 +68,12 @@ token = pg_strtok(&length); /* get field value */ \ local_node->fldname = atoui(token) +/* Read a signed integer field (anything written using INT64_FORMAT) */ +#define READ_INT64_FIELD(fldname) \ + token = pg_strtok(&length); /* skip :fldname */ \ + token = pg_strtok(&length); /* get field value */ \ + local_node->fldname = strtoi64(token, NULL, 10) + /* Read an unsigned integer field (anything written using UINT64_FORMAT) */ #define READ_UINT64_FIELD(fldname) \ token = pg_strtok(&length); /* skip :fldname */ \ @@ -520,6 +526,8 @@ _readA_Expr(void) READ_NODE_FIELD(lexpr); READ_NODE_FIELD(rexpr); + READ_LOCATION_FIELD(rexpr_list_start); + READ_LOCATION_FIELD(rexpr_list_end); READ_LOCATION_FIELD(location); READ_DONE(); diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 3d44815ed5a..1f04a2c182c 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -2247,7 +2247,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers) * Determines and returns the cost of an Append node. 
*/ void -cost_append(AppendPath *apath) +cost_append(AppendPath *apath, PlannerInfo *root) { ListCell *l; @@ -2309,26 +2309,52 @@ cost_append(AppendPath *apath) foreach(l, apath->subpaths) { Path *subpath = (Path *) lfirst(l); - Path sort_path; /* dummy for result of cost_sort */ + int presorted_keys; + Path sort_path; /* dummy for result of + * cost_sort/cost_incremental_sort */ - if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys, + &presorted_keys)) { /* * We'll need to insert a Sort node, so include costs for - * that. We can use the parent's LIMIT if any, since we + * that. We choose to use incremental sort if it is + * enabled and there are presorted keys; otherwise we use + * full sort. + * + * We can use the parent's LIMIT if any, since we * certainly won't pull more than that many tuples from * any child. */ - cost_sort(&sort_path, - NULL, /* doesn't currently need root */ - pathkeys, - subpath->disabled_nodes, - subpath->total_cost, - subpath->rows, - subpath->pathtarget->width, - 0.0, - work_mem, - apath->limit_tuples); + if (enable_incremental_sort && presorted_keys > 0) + { + cost_incremental_sort(&sort_path, + root, + pathkeys, + presorted_keys, + subpath->disabled_nodes, + subpath->startup_cost, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + apath->limit_tuples); + } + else + { + cost_sort(&sort_path, + root, + pathkeys, + subpath->disabled_nodes, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + apath->limit_tuples); + } + subpath = &sort_path; } diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 26f0336f1e4..ebedc5574ca 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -154,13 +154,17 @@ add_paths_to_joinrel(PlannerInfo *root, /* * See if the inner relation is provably unique for this outer rel. 
* - * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't - * matter since the executor can make the equivalent optimization anyway; - * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we - * must be considering a semijoin whose inner side is not provably unique - * (else reduce_unique_semijoins would've simplified it), so there's no - * point in calling innerrel_is_unique. However, if the LHS covers all of - * the semijoin's min_lefthand, then it's appropriate to set inner_unique + * We have some special cases: for JOIN_SEMI, it doesn't matter since the + * executor can make the equivalent optimization anyway. It also doesn't + * help enable use of Memoize, since a semijoin with a provably unique + * inner side should have been reduced to an inner join in that case. + * Therefore, we need not expend planner cycles on proofs. (For + * JOIN_ANTI, although it doesn't help the executor for the same reason, + * it can benefit Memoize paths.) For JOIN_UNIQUE_INNER, we must be + * considering a semijoin whose inner side is not provably unique (else + * reduce_unique_semijoins would've simplified it), so there's no point in + * calling innerrel_is_unique. However, if the LHS covers all of the + * semijoin's min_lefthand, then it's appropriate to set inner_unique * because the path produced by create_unique_path will be unique relative * to the LHS. (If we have an LHS that's only part of the min_lefthand, * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid @@ -169,12 +173,6 @@ add_paths_to_joinrel(PlannerInfo *root, switch (jointype) { case JOIN_SEMI: - case JOIN_ANTI: - - /* - * XXX it may be worth proving this to allow a Memoize to be - * considered for Nested Loop Semi/Anti Joins. 
- */ extra.inner_unique = false; /* well, unproven */ break; case JOIN_UNIQUE_INNER: @@ -715,16 +713,21 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel, return NULL; /* - * Currently we don't do this for SEMI and ANTI joins unless they're - * marked as inner_unique. This is because nested loop SEMI/ANTI joins - * don't scan the inner node to completion, which will mean memoize cannot - * mark the cache entry as complete. - * - * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique - * = true. Should we? See add_paths_to_joinrel() + * Currently we don't do this for SEMI and ANTI joins, because nested loop + * SEMI/ANTI joins don't scan the inner node to completion, which means + * memoize cannot mark the cache entry as complete. Nor can we mark the + * cache entry as complete after fetching the first inner tuple, because + * if that tuple and the current outer tuple don't satisfy the join + * clauses, a second inner tuple that satisfies the parameters would find + * the cache entry already marked as complete. The only exception is when + * the inner relation is provably unique, as in that case, there won't be + * a second matching tuple and we can safely mark the cache entry as + * complete after fetching the first inner tuple. Note that in such + * cases, the SEMI join should have been reduced to an inner join by + * reduce_unique_semijoins. */ - if (!extra->inner_unique && (jointype == JOIN_SEMI || - jointype == JOIN_ANTI)) + if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) && + !extra->inner_unique) return NULL; /* @@ -876,16 +879,13 @@ try_nestloop_path(PlannerInfo *root, /* * Check to see if proposed path is still parameterized, and reject if the * parameterization wouldn't be sensible --- unless allow_star_schema_join - * says to allow it anyway. Also, we must reject if have_dangerous_phv - * doesn't like the look of it, which could only happen if the nestloop is - * still parameterized. + * says to allow it anyway. 
*/ required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels, innerrelids, inner_paramrels); if (required_outer && - ((!bms_overlap(required_outer, extra->param_source_rels) && - !allow_star_schema_join(root, outerrelids, inner_paramrels)) || - have_dangerous_phv(root, outerrelids, inner_paramrels))) + !bms_overlap(required_outer, extra->param_source_rels) && + !allow_star_schema_join(root, outerrelids, inner_paramrels)) { /* Waste no memory when we reject a path here */ bms_free(required_outer); diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 60d65762b5d..aad41b94009 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -565,9 +565,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, * Also, if the lateral reference is only indirect, we should reject * the join; whatever rel(s) the reference chain goes through must be * joined to first. - * - * Another case that might keep us from building a valid plan is the - * implementation restriction described by have_dangerous_phv(). 
*/ lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids); lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids); @@ -584,9 +581,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, /* check there is a direct reference from rel2 to rel1 */ if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids)) return false; /* only indirect refs, so reject */ - /* check we won't have a dangerous PHV */ - if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids)) - return false; /* might be unable to handle required PHV */ } else if (lateral_rev) { @@ -599,9 +593,6 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, /* check there is a direct reference from rel1 to rel2 */ if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids)) return false; /* only indirect refs, so reject */ - /* check we won't have a dangerous PHV */ - if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids)) - return false; /* might be unable to handle required PHV */ } /* @@ -1279,57 +1270,6 @@ has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel) /* - * There's a pitfall for creating parameterized nestloops: suppose the inner - * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's - * minimum eval_at set includes the outer rel (B) and some third rel (C). - * We might think we could create a B/A nestloop join that's parameterized by - * C. But we would end up with a plan in which the PHV's expression has to be - * evaluated as a nestloop parameter at the B/A join; and the executor is only - * set up to handle simple Vars as NestLoopParams. Rather than add complexity - * and overhead to the executor for such corner cases, it seems better to - * forbid the join. (Note that we can still make use of A's parameterized - * path with pre-joined B+C as the outer rel. have_join_order_restriction() - * ensures that we will consider making such a join even if there are not - * other reasons to do so.) 
- * - * So we check whether any PHVs used in the query could pose such a hazard. - * We don't have any simple way of checking whether a risky PHV would actually - * be used in the inner plan, and the case is so unusual that it doesn't seem - * worth working very hard on it. - * - * This needs to be checked in two places. If the inner rel's minimum - * parameterization would trigger the restriction, then join_is_legal() should - * reject the join altogether, because there will be no workable paths for it. - * But joinpath.c has to check again for every proposed nestloop path, because - * the inner path might have more than the minimum parameterization, causing - * some PHV to be dangerous for it that otherwise wouldn't be. - */ -bool -have_dangerous_phv(PlannerInfo *root, - Relids outer_relids, Relids inner_params) -{ - ListCell *lc; - - foreach(lc, root->placeholder_list) - { - PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc); - - if (!bms_is_subset(phinfo->ph_eval_at, inner_params)) - continue; /* ignore, could not be a nestloop param */ - if (!bms_overlap(phinfo->ph_eval_at, outer_relids)) - continue; /* ignore, not relevant to this join */ - if (bms_is_subset(phinfo->ph_eval_at, outer_relids)) - continue; /* safe, it can be eval'd within outerrel */ - /* Otherwise, it's potentially unsafe, so reject the join */ - return true; - } - - /* OK to perform the join */ - return false; -} - - -/* * is_dummy_rel --- has relation been proven empty? */ bool diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 4ad30b7627e..8a9f1d7a943 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1318,6 +1318,7 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) Oid *sortOperators; Oid *collations; bool *nullsFirst; + int presorted_keys; /* * Compute sort column info, and adjust subplan's tlist as needed. 
@@ -1353,14 +1354,38 @@ create_append_plan(PlannerInfo *root, AppendPath *best_path, int flags) numsortkeys * sizeof(bool)) == 0); /* Now, insert a Sort node if subplan isn't sufficiently ordered */ - if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys, + &presorted_keys)) { - Sort *sort = make_sort(subplan, numsortkeys, + Plan *sort_plan; + + /* + * We choose to use incremental sort if it is enabled and + * there are presorted keys; otherwise we use full sort. + */ + if (enable_incremental_sort && presorted_keys > 0) + { + sort_plan = (Plan *) + make_incrementalsort(subplan, numsortkeys, presorted_keys, sortColIdx, sortOperators, collations, nullsFirst); - label_sort_with_costsize(root, sort, best_path->limit_tuples); - subplan = (Plan *) sort; + label_incrementalsort_with_costsize(root, + (IncrementalSort *) sort_plan, + pathkeys, + best_path->limit_tuples); + } + else + { + sort_plan = (Plan *) make_sort(subplan, numsortkeys, + sortColIdx, sortOperators, + collations, nullsFirst); + + label_sort_with_costsize(root, (Sort *) sort_plan, + best_path->limit_tuples); + } + + subplan = sort_plan; } } @@ -1491,6 +1516,7 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, Oid *sortOperators; Oid *collations; bool *nullsFirst; + int presorted_keys; /* Build the child plan */ /* Must insist that all children return the same tlist */ @@ -1525,14 +1551,38 @@ create_merge_append_plan(PlannerInfo *root, MergeAppendPath *best_path, numsortkeys * sizeof(bool)) == 0); /* Now, insert a Sort node if subplan isn't sufficiently ordered */ - if (!pathkeys_contained_in(pathkeys, subpath->pathkeys)) + if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys, + &presorted_keys)) { - Sort *sort = make_sort(subplan, numsortkeys, + Plan *sort_plan; + + /* + * We choose to use incremental sort if it is enabled and there + * are presorted keys; otherwise we use full sort. 
+ */ + if (enable_incremental_sort && presorted_keys > 0) + { + sort_plan = (Plan *) + make_incrementalsort(subplan, numsortkeys, presorted_keys, sortColIdx, sortOperators, collations, nullsFirst); - label_sort_with_costsize(root, sort, best_path->limit_tuples); - subplan = (Plan *) sort; + label_incrementalsort_with_costsize(root, + (IncrementalSort *) sort_plan, + pathkeys, + best_path->limit_tuples); + } + else + { + sort_plan = (Plan *) make_sort(subplan, numsortkeys, + sortColIdx, sortOperators, + collations, nullsFirst); + + label_sort_with_costsize(root, (Sort *) sort_plan, + best_path->limit_tuples); + } + + subplan = sort_plan; } subplans = lappend(subplans, subplan); @@ -4344,13 +4394,16 @@ create_nestloop_plan(PlannerInfo *root, NestLoop *join_plan; Plan *outer_plan; Plan *inner_plan; + Relids outerrelids; List *tlist = build_path_tlist(root, &best_path->jpath.path); List *joinrestrictclauses = best_path->jpath.joinrestrictinfo; List *joinclauses; List *otherclauses; - Relids outerrelids; List *nestParams; + List *outer_tlist; + bool outer_parallel_safe; Relids saveOuterRels = root->curOuterRels; + ListCell *lc; /* * If the inner path is parameterized by the topmost parent of the outer @@ -4372,8 +4425,8 @@ create_nestloop_plan(PlannerInfo *root, outer_plan = create_plan_recurse(root, best_path->jpath.outerjoinpath, 0); /* For a nestloop, include outer relids in curOuterRels for inner side */ - root->curOuterRels = bms_union(root->curOuterRels, - best_path->jpath.outerjoinpath->parent->relids); + outerrelids = best_path->jpath.outerjoinpath->parent->relids; + root->curOuterRels = bms_union(root->curOuterRels, outerrelids); inner_plan = create_plan_recurse(root, best_path->jpath.innerjoinpath, 0); @@ -4412,9 +4465,66 @@ create_nestloop_plan(PlannerInfo *root, * Identify any nestloop parameters that should be supplied by this join * node, and remove them from root->curOuterParams. 
*/ - outerrelids = best_path->jpath.outerjoinpath->parent->relids; - nestParams = identify_current_nestloop_params(root, outerrelids); + nestParams = identify_current_nestloop_params(root, + outerrelids, + PATH_REQ_OUTER((Path *) best_path)); + + /* + * While nestloop parameters that are Vars had better be available from + * the outer_plan already, there are edge cases where nestloop parameters + * that are PHVs won't be. In such cases we must add them to the + * outer_plan's tlist, since the executor's NestLoopParam machinery + * requires the params to be simple outer-Var references to that tlist. + * (This is cheating a little bit, because the outer path's required-outer + * relids might not be enough to allow evaluating such a PHV. But in + * practice, if we could have evaluated the PHV at the nestloop node, we + * can do so in the outer plan too.) + */ + outer_tlist = outer_plan->targetlist; + outer_parallel_safe = outer_plan->parallel_safe; + foreach(lc, nestParams) + { + NestLoopParam *nlp = (NestLoopParam *) lfirst(lc); + PlaceHolderVar *phv; + TargetEntry *tle; + + if (IsA(nlp->paramval, Var)) + continue; /* nothing to do for simple Vars */ + /* Otherwise it must be a PHV */ + phv = castNode(PlaceHolderVar, nlp->paramval); + + if (tlist_member((Expr *) phv, outer_tlist)) + continue; /* already available */ + + /* + * It's possible that nestloop parameter PHVs selected to evaluate + * here contain references to surviving root->curOuterParams items + * (that is, they reference values that will be supplied by some + * higher-level nestloop). Those need to be converted to Params now. + * Note: it's safe to do this after the tlist_member() check, because + * equal() won't pay attention to phv->phexpr. + */ + phv->phexpr = (Expr *) replace_nestloop_params(root, + (Node *) phv->phexpr); + + /* Make a shallow copy of outer_tlist, if we didn't already */ + if (outer_tlist == outer_plan->targetlist) + outer_tlist = list_copy(outer_tlist); + /* ... 
and add the needed expression */ + tle = makeTargetEntry((Expr *) copyObject(phv), + list_length(outer_tlist) + 1, + NULL, + true); + outer_tlist = lappend(outer_tlist, tle); + /* ... and track whether tlist is (still) parallel-safe */ + if (outer_parallel_safe) + outer_parallel_safe = is_parallel_safe(root, (Node *) phv); + } + if (outer_tlist != outer_plan->targetlist) + outer_plan = change_plan_targetlist(outer_plan, outer_tlist, + outer_parallel_safe); + /* And finally, we can build the join plan node */ join_plan = make_nestloop(tlist, joinclauses, otherclauses, diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index ff65867eebe..549aedcfa99 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6879,7 +6879,7 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid) * * tableOid is the table on which the index is to be built. indexOid is the * OID of an index to be created or reindexed (which must be an index with - * support for parallel builds - currently btree or BRIN). + * support for parallel builds - currently btree, GIN, or BRIN). * * Return value is the number of parallel worker processes to request. It * may be unsafe to proceed if this is 0. 
Note that this does not include the diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 26a3e050086..f45131c34c5 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -3333,6 +3333,13 @@ eval_const_expressions_mutator(Node *node, -1, coalesceexpr->coalescecollid); + /* + * If there's exactly one surviving argument, we no longer + * need COALESCE at all: the result is that argument + */ + if (list_length(newargs) == 1) + return (Node *) linitial(newargs); + newcoalesce = makeNode(CoalesceExpr); newcoalesce->coalescetype = coalesceexpr->coalescetype; newcoalesce->coalescecollid = coalesceexpr->coalescecollid; diff --git a/src/backend/optimizer/util/paramassign.c b/src/backend/optimizer/util/paramassign.c index 3bd3ce37c8f..4c13c5931b4 100644 --- a/src/backend/optimizer/util/paramassign.c +++ b/src/backend/optimizer/util/paramassign.c @@ -599,38 +599,46 @@ process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params) } /* - * Identify any NestLoopParams that should be supplied by a NestLoop plan - * node with the specified lefthand rels. Remove them from the active - * root->curOuterParams list and return them as the result list. + * Identify any NestLoopParams that should be supplied by a NestLoop + * plan node with the specified lefthand rels and required-outer rels. + * Remove them from the active root->curOuterParams list and return + * them as the result list. * - * XXX Here we also hack up the returned Vars and PHVs so that they do not - * contain nullingrel sets exceeding what is available from the outer side. - * This is needed if we have applied outer join identity 3, - * (A leftjoin B on (Pab)) leftjoin C on (Pb*c) - * = A leftjoin (B leftjoin C on (Pbc)) on (Pab) - * and C contains lateral references to B. 
It's still safe to apply the - * identity, but the parser will have created those references in the form - * "b*" (i.e., with varnullingrels listing the A/B join), while what we will - * have available from the nestloop's outer side is just "b". We deal with - * that here by stripping the nullingrels down to what is available from the - * outer side according to leftrelids. - * - * That fixes matters for the case of forward application of identity 3. - * If the identity was applied in the reverse direction, we will have - * parameter Vars containing too few nullingrel bits rather than too many. - * Currently, that causes no problems because setrefs.c applies only a - * subset check to nullingrels in NestLoopParams, but we'd have to work - * harder if we ever want to tighten that check. This is all pretty annoying - * because it greatly weakens setrefs.c's cross-check, but the alternative + * Vars and PHVs appearing in the result list must have nullingrel sets + * that could validly appear in the lefthand rel's output. Ordinarily that + * would be true already, but if we have applied outer join identity 3, + * there could be more or fewer nullingrel bits in the nodes appearing in + * curOuterParams than are in the nominal leftrelids. We deal with that by + * forcing their nullingrel sets to include exactly the outer-join relids + * that appear in leftrelids and can null the respective Var or PHV. + * This fix is a bit ad-hoc and intellectually unsatisfactory, because it's + * essentially jumping to the conclusion that we've placed evaluation of + * the nestloop parameters correctly, and thus it defeats the intent of the + * subsequent nullingrel cross-checks in setrefs.c. But the alternative * seems to be to generate multiple versions of each laterally-parameterized * subquery, which'd be unduly expensive. 
*/ List * -identify_current_nestloop_params(PlannerInfo *root, Relids leftrelids) +identify_current_nestloop_params(PlannerInfo *root, + Relids leftrelids, + Relids outerrelids) { List *result; + Relids allleftrelids; ListCell *cell; + /* + * We'll be able to evaluate a PHV in the lefthand path if it uses the + * lefthand rels plus any available required-outer rels. But don't do so + * if it uses *only* required-outer rels; in that case it should be + * evaluated higher in the tree. For Vars, no such hair-splitting is + * necessary since they depend on only one relid. + */ + if (outerrelids) + allleftrelids = bms_union(leftrelids, outerrelids); + else + allleftrelids = leftrelids; + result = NIL; foreach(cell, root->curOuterParams) { @@ -646,25 +654,60 @@ identify_current_nestloop_params(PlannerInfo *root, Relids leftrelids) bms_is_member(nlp->paramval->varno, leftrelids)) { Var *var = (Var *) nlp->paramval; + RelOptInfo *rel = root->simple_rel_array[var->varno]; root->curOuterParams = foreach_delete_current(root->curOuterParams, cell); - var->varnullingrels = bms_intersect(var->varnullingrels, + var->varnullingrels = bms_intersect(rel->nulling_relids, leftrelids); result = lappend(result, nlp); } - else if (IsA(nlp->paramval, PlaceHolderVar) && - bms_is_subset(find_placeholder_info(root, - (PlaceHolderVar *) nlp->paramval)->ph_eval_at, - leftrelids)) + else if (IsA(nlp->paramval, PlaceHolderVar)) { PlaceHolderVar *phv = (PlaceHolderVar *) nlp->paramval; + PlaceHolderInfo *phinfo = find_placeholder_info(root, phv); + Relids eval_at = phinfo->ph_eval_at; - root->curOuterParams = foreach_delete_current(root->curOuterParams, - cell); - phv->phnullingrels = bms_intersect(phv->phnullingrels, - leftrelids); - result = lappend(result, nlp); + if (bms_is_subset(eval_at, allleftrelids) && + bms_overlap(eval_at, leftrelids)) + { + root->curOuterParams = foreach_delete_current(root->curOuterParams, + cell); + + /* + * Deal with an edge case: if the PHV was pulled up out of a 
+ * subquery and it contains a subquery that was originally + * pushed down from this query level, then that will still be + * represented as a SubLink, because SS_process_sublinks won't + * recurse into outer PHVs, so it didn't get transformed + * during expression preprocessing in the subquery. We need a + * version of the PHV that has a SubPlan, which we can get + * from the current query level's placeholder_list. This is + * quite grotty of course, but dealing with it earlier in the + * handling of subplan params would be just as grotty, and it + * might end up being a waste of cycles if we don't decide to + * treat the PHV as a NestLoopParam. (Perhaps that whole + * mechanism should be redesigned someday, but today is not + * that day.) + */ + if (root->parse->hasSubLinks) + { + phv = copyObject(phinfo->ph_var); + + /* + * The ph_var will have empty nullingrels, but that + * doesn't matter since we're about to overwrite + * phv->phnullingrels. Other fields should be OK already. + */ + nlp->paramval = (Var *) phv; + } + + phv->phnullingrels = + bms_intersect(get_placeholder_nulling_relids(root, phinfo), + leftrelids); + + result = lappend(result, nlp); + } } } return result; diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index e0192d4a491..9cc602788ea 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -1404,12 +1404,12 @@ create_append_path(PlannerInfo *root, pathnode->path.total_cost = child->total_cost; } else - cost_append(pathnode); + cost_append(pathnode, root); /* Must do this last, else cost_append complains */ pathnode->path.pathkeys = child->pathkeys; } else - cost_append(pathnode); + cost_append(pathnode, root); /* If the caller provided a row estimate, override the computed value. 
*/ if (rows >= 0) @@ -1515,6 +1515,9 @@ create_merge_append_path(PlannerInfo *root, foreach(l, subpaths) { Path *subpath = (Path *) lfirst(l); + int presorted_keys; + Path sort_path; /* dummy for result of + * cost_sort/cost_incremental_sort */ /* All child paths should be unparameterized */ Assert(bms_is_empty(PATH_REQ_OUTER(subpath))); @@ -1523,32 +1526,52 @@ create_merge_append_path(PlannerInfo *root, pathnode->path.parallel_safe = pathnode->path.parallel_safe && subpath->parallel_safe; - if (pathkeys_contained_in(pathkeys, subpath->pathkeys)) + if (!pathkeys_count_contained_in(pathkeys, subpath->pathkeys, + &presorted_keys)) { - /* Subpath is adequately ordered, we won't need to sort it */ - input_disabled_nodes += subpath->disabled_nodes; - input_startup_cost += subpath->startup_cost; - input_total_cost += subpath->total_cost; - } - else - { - /* We'll need to insert a Sort node, so include cost for that */ - Path sort_path; /* dummy for result of cost_sort */ + /* + * We'll need to insert a Sort node, so include costs for that. We + * choose to use incremental sort if it is enabled and there are + * presorted keys; otherwise we use full sort. + * + * We can use the parent's LIMIT if any, since we certainly won't + * pull more than that many tuples from any child. 
+ */ + if (enable_incremental_sort && presorted_keys > 0) + { + cost_incremental_sort(&sort_path, + root, + pathkeys, + presorted_keys, + subpath->disabled_nodes, + subpath->startup_cost, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + pathnode->limit_tuples); + } + else + { + cost_sort(&sort_path, + root, + pathkeys, + subpath->disabled_nodes, + subpath->total_cost, + subpath->rows, + subpath->pathtarget->width, + 0.0, + work_mem, + pathnode->limit_tuples); + } - cost_sort(&sort_path, - root, - pathkeys, - subpath->disabled_nodes, - subpath->total_cost, - subpath->rows, - subpath->pathtarget->width, - 0.0, - work_mem, - pathnode->limit_tuples); - input_disabled_nodes += sort_path.disabled_nodes; - input_startup_cost += sort_path.startup_cost; - input_total_cost += sort_path.total_cost; + subpath = &sort_path; } + + input_disabled_nodes += subpath->disabled_nodes; + input_startup_cost += subpath->startup_cost; + input_total_cost += subpath->total_cost; } /* diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c index 41a4c81e94a..e1cd00a72fb 100644 --- a/src/backend/optimizer/util/placeholder.c +++ b/src/backend/optimizer/util/placeholder.c @@ -545,3 +545,43 @@ contain_placeholder_references_walker(Node *node, return expression_tree_walker(node, contain_placeholder_references_walker, context); } + +/* + * Compute the set of outer-join relids that can null a placeholder. + * + * This is analogous to RelOptInfo.nulling_relids for Vars, but we compute it + * on-the-fly rather than saving it somewhere. Currently the value is needed + * at most once per query, so there's little value in doing otherwise. If it + * ever gains more widespread use, perhaps we should cache the result in + * PlaceHolderInfo. 
+ */ +Relids +get_placeholder_nulling_relids(PlannerInfo *root, PlaceHolderInfo *phinfo) +{ + Relids result = NULL; + int relid = -1; + + /* + * Form the union of all potential nulling OJs for each baserel included + * in ph_eval_at. + */ + while ((relid = bms_next_member(phinfo->ph_eval_at, relid)) > 0) + { + RelOptInfo *rel = root->simple_rel_array[relid]; + + /* ignore the RTE_GROUP RTE */ + if (relid == root->group_rtindex) + continue; + + if (rel == NULL) /* must be an outer join */ + { + Assert(bms_is_member(relid, root->outer_join_rels)); + continue; + } + result = bms_add_members(result, rel->nulling_relids); + } + + /* Now remove any OJs already included in ph_eval_at, and we're done. */ + result = bms_del_members(result, phinfo->ph_eval_at); + return result; +} diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c index a16fdd65601..34f7c17f576 100644 --- a/src/backend/parser/analyze.c +++ b/src/backend/parser/analyze.c @@ -239,102 +239,23 @@ parse_sub_analyze(Node *parseTree, ParseState *parentParseState, } /* - * setQueryLocationAndLength - * Set query's location and length from statement and ParseState - * - * Some statements, like PreparableStmt, can be located within parentheses. - * For example "(SELECT 1)" or "COPY (UPDATE ...) to x;". For those, we - * cannot use the whole string from the statement's location or the SQL - * string would yield incorrectly. The parser will set stmt_len, reflecting - * the size of the statement within the parentheses. Thus, when stmt_len is - * available, we need to use it for the Query's stmt_len. - * - * For other cases, the parser can't provide the length of individual - * statements. However, we have the statement's location plus the length - * (p_stmt_len) and location (p_stmt_location) of the top level RawStmt, - * stored in pstate. Thus, the statement's length is the RawStmt's length - * minus how much we've advanced in the RawStmt's string. 
If p_stmt_len - * is 0, the SQL string is used up to its end. - */ -static void -setQueryLocationAndLength(ParseState *pstate, Query *qry, Node *parseTree) -{ - ParseLoc stmt_len = 0; - - switch (nodeTag(parseTree)) - { - case T_InsertStmt: - qry->stmt_location = ((InsertStmt *) parseTree)->stmt_location; - stmt_len = ((InsertStmt *) parseTree)->stmt_len; - break; - - case T_DeleteStmt: - qry->stmt_location = ((DeleteStmt *) parseTree)->stmt_location; - stmt_len = ((DeleteStmt *) parseTree)->stmt_len; - break; - - case T_UpdateStmt: - qry->stmt_location = ((UpdateStmt *) parseTree)->stmt_location; - stmt_len = ((UpdateStmt *) parseTree)->stmt_len; - break; - - case T_MergeStmt: - qry->stmt_location = ((MergeStmt *) parseTree)->stmt_location; - stmt_len = ((MergeStmt *) parseTree)->stmt_len; - break; - - case T_SelectStmt: - qry->stmt_location = ((SelectStmt *) parseTree)->stmt_location; - stmt_len = ((SelectStmt *) parseTree)->stmt_len; - break; - - case T_PLAssignStmt: - qry->stmt_location = ((PLAssignStmt *) parseTree)->location; - break; - - default: - qry->stmt_location = pstate->p_stmt_location; - break; - } - - if (stmt_len > 0) - { - /* Statement's length is known, use it */ - qry->stmt_len = stmt_len; - } - else if (pstate->p_stmt_len > 0) - { - /* - * The top RawStmt's length is known, so calculate the statement's - * length from the statement's location and the RawStmt's length and - * location. - */ - qry->stmt_len = pstate->p_stmt_len - (qry->stmt_location - pstate->p_stmt_location); - } - - /* The calculated statement length should be calculated as positive. */ - Assert(qry->stmt_len >= 0); -} - -/* * transformTopLevelStmt - * transform a Parse tree into a Query tree. * - * This function is just responsible for storing location data - * from the RawStmt into the ParseState. + * This function is just responsible for transferring statement location data + * from the RawStmt into the finished Query. 
*/ Query * transformTopLevelStmt(ParseState *pstate, RawStmt *parseTree) { Query *result; - /* Store RawStmt's length and location in pstate */ - pstate->p_stmt_len = parseTree->stmt_len; - pstate->p_stmt_location = parseTree->stmt_location; - /* We're at top level, so allow SELECT INTO */ result = transformOptionalSelectInto(pstate, parseTree->stmt); + result->stmt_location = parseTree->stmt_location; + result->stmt_len = parseTree->stmt_len; + return result; } @@ -503,7 +424,6 @@ transformStmt(ParseState *pstate, Node *parseTree) /* Mark as original query until we learn differently */ result->querySource = QSRC_ORIGINAL; result->canSetTag = true; - setQueryLocationAndLength(pstate, result, parseTree); return result; } diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 0b5652071d1..70a0d832a11 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -154,7 +154,6 @@ static void base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner, const char *msg); static RawStmt *makeRawStmt(Node *stmt, int stmt_location); static void updateRawStmtEnd(RawStmt *rs, int end_location); -static void updatePreparableStmtEnd(Node *n, int end_location); static Node *makeColumnRef(char *colname, List *indirection, int location, core_yyscan_t yyscanner); static Node *makeTypeCast(Node *arg, TypeName *typename, int location); @@ -178,13 +177,13 @@ static void insertSelectOptions(SelectStmt *stmt, SelectLimit *limitClause, WithClause *withClause, core_yyscan_t yyscanner); -static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location); +static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg); static Node *doNegate(Node *n, int location); static void doNegateFloat(Float *v); static Node *makeAndExpr(Node *lexpr, Node *rexpr, int location); static Node *makeOrExpr(Node *lexpr, Node *rexpr, int location); static Node *makeNotExpr(Node *expr, int location); -static Node *makeAArrayExpr(List *elements, int 
location); +static Node *makeAArrayExpr(List *elements, int location, int end_location); static Node *makeSQLValueFunction(SQLValueFunctionOp op, int32 typmod, int location); static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, @@ -523,7 +522,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query); %type <defelt> def_elem reloption_elem old_aggr_elem operator_def_elem %type <node> def_arg columnElem where_clause where_or_current_clause a_expr b_expr c_expr AexprConst indirection_el opt_slice_bound - columnref in_expr having_clause func_table xmltable array_expr + columnref having_clause func_table xmltable array_expr OptWhereClause operator_def_arg %type <list> opt_column_and_period_list %type <list> rowsfrom_item rowsfrom_list opt_col_def_list @@ -2669,6 +2668,12 @@ alter_table_cmd: c->alterDeferrability = true; if ($4 & CAS_NO_INHERIT) c->alterInheritability = true; + /* handle unsupported case with specific error message */ + if ($4 & CAS_NOT_VALID) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("constraints cannot be altered to be NOT VALID"), + parser_errposition(@4)); processCASbits($4, @4, "FOREIGN KEY", &c->deferrable, &c->initdeferred, @@ -3417,7 +3422,6 @@ CopyStmt: COPY opt_binary qualified_name opt_column_list { CopyStmt *n = makeNode(CopyStmt); - updatePreparableStmtEnd($3, @4); n->relation = NULL; n->query = $3; n->attlist = NIL; @@ -6037,6 +6041,26 @@ CreateTrigStmt: EXECUTE FUNCTION_or_PROCEDURE func_name '(' TriggerFuncArgs ')' { CreateTrigStmt *n = makeNode(CreateTrigStmt); + bool dummy; + + if (($11 & CAS_NOT_VALID) != 0) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("constraint triggers cannot be marked %s", + "NOT VALID"), + parser_errposition(@11)); + if (($11 & CAS_NO_INHERIT) != 0) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("constraint triggers cannot be marked %s", + "NO INHERIT"), + parser_errposition(@11)); + if (($11 & 
CAS_NOT_ENFORCED) != 0) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("constraint triggers cannot be marked %s", + "NOT ENFORCED"), + parser_errposition(@11)); n->replace = $2; if (n->replace) /* not supported, see CreateTrigger */ @@ -6056,7 +6080,7 @@ CreateTrigStmt: n->whenClause = $15; n->transitionRels = NIL; processCASbits($11, @11, "TRIGGER", - &n->deferrable, &n->initdeferred, NULL, + &n->deferrable, &n->initdeferred, &dummy, NULL, NULL, yyscanner); n->constrrel = $10; $$ = (Node *) n; @@ -7479,6 +7503,8 @@ fetch_args: cursor_name n->portalname = $1; n->direction = FETCH_FORWARD; n->howMany = 1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_NONE; $$ = (Node *) n; } | from_in cursor_name @@ -7488,6 +7514,19 @@ fetch_args: cursor_name n->portalname = $2; n->direction = FETCH_FORWARD; n->howMany = 1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_NONE; + $$ = (Node *) n; + } + | SignedIconst opt_from_in cursor_name + { + FetchStmt *n = makeNode(FetchStmt); + + n->portalname = $3; + n->direction = FETCH_FORWARD; + n->howMany = $1; + n->location = @1; + n->direction_keyword = FETCH_KEYWORD_NONE; $$ = (Node *) n; } | NEXT opt_from_in cursor_name @@ -7497,6 +7536,8 @@ fetch_args: cursor_name n->portalname = $3; n->direction = FETCH_FORWARD; n->howMany = 1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_NEXT; $$ = (Node *) n; } | PRIOR opt_from_in cursor_name @@ -7506,6 +7547,8 @@ fetch_args: cursor_name n->portalname = $3; n->direction = FETCH_BACKWARD; n->howMany = 1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_PRIOR; $$ = (Node *) n; } | FIRST_P opt_from_in cursor_name @@ -7515,6 +7558,8 @@ fetch_args: cursor_name n->portalname = $3; n->direction = FETCH_ABSOLUTE; n->howMany = 1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_FIRST; $$ = (Node *) n; } | LAST_P opt_from_in cursor_name @@ -7524,6 +7569,8 @@ fetch_args: cursor_name n->portalname = $3; n->direction = 
FETCH_ABSOLUTE; n->howMany = -1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_LAST; $$ = (Node *) n; } | ABSOLUTE_P SignedIconst opt_from_in cursor_name @@ -7533,6 +7580,8 @@ fetch_args: cursor_name n->portalname = $4; n->direction = FETCH_ABSOLUTE; n->howMany = $2; + n->location = @2; + n->direction_keyword = FETCH_KEYWORD_ABSOLUTE; $$ = (Node *) n; } | RELATIVE_P SignedIconst opt_from_in cursor_name @@ -7542,15 +7591,8 @@ fetch_args: cursor_name n->portalname = $4; n->direction = FETCH_RELATIVE; n->howMany = $2; - $$ = (Node *) n; - } - | SignedIconst opt_from_in cursor_name - { - FetchStmt *n = makeNode(FetchStmt); - - n->portalname = $3; - n->direction = FETCH_FORWARD; - n->howMany = $1; + n->location = @2; + n->direction_keyword = FETCH_KEYWORD_RELATIVE; $$ = (Node *) n; } | ALL opt_from_in cursor_name @@ -7560,6 +7602,8 @@ fetch_args: cursor_name n->portalname = $3; n->direction = FETCH_FORWARD; n->howMany = FETCH_ALL; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_ALL; $$ = (Node *) n; } | FORWARD opt_from_in cursor_name @@ -7569,6 +7613,8 @@ fetch_args: cursor_name n->portalname = $3; n->direction = FETCH_FORWARD; n->howMany = 1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_FORWARD; $$ = (Node *) n; } | FORWARD SignedIconst opt_from_in cursor_name @@ -7578,6 +7624,8 @@ fetch_args: cursor_name n->portalname = $4; n->direction = FETCH_FORWARD; n->howMany = $2; + n->location = @2; + n->direction_keyword = FETCH_KEYWORD_FORWARD; $$ = (Node *) n; } | FORWARD ALL opt_from_in cursor_name @@ -7587,6 +7635,8 @@ fetch_args: cursor_name n->portalname = $4; n->direction = FETCH_FORWARD; n->howMany = FETCH_ALL; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_FORWARD_ALL; $$ = (Node *) n; } | BACKWARD opt_from_in cursor_name @@ -7596,6 +7646,8 @@ fetch_args: cursor_name n->portalname = $3; n->direction = FETCH_BACKWARD; n->howMany = 1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_BACKWARD; $$ = (Node *) n; 
} | BACKWARD SignedIconst opt_from_in cursor_name @@ -7605,6 +7657,8 @@ fetch_args: cursor_name n->portalname = $4; n->direction = FETCH_BACKWARD; n->howMany = $2; + n->location = @2; + n->direction_keyword = FETCH_KEYWORD_BACKWARD; $$ = (Node *) n; } | BACKWARD ALL opt_from_in cursor_name @@ -7614,6 +7668,8 @@ fetch_args: cursor_name n->portalname = $4; n->direction = FETCH_BACKWARD; n->howMany = FETCH_ALL; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_BACKWARD_ALL; $$ = (Node *) n; } ; @@ -11629,7 +11685,7 @@ AlterDomainStmt: { AlterDomainStmt *n = makeNode(AlterDomainStmt); - n->subtype = 'T'; + n->subtype = AD_AlterDefault; n->typeName = $3; n->def = $4; $$ = (Node *) n; @@ -11639,7 +11695,7 @@ AlterDomainStmt: { AlterDomainStmt *n = makeNode(AlterDomainStmt); - n->subtype = 'N'; + n->subtype = AD_DropNotNull; n->typeName = $3; $$ = (Node *) n; } @@ -11648,7 +11704,7 @@ AlterDomainStmt: { AlterDomainStmt *n = makeNode(AlterDomainStmt); - n->subtype = 'O'; + n->subtype = AD_SetNotNull; n->typeName = $3; $$ = (Node *) n; } @@ -11657,7 +11713,7 @@ AlterDomainStmt: { AlterDomainStmt *n = makeNode(AlterDomainStmt); - n->subtype = 'C'; + n->subtype = AD_AddConstraint; n->typeName = $3; n->def = $5; $$ = (Node *) n; @@ -11667,7 +11723,7 @@ AlterDomainStmt: { AlterDomainStmt *n = makeNode(AlterDomainStmt); - n->subtype = 'X'; + n->subtype = AD_DropConstraint; n->typeName = $3; n->name = $6; n->behavior = $7; @@ -11679,7 +11735,7 @@ AlterDomainStmt: { AlterDomainStmt *n = makeNode(AlterDomainStmt); - n->subtype = 'X'; + n->subtype = AD_DropConstraint; n->typeName = $3; n->name = $8; n->behavior = $9; @@ -11691,7 +11747,7 @@ AlterDomainStmt: { AlterDomainStmt *n = makeNode(AlterDomainStmt); - n->subtype = 'V'; + n->subtype = AD_ValidateConstraint; n->typeName = $3; n->name = $6; $$ = (Node *) n; @@ -12240,7 +12296,6 @@ InsertStmt: $5->onConflictClause = $6; $5->returningClause = $7; $5->withClause = $1; - $5->stmt_location = @$; $$ = (Node *) $5; } ; @@ 
-12431,7 +12486,6 @@ DeleteStmt: opt_with_clause DELETE_P FROM relation_expr_opt_alias n->whereClause = $6; n->returningClause = $7; n->withClause = $1; - n->stmt_location = @$; $$ = (Node *) n; } ; @@ -12506,7 +12560,6 @@ UpdateStmt: opt_with_clause UPDATE relation_expr_opt_alias n->whereClause = $7; n->returningClause = $8; n->withClause = $1; - n->stmt_location = @$; $$ = (Node *) n; } ; @@ -12584,7 +12637,6 @@ MergeStmt: m->joinCondition = $8; m->mergeWhenClauses = $9; m->returningClause = $10; - m->stmt_location = @$; $$ = (Node *) m; } @@ -12825,20 +12877,7 @@ SelectStmt: select_no_parens %prec UMINUS ; select_with_parens: - '(' select_no_parens ')' - { - SelectStmt *n = (SelectStmt *) $2; - - /* - * As SelectStmt's location starts at the SELECT keyword, - * we need to track the length of the SelectStmt within - * parentheses to be able to extract the relevant part - * of the query. Without this, the RawStmt's length would - * be used and would include the closing parenthesis. - */ - n->stmt_len = @3 - @2; - $$ = $2; - } + '(' select_no_parens ')' { $$ = $2; } | '(' select_with_parens ')' { $$ = $2; } ; @@ -12960,7 +12999,6 @@ simple_select: n->groupDistinct = ($7)->distinct; n->havingClause = $8; n->windowClause = $9; - n->stmt_location = @1; $$ = (Node *) n; } | SELECT distinct_clause target_list @@ -12978,7 +13016,6 @@ simple_select: n->groupDistinct = ($7)->distinct; n->havingClause = $8; n->windowClause = $9; - n->stmt_location = @1; $$ = (Node *) n; } | values_clause { $$ = $1; } @@ -12999,20 +13036,19 @@ simple_select: n->targetList = list_make1(rt); n->fromClause = list_make1($2); - n->stmt_location = @1; $$ = (Node *) n; } | select_clause UNION set_quantifier select_clause { - $$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4, @1); + $$ = makeSetOp(SETOP_UNION, $3 == SET_QUANTIFIER_ALL, $1, $4); } | select_clause INTERSECT set_quantifier select_clause { - $$ = makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4, @1); + $$ = 
makeSetOp(SETOP_INTERSECT, $3 == SET_QUANTIFIER_ALL, $1, $4); } | select_clause EXCEPT set_quantifier select_clause { - $$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4, @1); + $$ = makeSetOp(SETOP_EXCEPT, $3 == SET_QUANTIFIER_ALL, $1, $4); } ; @@ -13590,7 +13626,6 @@ values_clause: { SelectStmt *n = makeNode(SelectStmt); - n->stmt_location = @1; n->valuesLists = list_make1($3); $$ = (Node *) n; } @@ -15287,49 +15322,50 @@ a_expr: c_expr { $$ = $1; } (Node *) list_make2($5, $7), @2); } - | a_expr IN_P in_expr + | a_expr IN_P select_with_parens { - /* in_expr returns a SubLink or a list of a_exprs */ - if (IsA($3, SubLink)) - { - /* generate foo = ANY (subquery) */ - SubLink *n = (SubLink *) $3; + /* generate foo = ANY (subquery) */ + SubLink *n = makeNode(SubLink); - n->subLinkType = ANY_SUBLINK; - n->subLinkId = 0; - n->testexpr = $1; - n->operName = NIL; /* show it's IN not = ANY */ - n->location = @2; - $$ = (Node *) n; - } - else - { - /* generate scalar IN expression */ - $$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "=", $1, $3, @2); - } + n->subselect = $3; + n->subLinkType = ANY_SUBLINK; + n->subLinkId = 0; + n->testexpr = $1; + n->operName = NIL; /* show it's IN not = ANY */ + n->location = @2; + $$ = (Node *) n; } - | a_expr NOT_LA IN_P in_expr %prec NOT_LA + | a_expr IN_P '(' expr_list ')' { - /* in_expr returns a SubLink or a list of a_exprs */ - if (IsA($4, SubLink)) - { - /* generate NOT (foo = ANY (subquery)) */ - /* Make an = ANY node */ - SubLink *n = (SubLink *) $4; - - n->subLinkType = ANY_SUBLINK; - n->subLinkId = 0; - n->testexpr = $1; - n->operName = NIL; /* show it's IN not = ANY */ - n->location = @2; - /* Stick a NOT on top; must have same parse location */ - $$ = makeNotExpr((Node *) n, @2); - } - else - { - /* generate scalar NOT IN expression */ - $$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "<>", $1, $4, @2); - } + /* generate scalar IN expression */ + A_Expr *n = makeSimpleA_Expr(AEXPR_IN, "=", $1, (Node *) $4, @2); + + 
n->rexpr_list_start = @3; + n->rexpr_list_end = @5; + $$ = (Node *) n; + } + | a_expr NOT_LA IN_P select_with_parens %prec NOT_LA + { + /* generate NOT (foo = ANY (subquery)) */ + SubLink *n = makeNode(SubLink); + + n->subselect = $4; + n->subLinkType = ANY_SUBLINK; + n->subLinkId = 0; + n->testexpr = $1; + n->operName = NIL; /* show it's IN not = ANY */ + n->location = @2; + /* Stick a NOT on top; must have same parse location */ + $$ = makeNotExpr((Node *) n, @2); + } + | a_expr NOT_LA IN_P '(' expr_list ')' + { + /* generate scalar NOT IN expression */ + A_Expr *n = makeSimpleA_Expr(AEXPR_IN, "<>", $1, (Node *) $5, @2); + + n->rexpr_list_start = @4; + n->rexpr_list_end = @6; + $$ = (Node *) n; } | a_expr subquery_Op sub_type select_with_parens %prec Op { @@ -16764,15 +16800,15 @@ type_list: Typename { $$ = list_make1($1); } array_expr: '[' expr_list ']' { - $$ = makeAArrayExpr($2, @1); + $$ = makeAArrayExpr($2, @1, @3); } | '[' array_expr_list ']' { - $$ = makeAArrayExpr($2, @1); + $$ = makeAArrayExpr($2, @1, @3); } | '[' ']' { - $$ = makeAArrayExpr(NIL, @1); + $$ = makeAArrayExpr(NIL, @1, @2); } ; @@ -16894,17 +16930,6 @@ trim_list: a_expr FROM expr_list { $$ = lappend($3, $1); } | expr_list { $$ = $1; } ; -in_expr: select_with_parens - { - SubLink *n = makeNode(SubLink); - - n->subselect = $1; - /* other fields will be filled later */ - $$ = (Node *) n; - } - | '(' expr_list ')' { $$ = (Node *) $2; } - ; - /* * Define SQL-style CASE clause. * - Full specification @@ -18748,47 +18773,6 @@ updateRawStmtEnd(RawStmt *rs, int end_location) rs->stmt_len = end_location - rs->stmt_location; } -/* - * Adjust a PreparableStmt to reflect that it doesn't run to the end of the - * string. 
- */ -static void -updatePreparableStmtEnd(Node *n, int end_location) -{ - if (IsA(n, SelectStmt)) - { - SelectStmt *stmt = (SelectStmt *) n; - - stmt->stmt_len = end_location - stmt->stmt_location; - } - else if (IsA(n, InsertStmt)) - { - InsertStmt *stmt = (InsertStmt *) n; - - stmt->stmt_len = end_location - stmt->stmt_location; - } - else if (IsA(n, UpdateStmt)) - { - UpdateStmt *stmt = (UpdateStmt *) n; - - stmt->stmt_len = end_location - stmt->stmt_location; - } - else if (IsA(n, DeleteStmt)) - { - DeleteStmt *stmt = (DeleteStmt *) n; - - stmt->stmt_len = end_location - stmt->stmt_location; - } - else if (IsA(n, MergeStmt)) - { - MergeStmt *stmt = (MergeStmt *) n; - - stmt->stmt_len = end_location - stmt->stmt_location; - } - else - elog(ERROR, "unexpected node type %d", (int) n->type); -} - static Node * makeColumnRef(char *colname, List *indirection, int location, core_yyscan_t yyscanner) @@ -19167,14 +19151,11 @@ insertSelectOptions(SelectStmt *stmt, errmsg("multiple WITH clauses not allowed"), parser_errposition(exprLocation((Node *) withClause)))); stmt->withClause = withClause; - - /* Update SelectStmt's location to the start of the WITH clause */ - stmt->stmt_location = withClause->location; } } static Node * -makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location) +makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg) { SelectStmt *n = makeNode(SelectStmt); @@ -19182,7 +19163,6 @@ makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg, int location) n->all = all; n->larg = (SelectStmt *) larg; n->rarg = (SelectStmt *) rarg; - n->stmt_location = location; return (Node *) n; } @@ -19300,12 +19280,14 @@ makeNotExpr(Node *expr, int location) } static Node * -makeAArrayExpr(List *elements, int location) +makeAArrayExpr(List *elements, int location, int location_end) { A_ArrayExpr *n = makeNode(A_ArrayExpr); n->elements = elements; n->location = location; + n->list_start = location; + n->list_end = location_end; return (Node 
*) n; } diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 1f8e2d54673..d66276801c6 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -1223,6 +1223,8 @@ transformAExprIn(ParseState *pstate, A_Expr *a) newa->element_typeid = scalar_type; newa->elements = aexprs; newa->multidims = false; + newa->list_start = a->rexpr_list_start; + newa->list_end = a->rexpr_list_end; newa->location = -1; result = (Node *) make_scalar_array_op(pstate, @@ -2165,6 +2167,8 @@ transformArrayExpr(ParseState *pstate, A_ArrayExpr *a, /* array_collid will be set by parse_collate.c */ newa->element_typeid = element_type; newa->elements = newcoercedelems; + newa->list_start = a->list_start; + newa->list_end = a->list_end; newa->location = a->location; return (Node *) newa; diff --git a/src/backend/parser/parse_utilcmd.c b/src/backend/parser/parse_utilcmd.c index 62015431fdf..afcf54169c3 100644 --- a/src/backend/parser/parse_utilcmd.c +++ b/src/backend/parser/parse_utilcmd.c @@ -1279,6 +1279,28 @@ transformTableLikeClause(CreateStmtContext *cxt, TableLikeClause *table_like_cla lst = RelationGetNotNullConstraints(RelationGetRelid(relation), false, true); cxt->nnconstraints = list_concat(cxt->nnconstraints, lst); + + /* Copy comments on not-null constraints */ + if (table_like_clause->options & CREATE_TABLE_LIKE_COMMENTS) + { + foreach_node(Constraint, nnconstr, lst) + { + if ((comment = GetComment(get_relation_constraint_oid(RelationGetRelid(relation), + nnconstr->conname, false), + ConstraintRelationId, + 0)) != NULL) + { + CommentStmt *stmt = makeNode(CommentStmt); + + stmt->objtype = OBJECT_TABCONSTRAINT; + stmt->object = (Node *) list_make3(makeString(cxt->relation->schemaname), + makeString(cxt->relation->relname), + makeString(nnconstr->conname)); + stmt->comment = comment; + cxt->alist = lappend(cxt->alist, stmt); + } + } + } } /* diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 
4d4a1a3197e..9474095f271 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -781,10 +781,6 @@ ProcessAutoVacLauncherInterrupts(void) if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); - /* Publish memory contexts of this process */ - if (PublishMemoryContextPending) - ProcessGetMemoryContextInterrupt(); - /* Process sinval catchup interrupts that happened while sleeping */ ProcessCatchupInterrupt(); } @@ -2077,6 +2073,12 @@ do_autovacuum(void) } } } + + /* Release stuff to avoid per-relation leakage */ + if (relopts) + pfree(relopts); + if (tabentry) + pfree(tabentry); } table_endscan(relScan); @@ -2093,7 +2095,8 @@ do_autovacuum(void) Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple); PgStat_StatTabEntry *tabentry; Oid relid; - AutoVacOpts *relopts = NULL; + AutoVacOpts *relopts; + bool free_relopts = false; bool dovacuum; bool doanalyze; bool wraparound; @@ -2111,7 +2114,9 @@ do_autovacuum(void) * main rel */ relopts = extract_autovac_opts(tuple, pg_class_desc); - if (relopts == NULL) + if (relopts) + free_relopts = true; + else { av_relation *hentry; bool found; @@ -2132,6 +2137,12 @@ do_autovacuum(void) /* ignore analyze for toast tables */ if (dovacuum) table_oids = lappend_oid(table_oids, relid); + + /* Release stuff to avoid leakage */ + if (free_relopts) + pfree(relopts); + if (tabentry) + pfree(tabentry); } table_endscan(relScan); @@ -2223,6 +2234,12 @@ do_autovacuum(void) get_namespace_name(classForm->relnamespace), NameStr(classForm->relname)))); + /* + * Deletion might involve TOAST table access, so ensure we have a + * valid snapshot. + */ + PushActiveSnapshot(GetTransactionSnapshot()); + object.classId = RelationRelationId; object.objectId = relid; object.objectSubId = 0; @@ -2235,6 +2252,7 @@ do_autovacuum(void) * To commit the deletion, end current transaction and start a new * one. Note this also releases the locks we took. 
*/ + PopActiveSnapshot(); CommitTransactionCommand(); StartTransactionCommand(); @@ -2503,6 +2521,8 @@ deleted: pg_atomic_test_set_flag(&MyWorkerInfo->wi_dobalance); } + list_free(table_oids); + /* * Perform additional work items, as requested by backends. */ @@ -2684,8 +2704,8 @@ deleted2: /* * extract_autovac_opts * - * Given a relation's pg_class tuple, return the AutoVacOpts portion of - * reloptions, if set; otherwise, return NULL. + * Given a relation's pg_class tuple, return a palloc'd copy of the + * AutoVacOpts portion of reloptions, if set; otherwise, return NULL. * * Note: callers do not have a relation lock on the table at this point, * so the table could have been dropped, and its catalog rows gone, after @@ -2734,6 +2754,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, autovac_table *tab = NULL; bool wraparound; AutoVacOpts *avopts; + bool free_avopts = false; /* fetch the relation's relcache entry */ classTup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid)); @@ -2746,8 +2767,10 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, * main table reloptions if the toast table itself doesn't have. 
*/ avopts = extract_autovac_opts(classTup, pg_class_desc); - if (classForm->relkind == RELKIND_TOASTVALUE && - avopts == NULL && table_toast_map != NULL) + if (avopts) + free_avopts = true; + else if (classForm->relkind == RELKIND_TOASTVALUE && + table_toast_map != NULL) { av_relation *hentry; bool found; @@ -2856,6 +2879,8 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map, avopts->vacuum_cost_delay >= 0)); } + if (free_avopts) + pfree(avopts); heap_freetuple(classTup); return tab; } @@ -2887,6 +2912,10 @@ recheck_relation_needs_vacanalyze(Oid relid, effective_multixact_freeze_max_age, dovacuum, doanalyze, wraparound); + /* Release tabentry to avoid leakage */ + if (tabentry) + pfree(tabentry); + /* ignore ANALYZE for toast tables */ if (classForm->relkind == RELKIND_TOASTVALUE) *doanalyze = false; @@ -3144,20 +3173,24 @@ autovacuum_do_vac_analyze(autovac_table *tab, BufferAccessStrategy bstrategy) VacuumRelation *rel; List *rel_list; MemoryContext vac_context; + MemoryContext old_context; /* Let pgstat know what we're doing */ autovac_report_activity(tab); + /* Create a context that vacuum() can use as cross-transaction storage */ + vac_context = AllocSetContextCreate(CurrentMemoryContext, + "Vacuum", + ALLOCSET_DEFAULT_SIZES); + /* Set up one VacuumRelation target, identified by OID, for vacuum() */ + old_context = MemoryContextSwitchTo(vac_context); rangevar = makeRangeVar(tab->at_nspname, tab->at_relname, -1); rel = makeVacuumRelation(rangevar, tab->at_relid, NIL); rel_list = list_make1(rel); + MemoryContextSwitchTo(old_context); - vac_context = AllocSetContextCreate(CurrentMemoryContext, - "Vacuum", - ALLOCSET_DEFAULT_SIZES); - - vacuum(rel_list, &tab->at_params, bstrategy, vac_context, true); + vacuum(rel_list, tab->at_params, bstrategy, vac_context, true); MemoryContextDelete(vac_context); } diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c index d3cb3f1891c..fda91ffd1ce 100644 --- 
a/src/backend/postmaster/checkpointer.c +++ b/src/backend/postmaster/checkpointer.c @@ -663,10 +663,6 @@ ProcessCheckpointerInterrupts(void) /* Perform logging of memory contexts of this process */ if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); - - /* Publish memory contexts of this process */ - if (PublishMemoryContextPending) - ProcessGetMemoryContextInterrupt(); } /* diff --git a/src/backend/postmaster/interrupt.c b/src/backend/postmaster/interrupt.c index f24f574e748..0ae9bf906ec 100644 --- a/src/backend/postmaster/interrupt.c +++ b/src/backend/postmaster/interrupt.c @@ -48,10 +48,6 @@ ProcessMainLoopInterrupts(void) /* Perform logging of memory contexts of this process */ if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); - - /* Publish memory contexts of this process */ - if (PublishMemoryContextPending) - ProcessGetMemoryContextInterrupt(); } /* diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index cb7408acf4c..78e39e5f866 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -718,15 +718,15 @@ pgarch_readyXlog(char *xlog) /* * Store the file in our max-heap if it has a high enough priority. */ - if (arch_files->arch_heap->bh_size < NUM_FILES_PER_DIRECTORY_SCAN) + if (binaryheap_size(arch_files->arch_heap) < NUM_FILES_PER_DIRECTORY_SCAN) { /* If the heap isn't full yet, quickly add it. */ - arch_file = arch_files->arch_filenames[arch_files->arch_heap->bh_size]; + arch_file = arch_files->arch_filenames[binaryheap_size(arch_files->arch_heap)]; strcpy(arch_file, basename); binaryheap_add_unordered(arch_files->arch_heap, CStringGetDatum(arch_file)); /* If we just filled the heap, make it a valid one. 
*/ - if (arch_files->arch_heap->bh_size == NUM_FILES_PER_DIRECTORY_SCAN) + if (binaryheap_size(arch_files->arch_heap) == NUM_FILES_PER_DIRECTORY_SCAN) binaryheap_build(arch_files->arch_heap); } else if (ready_file_comparator(binaryheap_first(arch_files->arch_heap), @@ -744,21 +744,21 @@ pgarch_readyXlog(char *xlog) FreeDir(rldir); /* If no files were found, simply return. */ - if (arch_files->arch_heap->bh_size == 0) + if (binaryheap_empty(arch_files->arch_heap)) return false; /* * If we didn't fill the heap, we didn't make it a valid one. Do that * now. */ - if (arch_files->arch_heap->bh_size < NUM_FILES_PER_DIRECTORY_SCAN) + if (binaryheap_size(arch_files->arch_heap) < NUM_FILES_PER_DIRECTORY_SCAN) binaryheap_build(arch_files->arch_heap); /* * Fill arch_files array with the files to archive in ascending order of * priority. */ - arch_files->arch_files_size = arch_files->arch_heap->bh_size; + arch_files->arch_files_size = binaryheap_size(arch_files->arch_heap); for (int i = 0; i < arch_files->arch_files_size; i++) arch_files->arch_files[i] = DatumGetCString(binaryheap_remove_first(arch_files->arch_heap)); @@ -867,10 +867,6 @@ ProcessPgArchInterrupts(void) if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); - /* Publish memory contexts of this process */ - if (PublishMemoryContextPending) - ProcessGetMemoryContextInterrupt(); - if (ConfigReloadPending) { char *archiveLib = pstrdup(XLogArchiveLibrary); diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c index 7149a67fcbc..27e86cf393f 100644 --- a/src/backend/postmaster/startup.c +++ b/src/backend/postmaster/startup.c @@ -192,10 +192,6 @@ ProcessStartupProcInterrupts(void) /* Perform logging of memory contexts of this process */ if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); - - /* Publish memory contexts of this process */ - if (PublishMemoryContextPending) - ProcessGetMemoryContextInterrupt(); } diff --git a/src/backend/postmaster/walsummarizer.c 
b/src/backend/postmaster/walsummarizer.c index c7a76711cc5..777c9a8d555 100644 --- a/src/backend/postmaster/walsummarizer.c +++ b/src/backend/postmaster/walsummarizer.c @@ -385,7 +385,7 @@ WalSummarizerMain(const void *startup_data, size_t startup_data_len) switch_lsn = tliSwitchPoint(current_tli, tles, &switch_tli); ereport(DEBUG1, - errmsg_internal("switch point from TLI %u to TLI %u is at %X/%X", + errmsg_internal("switch point from TLI %u to TLI %u is at %X/%08X", current_tli, switch_tli, LSN_FORMAT_ARGS(switch_lsn))); } @@ -741,7 +741,7 @@ WaitForWalSummarization(XLogRecPtr lsn) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("WAL summarization is not progressing"), - errdetail("Summarization is needed through %X/%X, but is stuck at %X/%X on disk and %X/%X in memory.", + errdetail("Summarization is needed through %X/%08X, but is stuck at %X/%08X on disk and %X/%08X in memory.", LSN_FORMAT_ARGS(lsn), LSN_FORMAT_ARGS(summarized_lsn), LSN_FORMAT_ARGS(pending_lsn)))); @@ -755,12 +755,12 @@ WaitForWalSummarization(XLogRecPtr lsn) current_time) / 1000; ereport(WARNING, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg_plural("still waiting for WAL summarization through %X/%X after %ld second", - "still waiting for WAL summarization through %X/%X after %ld seconds", + errmsg_plural("still waiting for WAL summarization through %X/%08X after %ld second", + "still waiting for WAL summarization through %X/%08X after %ld seconds", elapsed_seconds, LSN_FORMAT_ARGS(lsn), elapsed_seconds), - errdetail("Summarization has reached %X/%X on disk and %X/%X in memory.", + errdetail("Summarization has reached %X/%08X on disk and %X/%08X in memory.", LSN_FORMAT_ARGS(summarized_lsn), LSN_FORMAT_ARGS(pending_lsn)))); } @@ -879,10 +879,6 @@ ProcessWalSummarizerInterrupts(void) /* Perform logging of memory contexts of this process */ if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); - - /* Publish memory contexts of this process */ - 
if (PublishMemoryContextPending) - ProcessGetMemoryContextInterrupt(); } /* @@ -985,7 +981,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, if (private_data->end_of_wal) { ereport(DEBUG1, - errmsg_internal("could not read WAL from timeline %u at %X/%X: end of WAL at %X/%X", + errmsg_internal("could not read WAL from timeline %u at %X/%08X: end of WAL at %X/%08X", tli, LSN_FORMAT_ARGS(start_lsn), LSN_FORMAT_ARGS(private_data->read_upto))); @@ -1004,8 +1000,8 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, } else ereport(ERROR, - (errmsg("could not find a valid record after %X/%X", - LSN_FORMAT_ARGS(start_lsn)))); + errmsg("could not find a valid record after %X/%08X", + LSN_FORMAT_ARGS(start_lsn))); } /* We shouldn't go backward. */ @@ -1038,7 +1034,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, * able to read a complete record. */ ereport(DEBUG1, - errmsg_internal("could not read WAL from timeline %u at %X/%X: end of WAL at %X/%X", + errmsg_internal("could not read WAL from timeline %u at %X/%08X: end of WAL at %X/%08X", tli, LSN_FORMAT_ARGS(xlogreader->EndRecPtr), LSN_FORMAT_ARGS(private_data->read_upto))); @@ -1049,13 +1045,13 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, if (errormsg) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not read WAL from timeline %u at %X/%X: %s", + errmsg("could not read WAL from timeline %u at %X/%08X: %s", tli, LSN_FORMAT_ARGS(xlogreader->EndRecPtr), errormsg))); else ereport(ERROR, (errcode_for_file_access(), - errmsg("could not read WAL from timeline %u at %X/%X", + errmsg("could not read WAL from timeline %u at %X/%08X", tli, LSN_FORMAT_ARGS(xlogreader->EndRecPtr)))); } @@ -1226,7 +1222,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, /* Tell the user what we did. 
*/ ereport(DEBUG1, - errmsg_internal("summarized WAL on TLI %u from %X/%X to %X/%X", + errmsg_internal("summarized WAL on TLI %u from %X/%08X to %X/%08X", tli, LSN_FORMAT_ARGS(summary_start_lsn), LSN_FORMAT_ARGS(summary_end_lsn))); @@ -1238,7 +1234,7 @@ SummarizeWAL(TimeLineID tli, XLogRecPtr start_lsn, bool exact, /* If we skipped a non-zero amount of WAL, log a debug message. */ if (summary_end_lsn > summary_start_lsn && fast_forward) ereport(DEBUG1, - errmsg_internal("skipped summarizing WAL on TLI %u from %X/%X to %X/%X", + errmsg_internal("skipped summarizing WAL on TLI %u from %X/%08X to %X/%08X", tli, LSN_FORMAT_ARGS(summary_start_lsn), LSN_FORMAT_ARGS(summary_end_lsn))); @@ -1584,7 +1580,7 @@ summarizer_read_local_xlog_page(XLogReaderState *state, /* Debugging output. */ ereport(DEBUG1, - errmsg_internal("timeline %u became historic, can read up to %X/%X", + errmsg_internal("timeline %u became historic, can read up to %X/%08X", private_data->tli, LSN_FORMAT_ARGS(private_data->read_upto))); } diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 78193cfb964..d9eab5357bc 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -20,58 +20,13 @@ #include "common/unicode_category.h" #include "utils/pg_locale.h" -/* - * For the libc provider, to provide as much functionality as possible on a - * variety of platforms without going so far as to implement everything from - * scratch, we use several implementation strategies depending on the - * situation: - * - * 1. In C/POSIX collations, we use hard-wired code. We can't depend on - * the <ctype.h> functions since those will obey LC_CTYPE. Note that these - * collations don't give a fig about multibyte characters. - * - * 2. When working in UTF8 encoding, we use the <wctype.h> functions. - * This assumes that every platform uses Unicode codepoints directly - * as the wchar_t representation of Unicode. 
(XXX: ICU makes this assumption - * even for non-UTF8 encodings, which may be a problem.) On some platforms - * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF. - * - * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar - * values up to 255, and punt for values above that. This is 100% correct - * only in single-byte encodings such as LATINn. However, non-Unicode - * multibyte encodings are mostly Far Eastern character sets for which the - * properties being tested here aren't very relevant for higher code values - * anyway. The difficulty with using the <wctype.h> functions with - * non-Unicode multibyte encodings is that we can have no certainty that - * the platform's wchar_t representation matches what we do in pg_wchar - * conversions. - * - * As a special case, in the "default" collation, (2) and (3) force ASCII - * letters to follow ASCII upcase/downcase rules, while in a non-default - * collation we just let the library functions do what they will. The case - * where this matters is treatment of I/i in Turkish, and the behavior is - * meant to match the upper()/lower() SQL functions. - * - * We store the active collation setting in static variables. In principle - * it could be passed down to here via the regex library's "struct vars" data - * structure; but that would require somewhat invasive changes in the regex - * library, and right now there's no real benefit to be gained from that. - * - * NB: the coding here assumes pg_wchar is an unsigned type. 
- */ - -typedef enum -{ - PG_REGEX_STRATEGY_C, /* C locale (encoding independent) */ - PG_REGEX_STRATEGY_BUILTIN, /* built-in Unicode semantics */ - PG_REGEX_STRATEGY_LIBC_WIDE, /* Use locale_t <wctype.h> functions */ - PG_REGEX_STRATEGY_LIBC_1BYTE, /* Use locale_t <ctype.h> functions */ - PG_REGEX_STRATEGY_ICU, /* Use ICU uchar.h functions */ -} PG_Locale_Strategy; - -static PG_Locale_Strategy pg_regex_strategy; static pg_locale_t pg_regex_locale; +static struct pg_locale_struct dummy_c_locale = { + .collate_is_c = true, + .ctype_is_c = true, +}; + /* * Hard-wired character properties for C locale */ @@ -228,7 +183,6 @@ void pg_set_regex_collation(Oid collation) { pg_locale_t locale = 0; - PG_Locale_Strategy strategy; if (!OidIsValid(collation)) { @@ -249,8 +203,7 @@ pg_set_regex_collation(Oid collation) * catalog access is available, so we can't call * pg_newlocale_from_collation(). */ - strategy = PG_REGEX_STRATEGY_C; - locale = 0; + locale = &dummy_c_locale; } else { @@ -267,113 +220,41 @@ pg_set_regex_collation(Oid collation) * C/POSIX collations use this path regardless of database * encoding */ - strategy = PG_REGEX_STRATEGY_C; - locale = 0; - } - else if (locale->provider == COLLPROVIDER_BUILTIN) - { - Assert(GetDatabaseEncoding() == PG_UTF8); - strategy = PG_REGEX_STRATEGY_BUILTIN; - } -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - { - strategy = PG_REGEX_STRATEGY_ICU; - } -#endif - else - { - Assert(locale->provider == COLLPROVIDER_LIBC); - if (GetDatabaseEncoding() == PG_UTF8) - strategy = PG_REGEX_STRATEGY_LIBC_WIDE; - else - strategy = PG_REGEX_STRATEGY_LIBC_1BYTE; + locale = &dummy_c_locale; } } - pg_regex_strategy = strategy; pg_regex_locale = locale; } static int pg_wc_isdigit(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISDIGIT)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isdigit(c, !pg_regex_locale->info.builtin.casemap_full); - 
case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswdigit_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isdigit_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isdigit(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISDIGIT)); + else + return pg_regex_locale->ctype->wc_isdigit(c, pg_regex_locale); } static int pg_wc_isalpha(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISALPHA)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isalpha(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalpha_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isalpha_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isalpha(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISALPHA)); + else + return pg_regex_locale->ctype->wc_isalpha(c, pg_regex_locale); } static int pg_wc_isalnum(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISALNUM)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isalnum(c, !pg_regex_locale->info.builtin.casemap_full); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalnum_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: 
- return (c <= (pg_wchar) UCHAR_MAX && - isalnum_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isalnum(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISALNUM)); + else + return pg_regex_locale->ctype->wc_isalnum(c, pg_regex_locale); } static int @@ -388,231 +269,87 @@ pg_wc_isword(pg_wchar c) static int pg_wc_isupper(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISUPPER)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isupper(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswupper_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isupper_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isupper(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISUPPER)); + else + return pg_regex_locale->ctype->wc_isupper(c, pg_regex_locale); } static int pg_wc_islower(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISLOWER)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_islower(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswlower_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - islower_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_islower(c); -#endif - 
break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISLOWER)); + else + return pg_regex_locale->ctype->wc_islower(c, pg_regex_locale); } static int pg_wc_isgraph(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISGRAPH)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isgraph(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswgraph_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isgraph_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isgraph(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISGRAPH)); + else + return pg_regex_locale->ctype->wc_isgraph(c, pg_regex_locale); } static int pg_wc_isprint(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISPRINT)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isprint(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswprint_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isprint_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isprint(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISPRINT)); + else + return pg_regex_locale->ctype->wc_isprint(c, 
pg_regex_locale); } static int pg_wc_ispunct(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISPUNCT)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_ispunct(c, !pg_regex_locale->info.builtin.casemap_full); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswpunct_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - ispunct_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_ispunct(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISPUNCT)); + else + return pg_regex_locale->ctype->wc_ispunct(c, pg_regex_locale); } static int pg_wc_isspace(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISSPACE)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isspace(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswspace_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isspace_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isspace(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISSPACE)); + else + return pg_regex_locale->ctype->wc_isspace(c, pg_regex_locale); } static pg_wchar pg_wc_toupper(pg_wchar c) { - switch (pg_regex_strategy) + if (pg_regex_locale->ctype_is_c) { - case PG_REGEX_STRATEGY_C: - if (c <= (pg_wchar) 127) - return 
pg_ascii_toupper((unsigned char) c); - return c; - case PG_REGEX_STRATEGY_BUILTIN: - return unicode_uppercase_simple(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - /* force C behavior for ASCII characters, per comments above */ - if (pg_regex_locale->is_default && c <= (pg_wchar) 127) - return pg_ascii_toupper((unsigned char) c); - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towupper_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - /* force C behavior for ASCII characters, per comments above */ - if (pg_regex_locale->is_default && c <= (pg_wchar) 127) - return pg_ascii_toupper((unsigned char) c); - if (c <= (pg_wchar) UCHAR_MAX) - return toupper_l((unsigned char) c, pg_regex_locale->info.lt); - return c; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_toupper(c); -#endif - break; + if (c <= (pg_wchar) 127) + return pg_ascii_toupper((unsigned char) c); + return c; } - return 0; /* can't get here, but keep compiler quiet */ + else + return pg_regex_locale->ctype->wc_toupper(c, pg_regex_locale); } static pg_wchar pg_wc_tolower(pg_wchar c) { - switch (pg_regex_strategy) + if (pg_regex_locale->ctype_is_c) { - case PG_REGEX_STRATEGY_C: - if (c <= (pg_wchar) 127) - return pg_ascii_tolower((unsigned char) c); - return c; - case PG_REGEX_STRATEGY_BUILTIN: - return unicode_lowercase_simple(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - /* force C behavior for ASCII characters, per comments above */ - if (pg_regex_locale->is_default && c <= (pg_wchar) 127) - return pg_ascii_tolower((unsigned char) c); - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towlower_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - /* force C behavior for ASCII characters, per comments above */ - if (pg_regex_locale->is_default && c <= (pg_wchar) 127) - return pg_ascii_tolower((unsigned char) c); - if (c <= (pg_wchar) UCHAR_MAX) - return tolower_l((unsigned char) c, 
pg_regex_locale->info.lt); - return c; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_tolower(c); -#endif - break; + if (c <= (pg_wchar) 127) + return pg_ascii_tolower((unsigned char) c); + return c; } - return 0; /* can't get here, but keep compiler quiet */ + else + return pg_regex_locale->ctype->wc_tolower(c, pg_regex_locale); } @@ -738,37 +475,25 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode) * would always be true for production values of MAX_SIMPLE_CHR, but it's * useful to allow it to be small for testing purposes.) */ - switch (pg_regex_strategy) + if (pg_regex_locale->ctype_is_c) { - case PG_REGEX_STRATEGY_C: #if MAX_SIMPLE_CHR >= 127 - max_chr = (pg_wchar) 127; - pcc->cv.cclasscode = -1; + max_chr = (pg_wchar) 127; + pcc->cv.cclasscode = -1; #else - max_chr = (pg_wchar) MAX_SIMPLE_CHR; + max_chr = (pg_wchar) MAX_SIMPLE_CHR; #endif - break; - case PG_REGEX_STRATEGY_BUILTIN: - max_chr = (pg_wchar) MAX_SIMPLE_CHR; - break; - case PG_REGEX_STRATEGY_LIBC_WIDE: - max_chr = (pg_wchar) MAX_SIMPLE_CHR; - break; - case PG_REGEX_STRATEGY_LIBC_1BYTE: -#if MAX_SIMPLE_CHR >= UCHAR_MAX - max_chr = (pg_wchar) UCHAR_MAX; + } + else + { + if (pg_regex_locale->ctype->max_chr != 0 && + pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR) + { + max_chr = pg_regex_locale->ctype->max_chr; pcc->cv.cclasscode = -1; -#else - max_chr = (pg_wchar) MAX_SIMPLE_CHR; -#endif - break; - case PG_REGEX_STRATEGY_ICU: + } + else max_chr = (pg_wchar) MAX_SIMPLE_CHR; - break; - default: - Assert(false); - max_chr = 0; /* can't get here, but keep compiler quiet */ - break; } /* diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c index 7b4ddf7a8f5..f7b5d093681 100644 --- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c +++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c @@ -534,7 +534,7 @@ libpqrcv_startstreaming(WalReceiverConn *conn, if (options->logical) 
appendStringInfoString(&cmd, " LOGICAL"); - appendStringInfo(&cmd, " %X/%X", LSN_FORMAT_ARGS(options->startpoint)); + appendStringInfo(&cmd, " %X/%08X", LSN_FORMAT_ARGS(options->startpoint)); /* * Additional options are different depending on if we are doing logical diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index 10677da56b2..4aed0dfcebb 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -175,12 +175,14 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker, uint16 generation, BackgroundWorkerHandle *handle) { - BgwHandleStatus status; - int rc; + bool result = false; + bool dropped_latch = false; for (;;) { + BgwHandleStatus status; pid_t pid; + int rc; CHECK_FOR_INTERRUPTS(); @@ -189,8 +191,9 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker, /* Worker either died or has started. Return false if died. */ if (!worker->in_use || worker->proc) { + result = worker->in_use; LWLockRelease(LogicalRepWorkerLock); - return worker->in_use; + break; } LWLockRelease(LogicalRepWorkerLock); @@ -205,7 +208,7 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker, if (generation == worker->generation) logicalrep_worker_cleanup(worker); LWLockRelease(LogicalRepWorkerLock); - return false; + break; /* result is already false */ } /* @@ -220,8 +223,18 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker, { ResetLatch(MyLatch); CHECK_FOR_INTERRUPTS(); + dropped_latch = true; } } + + /* + * If we had to clear a latch event in order to wait, be sure to restore + * it before exiting. Otherwise caller may miss events. 
+ */ + if (dropped_latch) + SetLatch(MyLatch); + + return result; } /* @@ -328,7 +341,7 @@ logicalrep_worker_launch(LogicalRepWorkerType wtype, if (max_active_replication_origins == 0) ereport(ERROR, (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED), - errmsg("cannot start logical replication workers when \"max_active_replication_origins\"=0"))); + errmsg("cannot start logical replication workers when \"max_active_replication_origins\" is 0"))); /* * We need to do the modification of the shared memory under lock so that @@ -1016,7 +1029,7 @@ logicalrep_launcher_attach_dshmem(void) last_start_times_dsa = dsa_attach(LogicalRepCtx->last_start_dsa); dsa_pin_mapping(last_start_times_dsa); last_start_times = dshash_attach(last_start_times_dsa, &dsh_params, - LogicalRepCtx->last_start_dsh, 0); + LogicalRepCtx->last_start_dsh, NULL); } MemoryContextSwitchTo(oldcontext); @@ -1194,10 +1207,21 @@ ApplyLauncherMain(Datum main_arg) (elapsed = TimestampDifferenceMilliseconds(last_start, now)) >= wal_retrieve_retry_interval) { ApplyLauncherSetWorkerStartTime(sub->oid, now); - logicalrep_worker_launch(WORKERTYPE_APPLY, - sub->dbid, sub->oid, sub->name, - sub->owner, InvalidOid, - DSM_HANDLE_INVALID); + if (!logicalrep_worker_launch(WORKERTYPE_APPLY, + sub->dbid, sub->oid, sub->name, + sub->owner, InvalidOid, + DSM_HANDLE_INVALID)) + { + /* + * We get here either if we failed to launch a worker + * (perhaps for resource-exhaustion reasons) or if we + * launched one but it immediately quit. Either way, it + * seems appropriate to try again after + * wal_retrieve_retry_interval. 
+ */ + wait_time = Min(wait_time, + wal_retrieve_retry_interval); + } } else { diff --git a/src/backend/replication/logical/logical.c b/src/backend/replication/logical/logical.c index 1d56d0c4ef3..7e363a7c05b 100644 --- a/src/backend/replication/logical/logical.c +++ b/src/backend/replication/logical/logical.c @@ -29,6 +29,7 @@ #include "postgres.h" #include "access/xact.h" +#include "access/xlog_internal.h" #include "access/xlogutils.h" #include "fmgr.h" #include "miscadmin.h" @@ -41,6 +42,7 @@ #include "storage/proc.h" #include "storage/procarray.h" #include "utils/builtins.h" +#include "utils/injection_point.h" #include "utils/inval.h" #include "utils/memutils.h" @@ -565,7 +567,7 @@ CreateDecodingContext(XLogRecPtr start_lsn, * kinds of client errors; so the client may wish to check that * confirmed_flush_lsn matches its expectations. */ - elog(LOG, "%X/%X has been already streamed, forwarding to %X/%X", + elog(LOG, "%X/%08X has been already streamed, forwarding to %X/%08X", LSN_FORMAT_ARGS(start_lsn), LSN_FORMAT_ARGS(slot->data.confirmed_flush)); @@ -608,7 +610,7 @@ CreateDecodingContext(XLogRecPtr start_lsn, ereport(LOG, (errmsg("starting logical decoding for slot \"%s\"", NameStr(slot->data.name)), - errdetail("Streaming transactions committing after %X/%X, reading WAL from %X/%X.", + errdetail("Streaming transactions committing after %X/%08X, reading WAL from %X/%08X.", LSN_FORMAT_ARGS(slot->data.confirmed_flush), LSN_FORMAT_ARGS(slot->data.restart_lsn)))); @@ -635,7 +637,7 @@ DecodingContextFindStartpoint(LogicalDecodingContext *ctx) /* Initialize from where to start reading WAL. 
*/ XLogBeginRead(ctx->reader, slot->data.restart_lsn); - elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X", + elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%08X", LSN_FORMAT_ARGS(slot->data.restart_lsn)); /* Wait for a consistent starting point */ @@ -756,7 +758,7 @@ output_plugin_error_callback(void *arg) /* not all callbacks have an associated LSN */ if (state->report_location != InvalidXLogRecPtr) - errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%X", + errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%08X", NameStr(state->ctx->slot->data.name), NameStr(state->ctx->slot->data.plugin), state->callback_name, @@ -1723,7 +1725,7 @@ LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin) SpinLockRelease(&slot->mutex); if (got_new_xmin) - elog(DEBUG1, "got new catalog xmin %u at %X/%X", xmin, + elog(DEBUG1, "got new catalog xmin %u at %X/%08X", xmin, LSN_FORMAT_ARGS(current_lsn)); /* candidate already valid with the current flush position, apply */ @@ -1783,7 +1785,7 @@ LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart slot->candidate_restart_lsn = restart_lsn; SpinLockRelease(&slot->mutex); - elog(DEBUG1, "got new restart lsn %X/%X at %X/%X", + elog(DEBUG1, "got new restart lsn %X/%08X at %X/%08X", LSN_FORMAT_ARGS(restart_lsn), LSN_FORMAT_ARGS(current_lsn)); } @@ -1798,7 +1800,7 @@ LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart confirmed_flush = slot->data.confirmed_flush; SpinLockRelease(&slot->mutex); - elog(DEBUG1, "failed to increase restart lsn: proposed %X/%X, after %X/%X, current candidate %X/%X, current after %X/%X, flushed up to %X/%X", + elog(DEBUG1, "failed to increase restart lsn: proposed %X/%08X, after %X/%08X, current candidate %X/%08X, current after %X/%08X, flushed up to %X/%08X", LSN_FORMAT_ARGS(restart_lsn), LSN_FORMAT_ARGS(current_lsn), 
LSN_FORMAT_ARGS(candidate_restart_lsn), @@ -1825,9 +1827,13 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn) { bool updated_xmin = false; bool updated_restart = false; + XLogRecPtr restart_lsn pg_attribute_unused(); SpinLockAcquire(&MyReplicationSlot->mutex); + /* remember the old restart lsn */ + restart_lsn = MyReplicationSlot->data.restart_lsn; + /* * Prevent moving the confirmed_flush backwards, as this could lead to * data duplication issues caused by replicating already replicated @@ -1881,6 +1887,18 @@ LogicalConfirmReceivedLocation(XLogRecPtr lsn) /* first write new xmin to disk, so we know what's up after a crash */ if (updated_xmin || updated_restart) { +#ifdef USE_INJECTION_POINTS + XLogSegNo seg1, + seg2; + + XLByteToSeg(restart_lsn, seg1, wal_segment_size); + XLByteToSeg(MyReplicationSlot->data.restart_lsn, seg2, wal_segment_size); + + /* trigger injection point, but only if segment changes */ + if (seg1 != seg2) + INJECTION_POINT("logical-replication-slot-advance-segment", NULL); +#endif + ReplicationSlotMarkDirty(); ReplicationSlotSave(); elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart); diff --git a/src/backend/replication/logical/origin.c b/src/backend/replication/logical/origin.c index a17bacf88e7..87f10e50dcc 100644 --- a/src/backend/replication/logical/origin.c +++ b/src/backend/replication/logical/origin.c @@ -826,9 +826,9 @@ StartupReplicationOrigin(void) last_state++; ereport(LOG, - (errmsg("recovered replication state of node %d to %X/%X", - disk_state.roident, - LSN_FORMAT_ARGS(disk_state.remote_lsn)))); + errmsg("recovered replication state of node %d to %X/%08X", + disk_state.roident, + LSN_FORMAT_ARGS(disk_state.remote_lsn))); } /* now check checksum */ diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 67655111875..7b4e8629553 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ 
-109,10 +109,22 @@ #include "storage/procarray.h" #include "storage/sinval.h" #include "utils/builtins.h" +#include "utils/inval.h" #include "utils/memutils.h" #include "utils/rel.h" #include "utils/relfilenumbermap.h" +/* + * Each transaction has an 8MB limit for invalidation messages distributed from + * other transactions. This limit is set considering scenarios with many + * concurrent logical decoding operations. When the distributed invalidation + * messages reach this threshold, the transaction is marked as + * RBTXN_DISTR_INVAL_OVERFLOWED to invalidate the complete cache as we have lost + * some inval messages and hence don't know what needs to be invalidated. + */ +#define MAX_DISTR_INVAL_MSG_PER_TXN \ + ((8 * 1024 * 1024) / sizeof(SharedInvalidationMessage)) + /* entry for a hash table we use to map from xid to our transaction state */ typedef struct ReorderBufferTXNByIdEnt { @@ -472,6 +484,12 @@ ReorderBufferFreeTXN(ReorderBuffer *rb, ReorderBufferTXN *txn) txn->invalidations = NULL; } + if (txn->invalidations_distributed) + { + pfree(txn->invalidations_distributed); + txn->invalidations_distributed = NULL; + } + /* Reset the toast hash */ ReorderBufferToastReset(rb, txn); @@ -1397,7 +1415,7 @@ ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state) int32 off; /* nothing there anymore */ - if (state->heap->bh_size == 0) + if (binaryheap_empty(state->heap)) return NULL; off = DatumGetInt32(binaryheap_first(state->heap)); @@ -2661,7 +2679,17 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, AbortCurrentTransaction(); /* make sure there's no cache pollution */ - ReorderBufferExecuteInvalidations(txn->ninvalidations, txn->invalidations); + if (rbtxn_distr_inval_overflowed(txn)) + { + Assert(txn->ninvalidations_distributed == 0); + InvalidateSystemCaches(); + } + else + { + ReorderBufferExecuteInvalidations(txn->ninvalidations, txn->invalidations); + ReorderBufferExecuteInvalidations(txn->ninvalidations_distributed, + 
txn->invalidations_distributed); + } if (using_subtxn) RollbackAndReleaseCurrentSubTransaction(); @@ -2710,8 +2738,17 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, AbortCurrentTransaction(); /* make sure there's no cache pollution */ - ReorderBufferExecuteInvalidations(txn->ninvalidations, - txn->invalidations); + if (rbtxn_distr_inval_overflowed(txn)) + { + Assert(txn->ninvalidations_distributed == 0); + InvalidateSystemCaches(); + } + else + { + ReorderBufferExecuteInvalidations(txn->ninvalidations, txn->invalidations); + ReorderBufferExecuteInvalidations(txn->ninvalidations_distributed, + txn->invalidations_distributed); + } if (using_subtxn) RollbackAndReleaseCurrentSubTransaction(); @@ -3060,7 +3097,8 @@ ReorderBufferAbort(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, * We might have decoded changes for this transaction that could load * the cache as per the current transaction's view (consider DDL's * happened in this transaction). We don't want the decoding of future - * transactions to use those cache entries so execute invalidations. + * transactions to use those cache entries so execute only the inval + * messages in this transaction. */ if (txn->ninvalidations > 0) ReorderBufferImmediateInvalidation(rb, txn->ninvalidations, @@ -3147,9 +3185,10 @@ ReorderBufferForget(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn) txn->final_lsn = lsn; /* - * Process cache invalidation messages if there are any. Even if we're not - * interested in the transaction's contents, it could have manipulated the - * catalog and we need to update the caches according to that. + * Process only cache invalidation messages in this transaction if there + * are any. Even if we're not interested in the transaction's contents, it + * could have manipulated the catalog and we need to update the caches + * according to that. 
*/ if (txn->base_snapshot != NULL && txn->ninvalidations > 0) ReorderBufferImmediateInvalidation(rb, txn->ninvalidations, @@ -3422,6 +3461,57 @@ ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid, } /* + * Add new invalidation messages to the reorder buffer queue. + */ +static void +ReorderBufferQueueInvalidations(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr lsn, Size nmsgs, + SharedInvalidationMessage *msgs) +{ + ReorderBufferChange *change; + + change = ReorderBufferAllocChange(rb); + change->action = REORDER_BUFFER_CHANGE_INVALIDATION; + change->data.inval.ninvalidations = nmsgs; + change->data.inval.invalidations = (SharedInvalidationMessage *) + palloc(sizeof(SharedInvalidationMessage) * nmsgs); + memcpy(change->data.inval.invalidations, msgs, + sizeof(SharedInvalidationMessage) * nmsgs); + + ReorderBufferQueueChange(rb, xid, lsn, change, false); +} + +/* + * A helper function for ReorderBufferAddInvalidations() and + * ReorderBufferAddDistributedInvalidations() to accumulate the invalidation + * messages to the **invals_out. + */ +static void +ReorderBufferAccumulateInvalidations(SharedInvalidationMessage **invals_out, + uint32 *ninvals_out, + SharedInvalidationMessage *msgs_new, + Size nmsgs_new) +{ + if (*ninvals_out == 0) + { + *ninvals_out = nmsgs_new; + *invals_out = (SharedInvalidationMessage *) + palloc(sizeof(SharedInvalidationMessage) * nmsgs_new); + memcpy(*invals_out, msgs_new, sizeof(SharedInvalidationMessage) * nmsgs_new); + } + else + { + /* Enlarge the array of inval messages */ + *invals_out = (SharedInvalidationMessage *) + repalloc(*invals_out, sizeof(SharedInvalidationMessage) * + (*ninvals_out + nmsgs_new)); + memcpy(*invals_out + *ninvals_out, msgs_new, + nmsgs_new * sizeof(SharedInvalidationMessage)); + *ninvals_out += nmsgs_new; + } +} + +/* * Accumulate the invalidations for executing them later. 
* * This needs to be called for each XLOG_XACT_INVALIDATIONS message and @@ -3441,7 +3531,6 @@ ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid, { ReorderBufferTXN *txn; MemoryContext oldcontext; - ReorderBufferChange *change; txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true); @@ -3456,35 +3545,76 @@ ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid, Assert(nmsgs > 0); - /* Accumulate invalidations. */ - if (txn->ninvalidations == 0) - { - txn->ninvalidations = nmsgs; - txn->invalidations = (SharedInvalidationMessage *) - palloc(sizeof(SharedInvalidationMessage) * nmsgs); - memcpy(txn->invalidations, msgs, - sizeof(SharedInvalidationMessage) * nmsgs); - } - else + ReorderBufferAccumulateInvalidations(&txn->invalidations, + &txn->ninvalidations, + msgs, nmsgs); + + ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accumulate the invalidations distributed by other committed transactions + * for executing them later. + * + * This function is similar to ReorderBufferAddInvalidations() but stores + * the given inval messages to the txn->invalidations_distributed with the + * overflow check. + * + * This needs to be called by committed transactions to distribute their + * inval messages to in-progress transactions. + */ +void +ReorderBufferAddDistributedInvalidations(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr lsn, Size nmsgs, + SharedInvalidationMessage *msgs) +{ + ReorderBufferTXN *txn; + MemoryContext oldcontext; + + txn = ReorderBufferTXNByXid(rb, xid, true, NULL, lsn, true); + + oldcontext = MemoryContextSwitchTo(rb->context); + + /* + * Collect all the invalidations under the top transaction, if available, + * so that we can execute them all together. See comments + * ReorderBufferAddInvalidations. 
+ */ + txn = rbtxn_get_toptxn(txn); + + Assert(nmsgs > 0); + + if (!rbtxn_distr_inval_overflowed(txn)) { - txn->invalidations = (SharedInvalidationMessage *) - repalloc(txn->invalidations, sizeof(SharedInvalidationMessage) * - (txn->ninvalidations + nmsgs)); + /* + * Check the transaction has enough space for storing distributed + * invalidation messages. + */ + if (txn->ninvalidations_distributed + nmsgs >= MAX_DISTR_INVAL_MSG_PER_TXN) + { + /* + * Mark the invalidation message as overflowed and free up the + * messages accumulated so far. + */ + txn->txn_flags |= RBTXN_DISTR_INVAL_OVERFLOWED; - memcpy(txn->invalidations + txn->ninvalidations, msgs, - nmsgs * sizeof(SharedInvalidationMessage)); - txn->ninvalidations += nmsgs; + if (txn->invalidations_distributed) + { + pfree(txn->invalidations_distributed); + txn->invalidations_distributed = NULL; + txn->ninvalidations_distributed = 0; + } + } + else + ReorderBufferAccumulateInvalidations(&txn->invalidations_distributed, + &txn->ninvalidations_distributed, + msgs, nmsgs); } - change = ReorderBufferAllocChange(rb); - change->action = REORDER_BUFFER_CHANGE_INVALIDATION; - change->data.inval.ninvalidations = nmsgs; - change->data.inval.invalidations = (SharedInvalidationMessage *) - palloc(sizeof(SharedInvalidationMessage) * nmsgs); - memcpy(change->data.inval.invalidations, msgs, - sizeof(SharedInvalidationMessage) * nmsgs); - - ReorderBufferQueueChange(rb, xid, lsn, change, false); + /* Queue the invalidation messages into the transaction */ + ReorderBufferQueueInvalidations(rb, xid, lsn, nmsgs, msgs); MemoryContextSwitchTo(oldcontext); } diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c index 656e66e0ae0..2f0c08b8fbd 100644 --- a/src/backend/replication/logical/slotsync.c +++ b/src/backend/replication/logical/slotsync.c @@ -211,9 +211,9 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid, * impact the users, so we used DEBUG1 level to log the 
message. */ ereport(slot->data.persistency == RS_TEMPORARY ? LOG : DEBUG1, - errmsg("could not synchronize replication slot \"%s\" because remote slot precedes local slot", + errmsg("could not synchronize replication slot \"%s\"", remote_slot->name), - errdetail("The remote slot has LSN %X/%X and catalog xmin %u, but the local slot has LSN %X/%X and catalog xmin %u.", + errdetail("Synchronization could lead to data loss, because the remote slot needs WAL at LSN %X/%08X and catalog xmin %u, but the standby has LSN %X/%08X and catalog xmin %u.", LSN_FORMAT_ARGS(remote_slot->restart_lsn), remote_slot->catalog_xmin, LSN_FORMAT_ARGS(slot->data.restart_lsn), @@ -275,7 +275,7 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid, ereport(ERROR, errmsg_internal("synchronized confirmed_flush for slot \"%s\" differs from remote slot", remote_slot->name), - errdetail_internal("Remote slot has LSN %X/%X but local slot has LSN %X/%X.", + errdetail_internal("Remote slot has LSN %X/%08X but local slot has LSN %X/%08X.", LSN_FORMAT_ARGS(remote_slot->confirmed_lsn), LSN_FORMAT_ARGS(slot->data.confirmed_flush))); } @@ -593,7 +593,7 @@ update_and_persist_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid) { ereport(LOG, errmsg("could not synchronize replication slot \"%s\"", remote_slot->name), - errdetail("Logical decoding could not find consistent point from local slot's LSN %X/%X.", + errdetail("Synchronization could lead to data loss, because the standby could not build a consistent snapshot to decode WALs at LSN %X/%08X.", LSN_FORMAT_ARGS(slot->data.restart_lsn))); return false; @@ -642,7 +642,7 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid) ereport(AmLogicalSlotSyncWorkerProcess() ? 
LOG : ERROR, errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("skipping slot synchronization because the received slot sync" - " LSN %X/%X for slot \"%s\" is ahead of the standby position %X/%X", + " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X", LSN_FORMAT_ARGS(remote_slot->confirmed_lsn), remote_slot->name, LSN_FORMAT_ARGS(latestFlushPtr))); @@ -733,7 +733,7 @@ synchronize_one_slot(RemoteSlot *remote_slot, Oid remote_dbid) ereport(ERROR, errmsg_internal("cannot synchronize local slot \"%s\"", remote_slot->name), - errdetail_internal("Local slot's start streaming location LSN(%X/%X) is ahead of remote slot's LSN(%X/%X).", + errdetail_internal("Local slot's start streaming location LSN(%X/%08X) is ahead of remote slot's LSN(%X/%08X).", LSN_FORMAT_ARGS(slot->data.confirmed_flush), LSN_FORMAT_ARGS(remote_slot->confirmed_lsn))); diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 0d7bddbe4ed..8532bfd27e5 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -774,7 +774,7 @@ SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, Transact if (rbtxn_is_prepared(txn)) continue; - elog(DEBUG2, "adding a new snapshot and invalidations to %u at %X/%X", + elog(DEBUG2, "adding a new snapshot and invalidations to %u at %X/%08X", txn->xid, LSN_FORMAT_ARGS(lsn)); /* @@ -794,6 +794,13 @@ SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, Transact * contents built by the current transaction even after its decoding, * which should have been invalidated due to concurrent catalog * changing transaction. + * + * Distribute only the invalidation messages generated by the current + * committed transaction. Invalidation messages received from other + * transactions would have already been propagated to the relevant + * in-progress transactions. 
This transaction would have processed + * those invalidations, ensuring that subsequent transactions observe + * a consistent cache state. */ if (txn->xid != xid) { @@ -807,8 +814,9 @@ SnapBuildDistributeSnapshotAndInval(SnapBuild *builder, XLogRecPtr lsn, Transact { Assert(msgs != NULL); - ReorderBufferAddInvalidations(builder->reorder, txn->xid, lsn, - ninvalidations, msgs); + ReorderBufferAddDistributedInvalidations(builder->reorder, + txn->xid, lsn, + ninvalidations, msgs); } } } @@ -1263,10 +1271,10 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn builder->initial_xmin_horizon)) { ereport(DEBUG1, - (errmsg_internal("skipping snapshot at %X/%X while building logical decoding snapshot, xmin horizon too low", - LSN_FORMAT_ARGS(lsn)), - errdetail_internal("initial xmin horizon of %u vs the snapshot's %u", - builder->initial_xmin_horizon, running->oldestRunningXid))); + errmsg_internal("skipping snapshot at %X/%08X while building logical decoding snapshot, xmin horizon too low", + LSN_FORMAT_ARGS(lsn)), + errdetail_internal("initial xmin horizon of %u vs the snapshot's %u", + builder->initial_xmin_horizon, running->oldestRunningXid)); SnapBuildWaitSnapshot(running, builder->initial_xmin_horizon); @@ -1302,9 +1310,9 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn builder->next_phase_at = InvalidTransactionId; ereport(LOG, - (errmsg("logical decoding found consistent point at %X/%X", - LSN_FORMAT_ARGS(lsn)), - errdetail("There are no running transactions."))); + errmsg("logical decoding found consistent point at %X/%08X", + LSN_FORMAT_ARGS(lsn)), + errdetail("There are no running transactions.")); return false; } @@ -1351,10 +1359,10 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn Assert(TransactionIdIsNormal(builder->xmax)); ereport(LOG, - (errmsg("logical decoding found initial starting point at %X/%X", - LSN_FORMAT_ARGS(lsn)), - errdetail("Waiting for 
transactions (approximately %d) older than %u to end.", - running->xcnt, running->nextXid))); + errmsg("logical decoding found initial starting point at %X/%08X", + LSN_FORMAT_ARGS(lsn)), + errdetail("Waiting for transactions (approximately %d) older than %u to end.", + running->xcnt, running->nextXid)); SnapBuildWaitSnapshot(running, running->nextXid); } @@ -1375,10 +1383,10 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn builder->next_phase_at = running->nextXid; ereport(LOG, - (errmsg("logical decoding found initial consistent point at %X/%X", - LSN_FORMAT_ARGS(lsn)), - errdetail("Waiting for transactions (approximately %d) older than %u to end.", - running->xcnt, running->nextXid))); + errmsg("logical decoding found initial consistent point at %X/%08X", + LSN_FORMAT_ARGS(lsn)), + errdetail("Waiting for transactions (approximately %d) older than %u to end.", + running->xcnt, running->nextXid)); SnapBuildWaitSnapshot(running, running->nextXid); } @@ -1399,9 +1407,9 @@ SnapBuildFindSnapshot(SnapBuild *builder, XLogRecPtr lsn, xl_running_xacts *runn builder->next_phase_at = InvalidTransactionId; ereport(LOG, - (errmsg("logical decoding found consistent point at %X/%X", - LSN_FORMAT_ARGS(lsn)), - errdetail("There are no old transactions anymore."))); + errmsg("logical decoding found consistent point at %X/%08X", + LSN_FORMAT_ARGS(lsn)), + errdetail("There are no old transactions anymore.")); } /* @@ -1905,9 +1913,9 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn) Assert(builder->state == SNAPBUILD_CONSISTENT); ereport(LOG, - (errmsg("logical decoding found consistent point at %X/%X", - LSN_FORMAT_ARGS(lsn)), - errdetail("Logical decoding will begin using saved snapshot."))); + errmsg("logical decoding found consistent point at %X/%08X", + LSN_FORMAT_ARGS(lsn)), + errdetail("Logical decoding will begin using saved snapshot.")); return true; snapshot_not_interesting: @@ -2053,7 +2061,7 @@ SnapBuildSnapshotExists(XLogRecPtr 
lsn) int ret; struct stat stat_buf; - sprintf(path, "%s/%X-%X.snap", + sprintf(path, "%s/%08X-%08X.snap", PG_LOGICAL_SNAPSHOTS_DIR, LSN_FORMAT_ARGS(lsn)); diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c index 8e1e8762f62..e4fd6347fd1 100644 --- a/src/backend/replication/logical/tablesync.c +++ b/src/backend/replication/logical/tablesync.c @@ -603,14 +603,19 @@ process_syncing_tables_for_apply(XLogRecPtr current_lsn) TimestampDifferenceExceeds(hentry->last_start_time, now, wal_retrieve_retry_interval)) { - logicalrep_worker_launch(WORKERTYPE_TABLESYNC, - MyLogicalRepWorker->dbid, - MySubscription->oid, - MySubscription->name, - MyLogicalRepWorker->userid, - rstate->relid, - DSM_HANDLE_INVALID); + /* + * Set the last_start_time even if we fail to start + * the worker, so that we won't retry until + * wal_retrieve_retry_interval has elapsed. + */ hentry->last_start_time = now; + (void) logicalrep_worker_launch(WORKERTYPE_TABLESYNC, + MyLogicalRepWorker->dbid, + MySubscription->oid, + MySubscription->name, + MyLogicalRepWorker->userid, + rstate->relid, + DSM_HANDLE_INVALID); } } } @@ -1548,7 +1553,7 @@ LogicalRepSyncTableStart(XLogRecPtr *origin_startpos) copy_table_done: elog(DEBUG1, - "LogicalRepSyncTableStart: '%s' origin_startpos lsn %X/%X", + "LogicalRepSyncTableStart: '%s' origin_startpos lsn %X/%08X", originname, LSN_FORMAT_ARGS(*origin_startpos)); /* diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 4151a4b2a96..c5fb627aa56 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -109,13 +109,6 @@ * If ever a user needs to be aware of the tri-state value, they can fetch it * from the pg_subscription catalog (see column subtwophasestate). * - * We don't allow to toggle two_phase option of a subscription because it can - * lead to an inconsistent replica. 
Consider, initially, it was on and we have - * received some prepare then we turn it off, now at commit time the server - * will send the entire transaction data along with the commit. With some more - * analysis, we can allow changing this option from off to on but not sure if - * that alone would be useful. - * * Finally, to avoid problems mentioned in previous paragraphs from any * subsequent (not READY) tablesyncs (need to toggle two_phase option from 'on' * to 'off' and then again back to 'on') there is a restriction for @@ -1023,7 +1016,7 @@ apply_handle_commit(StringInfo s) if (commit_data.commit_lsn != remote_final_lsn) ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), - errmsg_internal("incorrect commit LSN %X/%X in commit message (expected %X/%X)", + errmsg_internal("incorrect commit LSN %X/%08X in commit message (expected %X/%08X)", LSN_FORMAT_ARGS(commit_data.commit_lsn), LSN_FORMAT_ARGS(remote_final_lsn)))); @@ -1115,7 +1108,7 @@ apply_handle_prepare(StringInfo s) if (prepare_data.prepare_lsn != remote_final_lsn) ereport(ERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), - errmsg_internal("incorrect prepare LSN %X/%X in prepare message (expected %X/%X)", + errmsg_internal("incorrect prepare LSN %X/%08X in prepare message (expected %X/%08X)", LSN_FORMAT_ARGS(prepare_data.prepare_lsn), LSN_FORMAT_ARGS(remote_final_lsn)))); @@ -3910,7 +3903,7 @@ send_feedback(XLogRecPtr recvpos, bool force, bool requestReply) pq_sendint64(reply_message, now); /* sendTime */ pq_sendbyte(reply_message, requestReply); /* replyRequested */ - elog(DEBUG2, "sending feedback (force %d) to recv %X/%X, write %X/%X, flush %X/%X", + elog(DEBUG2, "sending feedback (force %d) to recv %X/%08X, write %X/%08X, flush %X/%08X", force, LSN_FORMAT_ARGS(recvpos), LSN_FORMAT_ARGS(writepos), @@ -4626,8 +4619,16 @@ run_apply_worker() walrcv_startstreaming(LogRepWorkerWalRcvConn, &options); StartTransactionCommand(); + + /* + * Updating pg_subscription might involve TOAST table access, so + * ensure 
we have a valid snapshot. + */ + PushActiveSnapshot(GetTransactionSnapshot()); + UpdateTwoPhaseState(MySubscription->oid, LOGICALREP_TWOPHASE_STATE_ENABLED); MySubscription->twophasestate = LOGICALREP_TWOPHASE_STATE_ENABLED; + PopActiveSnapshot(); CommitTransactionCommand(); } else @@ -4843,7 +4844,15 @@ DisableSubscriptionAndExit(void) /* Disable the subscription */ StartTransactionCommand(); + + /* + * Updating pg_subscription might involve TOAST table access, so ensure we + * have a valid snapshot. + */ + PushActiveSnapshot(GetTransactionSnapshot()); + DisableSubscription(MySubscription->oid); + PopActiveSnapshot(); CommitTransactionCommand(); /* Ensure we remove no-longer-useful entry for worker's start time */ @@ -4900,7 +4909,7 @@ maybe_start_skipping_changes(XLogRecPtr finish_lsn) skip_xact_finish_lsn = finish_lsn; ereport(LOG, - errmsg("logical replication starts skipping transaction at LSN %X/%X", + errmsg("logical replication starts skipping transaction at LSN %X/%08X", LSN_FORMAT_ARGS(skip_xact_finish_lsn))); } @@ -4914,8 +4923,8 @@ stop_skipping_changes(void) return; ereport(LOG, - (errmsg("logical replication completed skipping transaction at LSN %X/%X", - LSN_FORMAT_ARGS(skip_xact_finish_lsn)))); + errmsg("logical replication completed skipping transaction at LSN %X/%08X", + LSN_FORMAT_ARGS(skip_xact_finish_lsn))); /* Stop skipping changes */ skip_xact_finish_lsn = InvalidXLogRecPtr; @@ -4948,6 +4957,12 @@ clear_subscription_skip_lsn(XLogRecPtr finish_lsn) } /* + * Updating pg_subscription might involve TOAST table access, so ensure we + * have a valid snapshot. + */ + PushActiveSnapshot(GetTransactionSnapshot()); + + /* * Protect subskiplsn of pg_subscription from being concurrently updated * while clearing it. 
*/ @@ -4997,7 +5012,7 @@ clear_subscription_skip_lsn(XLogRecPtr finish_lsn) if (myskiplsn != finish_lsn) ereport(WARNING, errmsg("skip-LSN of subscription \"%s\" cleared", MySubscription->name), - errdetail("Remote transaction's finish WAL location (LSN) %X/%X did not match skip-LSN %X/%X.", + errdetail("Remote transaction's finish WAL location (LSN) %X/%08X did not match skip-LSN %X/%08X.", LSN_FORMAT_ARGS(finish_lsn), LSN_FORMAT_ARGS(myskiplsn))); } @@ -5005,6 +5020,8 @@ clear_subscription_skip_lsn(XLogRecPtr finish_lsn) heap_freetuple(tup); table_close(rel, NoLock); + PopActiveSnapshot(); + if (started_tx) CommitTransactionCommand(); } @@ -5032,7 +5049,7 @@ apply_error_callback(void *arg) logicalrep_message_type(errarg->command), errarg->remote_xid); else - errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" in transaction %u, finished at %X/%X", + errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" in transaction %u, finished at %X/%08X", errarg->origin_name, logicalrep_message_type(errarg->command), errarg->remote_xid, @@ -5050,7 +5067,7 @@ apply_error_callback(void *arg) errarg->rel->remoterel.relname, errarg->remote_xid); else - errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" for replication target relation \"%s.%s\" in transaction %u, finished at %X/%X", + errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" for replication target relation \"%s.%s\" in transaction %u, finished at %X/%08X", errarg->origin_name, logicalrep_message_type(errarg->command), errarg->rel->remoterel.nspname, @@ -5069,7 +5086,7 @@ apply_error_callback(void *arg) errarg->rel->remoterel.attnames[errarg->remote_attnum], errarg->remote_xid); else - errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" for replication target relation \"%s.%s\" column \"%s\" in transaction %u, finished at 
%X/%X", + errcontext("processing remote data for replication origin \"%s\" during message type \"%s\" for replication target relation \"%s.%s\" column \"%s\" in transaction %u, finished at %X/%08X", errarg->origin_name, logicalrep_message_type(errarg->command), errarg->rel->remoterel.nspname, diff --git a/src/backend/replication/pgoutput/pgoutput.c b/src/backend/replication/pgoutput/pgoutput.c index 693a766e6d7..082b4d9d327 100644 --- a/src/backend/replication/pgoutput/pgoutput.c +++ b/src/backend/replication/pgoutput/pgoutput.c @@ -1789,7 +1789,7 @@ LoadPublications(List *pubnames) else ereport(WARNING, errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("skipped loading publication: %s", pubname), + errmsg("skipped loading publication \"%s\"", pubname), errdetail("The publication does not exist at this point in the WAL."), errhint("Create the publication if it does not exist.")); } diff --git a/src/backend/replication/repl_gram.y b/src/backend/replication/repl_gram.y index 7440aae5a1a..8a649199ec6 100644 --- a/src/backend/replication/repl_gram.y +++ b/src/backend/replication/repl_gram.y @@ -279,7 +279,7 @@ alter_replication_slot: ; /* - * START_REPLICATION [SLOT slot] [PHYSICAL] %X/%X [TIMELINE %u] + * START_REPLICATION [SLOT slot] [PHYSICAL] %X/%08X [TIMELINE %u] */ start_replication: K_START_REPLICATION opt_slot opt_physical RECPTR opt_timeline @@ -295,7 +295,7 @@ start_replication: } ; -/* START_REPLICATION SLOT slot LOGICAL %X/%X options */ +/* START_REPLICATION SLOT slot LOGICAL %X/%08X options */ start_logical_replication: K_START_REPLICATION K_SLOT IDENT K_LOGICAL RECPTR plugin_options { diff --git a/src/backend/replication/repl_scanner.l b/src/backend/replication/repl_scanner.l index 014ea8d25c6..b6930e28659 100644 --- a/src/backend/replication/repl_scanner.l +++ b/src/backend/replication/repl_scanner.l @@ -155,7 +155,7 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; } {hexdigit}+\/{hexdigit}+ { uint32 hi, lo; - if (sscanf(yytext, "%X/%X", &hi, 
&lo) != 2) + if (sscanf(yytext, "%X/%08X", &hi, &lo) != 2) replication_yyerror(NULL, yyscanner, "invalid streaming start location"); yylval->recptr = ((uint64) hi) << 32 | lo; return RECPTR; diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 600b87fa9cb..f369fce2485 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -424,6 +424,7 @@ ReplicationSlotCreate(const char *name, bool db_specific, slot->candidate_restart_valid = InvalidXLogRecPtr; slot->candidate_restart_lsn = InvalidXLogRecPtr; slot->last_saved_confirmed_flush = InvalidXLogRecPtr; + slot->last_saved_restart_lsn = InvalidXLogRecPtr; slot->inactive_since = 0; /* @@ -1165,20 +1166,41 @@ ReplicationSlotsComputeRequiredLSN(void) { ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i]; XLogRecPtr restart_lsn; + XLogRecPtr last_saved_restart_lsn; bool invalidated; + ReplicationSlotPersistency persistency; if (!s->in_use) continue; SpinLockAcquire(&s->mutex); + persistency = s->data.persistency; restart_lsn = s->data.restart_lsn; invalidated = s->data.invalidated != RS_INVAL_NONE; + last_saved_restart_lsn = s->last_saved_restart_lsn; SpinLockRelease(&s->mutex); /* invalidated slots need not apply */ if (invalidated) continue; + /* + * For persistent slot use last_saved_restart_lsn to compute the + * oldest LSN for removal of WAL segments. The segments between + * last_saved_restart_lsn and restart_lsn might be needed by a + * persistent slot in the case of database crash. Non-persistent + * slots can't survive the database crash, so we don't care about + * last_saved_restart_lsn for them. 
+ */ + if (persistency == RS_PERSISTENT) + { + if (last_saved_restart_lsn != InvalidXLogRecPtr && + restart_lsn > last_saved_restart_lsn) + { + restart_lsn = last_saved_restart_lsn; + } + } + if (restart_lsn != InvalidXLogRecPtr && (min_required == InvalidXLogRecPtr || restart_lsn < min_required)) @@ -1216,7 +1238,9 @@ ReplicationSlotsComputeLogicalRestartLSN(void) { ReplicationSlot *s; XLogRecPtr restart_lsn; + XLogRecPtr last_saved_restart_lsn; bool invalidated; + ReplicationSlotPersistency persistency; s = &ReplicationSlotCtl->replication_slots[i]; @@ -1230,14 +1254,33 @@ ReplicationSlotsComputeLogicalRestartLSN(void) /* read once, it's ok if it increases while we're checking */ SpinLockAcquire(&s->mutex); + persistency = s->data.persistency; restart_lsn = s->data.restart_lsn; invalidated = s->data.invalidated != RS_INVAL_NONE; + last_saved_restart_lsn = s->last_saved_restart_lsn; SpinLockRelease(&s->mutex); /* invalidated slots need not apply */ if (invalidated) continue; + /* + * For persistent slot use last_saved_restart_lsn to compute the + * oldest LSN for removal of WAL segments. The segments between + * last_saved_restart_lsn and restart_lsn might be needed by a + * persistent slot in the case of database crash. Non-persistent + * slots can't survive the database crash, so we don't care about + * last_saved_restart_lsn for them. 
+ */ + if (persistency == RS_PERSISTENT) + { + if (last_saved_restart_lsn != InvalidXLogRecPtr && + restart_lsn > last_saved_restart_lsn) + { + restart_lsn = last_saved_restart_lsn; + } + } + if (restart_lsn == InvalidXLogRecPtr) continue; @@ -1455,6 +1498,7 @@ ReplicationSlotReserveWal(void) Assert(slot != NULL); Assert(slot->data.restart_lsn == InvalidXLogRecPtr); + Assert(slot->last_saved_restart_lsn == InvalidXLogRecPtr); /* * The replication slot mechanism is used to prevent removal of required @@ -1547,8 +1591,8 @@ ReportSlotInvalidation(ReplicationSlotInvalidationCause cause, uint64 ex = oldestLSN - restart_lsn; appendStringInfo(&err_detail, - ngettext("The slot's restart_lsn %X/%X exceeds the limit by %" PRIu64 " byte.", - "The slot's restart_lsn %X/%X exceeds the limit by %" PRIu64 " bytes.", + ngettext("The slot's restart_lsn %X/%08X exceeds the limit by %" PRIu64 " byte.", + "The slot's restart_lsn %X/%08X exceeds the limit by %" PRIu64 " bytes.", ex), LSN_FORMAT_ARGS(restart_lsn), ex); @@ -1835,7 +1879,10 @@ InvalidatePossiblyObsoleteSlot(uint32 possible_causes, * just rely on .invalidated. */ if (invalidation_cause == RS_INVAL_WAL_REMOVED) + { s->data.restart_lsn = InvalidXLogRecPtr; + s->last_saved_restart_lsn = InvalidXLogRecPtr; + } /* Let caller know */ *invalidated = true; @@ -2032,6 +2079,7 @@ void CheckPointReplicationSlots(bool is_shutdown) { int i; + bool last_saved_restart_lsn_updated = false; elog(DEBUG1, "performing replication slot checkpoint"); @@ -2076,9 +2124,23 @@ CheckPointReplicationSlots(bool is_shutdown) SpinLockRelease(&s->mutex); } + /* + * Track if we're going to update slot's last_saved_restart_lsn. We + * need this to know if we need to recompute the required LSN. 
+ */ + if (s->last_saved_restart_lsn != s->data.restart_lsn) + last_saved_restart_lsn_updated = true; + SaveSlotToPath(s, path, LOG); } LWLockRelease(ReplicationSlotAllocationLock); + + /* + * Recompute the required LSN if SaveSlotToPath() updated + * last_saved_restart_lsn for any slot. + */ + if (last_saved_restart_lsn_updated) + ReplicationSlotsComputeRequiredLSN(); } /* @@ -2354,6 +2416,7 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) if (!slot->just_dirtied) slot->dirty = false; slot->last_saved_confirmed_flush = cp.slotdata.confirmed_flush; + slot->last_saved_restart_lsn = cp.slotdata.restart_lsn; SpinLockRelease(&slot->mutex); LWLockRelease(&slot->io_in_progress_lock); @@ -2569,6 +2632,7 @@ RestoreSlotFromDisk(const char *name) slot->effective_xmin = cp.slotdata.xmin; slot->effective_catalog_xmin = cp.slotdata.catalog_xmin; slot->last_saved_confirmed_flush = cp.slotdata.confirmed_flush; + slot->last_saved_restart_lsn = cp.slotdata.restart_lsn; slot->candidate_catalog_xmin = InvalidTransactionId; slot->candidate_xmin_lsn = InvalidXLogRecPtr; diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c index 36cc2ed4e44..69f4c6157c5 100644 --- a/src/backend/replication/slotfuncs.c +++ b/src/backend/replication/slotfuncs.c @@ -566,7 +566,7 @@ pg_replication_slot_advance(PG_FUNCTION_ARGS) if (moveto < minlsn) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("cannot advance replication slot to %X/%X, minimum is %X/%X", + errmsg("cannot advance replication slot to %X/%08X, minimum is %X/%08X", LSN_FORMAT_ARGS(moveto), LSN_FORMAT_ARGS(minlsn)))); /* Do the actual slot update, depending on the slot type */ diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c index cc35984ad00..32cf3a48b89 100644 --- a/src/backend/replication/syncrep.c +++ b/src/backend/replication/syncrep.c @@ -258,7 +258,7 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit) { char buffer[32]; - 
sprintf(buffer, "waiting for %X/%X", LSN_FORMAT_ARGS(lsn)); + sprintf(buffer, "waiting for %X/%08X", LSN_FORMAT_ARGS(lsn)); set_ps_display_suffix(buffer); } @@ -566,7 +566,7 @@ SyncRepReleaseWaiters(void) LWLockRelease(SyncRepLock); - elog(DEBUG3, "released %d procs up to write %X/%X, %d procs up to flush %X/%X, %d procs up to apply %X/%X", + elog(DEBUG3, "released %d procs up to write %X/%08X, %d procs up to flush %X/%08X, %d procs up to apply %X/%08X", numwrite, LSN_FORMAT_ARGS(writePtr), numflush, LSN_FORMAT_ARGS(flushPtr), numapply, LSN_FORMAT_ARGS(applyPtr)); diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index 8c4d0fd9aed..b6281101711 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -386,12 +386,12 @@ WalReceiverMain(const void *startup_data, size_t startup_data_len) { if (first_stream) ereport(LOG, - (errmsg("started streaming WAL from primary at %X/%X on timeline %u", - LSN_FORMAT_ARGS(startpoint), startpointTLI))); + errmsg("started streaming WAL from primary at %X/%08X on timeline %u", + LSN_FORMAT_ARGS(startpoint), startpointTLI)); else ereport(LOG, - (errmsg("restarted WAL streaming at %X/%X on timeline %u", - LSN_FORMAT_ARGS(startpoint), startpointTLI))); + errmsg("restarted WAL streaming at %X/%08X on timeline %u", + LSN_FORMAT_ARGS(startpoint), startpointTLI)); first_stream = false; /* Initialize LogstreamResult and buffers for processing messages */ @@ -470,7 +470,7 @@ WalReceiverMain(const void *startup_data, size_t startup_data_len) { ereport(LOG, (errmsg("replication terminated by primary server"), - errdetail("End of WAL reached on timeline %u at %X/%X.", + errdetail("End of WAL reached on timeline %u at %X/%08X.", startpointTLI, LSN_FORMAT_ARGS(LogstreamResult.Write)))); endofwal = true; @@ -711,7 +711,7 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI) { char activitymsg[50]; - snprintf(activitymsg, sizeof(activitymsg), 
"restarting at %X/%X", + snprintf(activitymsg, sizeof(activitymsg), "restarting at %X/%08X", LSN_FORMAT_ARGS(*startpoint)); set_ps_display(activitymsg); } @@ -1014,7 +1014,7 @@ XLogWalRcvFlush(bool dying, TimeLineID tli) { char activitymsg[50]; - snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X", + snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%08X", LSN_FORMAT_ARGS(LogstreamResult.Write)); set_ps_display(activitymsg); } @@ -1138,7 +1138,7 @@ XLogWalRcvSendReply(bool force, bool requestReply) pq_sendbyte(&reply_message, requestReply ? 1 : 0); /* Send it */ - elog(DEBUG2, "sending write %X/%X flush %X/%X apply %X/%X%s", + elog(DEBUG2, "sending write %X/%08X flush %X/%08X apply %X/%08X%s", LSN_FORMAT_ARGS(writePtr), LSN_FORMAT_ARGS(flushPtr), LSN_FORMAT_ARGS(applyPtr), diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 9fa8beb6103..28b8591efa5 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -408,7 +408,7 @@ IdentifySystem(void) else logptr = GetFlushRecPtr(&currTLI); - snprintf(xloc, sizeof(xloc), "%X/%X", LSN_FORMAT_ARGS(logptr)); + snprintf(xloc, sizeof(xloc), "%X/%08X", LSN_FORMAT_ARGS(logptr)); if (MyDatabaseId != InvalidOid) { @@ -515,7 +515,7 @@ ReadReplicationSlot(ReadReplicationSlotCmd *cmd) { char xloc[64]; - snprintf(xloc, sizeof(xloc), "%X/%X", + snprintf(xloc, sizeof(xloc), "%X/%08X", LSN_FORMAT_ARGS(slot_contents.data.restart_lsn)); values[i] = CStringGetTextDatum(xloc); nulls[i] = false; @@ -892,12 +892,12 @@ StartReplication(StartReplicationCmd *cmd) switchpoint < cmd->startpoint) { ereport(ERROR, - (errmsg("requested starting point %X/%X on timeline %u is not in this server's history", - LSN_FORMAT_ARGS(cmd->startpoint), - cmd->timeline), - errdetail("This server's history forked from timeline %u at %X/%X.", - cmd->timeline, - LSN_FORMAT_ARGS(switchpoint)))); + errmsg("requested starting point %X/%08X on timeline %u is not in this server's 
history", + LSN_FORMAT_ARGS(cmd->startpoint), + cmd->timeline), + errdetail("This server's history forked from timeline %u at %X/%08X.", + cmd->timeline, + LSN_FORMAT_ARGS(switchpoint))); } sendTimeLineValidUpto = switchpoint; } @@ -939,9 +939,9 @@ StartReplication(StartReplicationCmd *cmd) if (FlushPtr < cmd->startpoint) { ereport(ERROR, - (errmsg("requested starting point %X/%X is ahead of the WAL flush position of this server %X/%X", - LSN_FORMAT_ARGS(cmd->startpoint), - LSN_FORMAT_ARGS(FlushPtr)))); + errmsg("requested starting point %X/%08X is ahead of the WAL flush position of this server %X/%08X", + LSN_FORMAT_ARGS(cmd->startpoint), + LSN_FORMAT_ARGS(FlushPtr))); } /* Start streaming from the requested point */ @@ -983,7 +983,7 @@ StartReplication(StartReplicationCmd *cmd) Datum values[2]; bool nulls[2] = {0}; - snprintf(startpos_str, sizeof(startpos_str), "%X/%X", + snprintf(startpos_str, sizeof(startpos_str), "%X/%08X", LSN_FORMAT_ARGS(sendTimeLineValidUpto)); dest = CreateDestReceiver(DestRemoteSimple); @@ -1324,7 +1324,7 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd) ReplicationSlotPersist(); } - snprintf(xloc, sizeof(xloc), "%X/%X", + snprintf(xloc, sizeof(xloc), "%X/%08X", LSN_FORMAT_ARGS(MyReplicationSlot->data.confirmed_flush)); dest = CreateDestReceiver(DestRemoteSimple); @@ -2429,7 +2429,7 @@ ProcessStandbyReplyMessage(void) /* Copy because timestamptz_to_str returns a static buffer */ replyTimeStr = pstrdup(timestamptz_to_str(replyTime)); - elog(DEBUG2, "write %X/%X flush %X/%X apply %X/%X%s reply_time %s", + elog(DEBUG2, "write %X/%08X flush %X/%08X apply %X/%08X%s reply_time %s", LSN_FORMAT_ARGS(writePtr), LSN_FORMAT_ARGS(flushPtr), LSN_FORMAT_ARGS(applyPtr), @@ -3251,7 +3251,7 @@ XLogSendPhysical(void) WalSndCaughtUp = true; - elog(DEBUG1, "walsender reached end of timeline at %X/%X (sent up to %X/%X)", + elog(DEBUG1, "walsender reached end of timeline at %X/%08X (sent up to %X/%08X)", LSN_FORMAT_ARGS(sendTimeLineValidUpto), 
LSN_FORMAT_ARGS(sentPtr)); return; @@ -3392,7 +3392,7 @@ retry: { char activitymsg[50]; - snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%X", + snprintf(activitymsg, sizeof(activitymsg), "streaming %X/%08X", LSN_FORMAT_ARGS(sentPtr)); set_ps_display(activitymsg); } @@ -3449,8 +3449,16 @@ XLogSendLogical(void) if (flushPtr == InvalidXLogRecPtr || logical_decoding_ctx->reader->EndRecPtr >= flushPtr) { + /* + * For cascading logical WAL senders, we use the replay LSN instead of + * the flush LSN, since logical decoding on a standby only processes + * WAL that has been replayed. This distinction becomes particularly + * important during shutdown, as new WAL is no longer replayed and the + * last replayed LSN marks the furthest point up to which decoding can + * proceed. + */ if (am_cascading_walsender) - flushPtr = GetStandbyFlushRecPtr(NULL); + flushPtr = GetXLogReplayRecPtr(NULL); else flushPtr = GetFlushRecPtr(NULL); } diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index f0bce5f9ed9..adc9e7600e1 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -923,8 +923,9 @@ rewriteTargetListIU(List *targetList, apply_default = true; /* - * Can only insert DEFAULT into generated columns, regardless of - * any OVERRIDING clauses. + * Can only insert DEFAULT into generated columns. (The + * OVERRIDING clause does not apply to generated columns, so we + * don't consider it here.) 
*/ if (att_tup->attgenerated && !apply_default) { @@ -4544,7 +4545,7 @@ build_generation_expression(Relation rel, int attrno) List * QueryRewrite(Query *parsetree) { - uint64 input_query_id = parsetree->queryId; + int64 input_query_id = parsetree->queryId; List *querylist; List *results; ListCell *l; diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c index c64d815ebd1..3643f27ad6e 100644 --- a/src/backend/storage/aio/aio.c +++ b/src/backend/storage/aio/aio.c @@ -556,6 +556,13 @@ bool pgaio_io_was_recycled(PgAioHandle *ioh, uint64 ref_generation, PgAioHandleState *state) { *state = ioh->state; + + /* + * Ensure that we don't see an earlier state of the handle than ioh->state + * due to compiler or CPU reordering. This protects both ->generation as + * directly used here, and other fields in the handle accessed in the + * caller if the handle was not reused. + */ pg_read_barrier(); return ioh->generation != ref_generation; @@ -752,7 +759,7 @@ pgaio_io_wait_for_free(void) { int reclaimed = 0; - pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %d in-flight, %d idle IOs", + pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %u in-flight, %u idle IOs", pgaio_my_backend->num_staged_ios, dclist_count(&pgaio_my_backend->in_flight_ios), dclist_count(&pgaio_my_backend->idle_ios)); @@ -773,7 +780,12 @@ pgaio_io_wait_for_free(void) * Note that no interrupts are processed between the state check * and the call to reclaim - that's important as otherwise an * interrupt could have already reclaimed the handle. + * + * Need to ensure that there's no reordering, in the more common + * paths, where we wait for IO, that's done by + * pgaio_io_was_recycled(). 
*/ + pg_read_barrier(); pgaio_io_reclaim(ioh); reclaimed++; } @@ -797,7 +809,7 @@ pgaio_io_wait_for_free(void) if (dclist_count(&pgaio_my_backend->in_flight_ios) == 0) ereport(ERROR, errmsg_internal("no free IOs despite no in-flight IOs"), - errdetail_internal("%d pending, %d in-flight, %d idle IOs", + errdetail_internal("%d pending, %u in-flight, %u idle IOs", pgaio_my_backend->num_staged_ios, dclist_count(&pgaio_my_backend->in_flight_ios), dclist_count(&pgaio_my_backend->idle_ios))); @@ -828,7 +840,7 @@ pgaio_io_wait_for_free(void) case PGAIO_HS_COMPLETED_IO: case PGAIO_HS_SUBMITTED: pgaio_debug_io(DEBUG2, ioh, - "waiting for free io with %d in flight", + "waiting for free io with %u in flight", dclist_count(&pgaio_my_backend->in_flight_ios)); /* @@ -852,7 +864,12 @@ pgaio_io_wait_for_free(void) * check and the call to reclaim - that's important as * otherwise an interrupt could have already reclaimed the * handle. + * + * Need to ensure that there's no reordering, in the more + * common paths, where we wait for IO, that's done by + * pgaio_io_was_recycled(). 
*/ + pg_read_barrier(); pgaio_io_reclaim(ioh); break; } @@ -1252,7 +1269,7 @@ pgaio_closing_fd(int fd) break; pgaio_debug_io(DEBUG2, ioh, - "waiting for IO before FD %d gets closed, %d in-flight IOs", + "waiting for IO before FD %d gets closed, %u in-flight IOs", fd, dclist_count(&pgaio_my_backend->in_flight_ios)); /* see comment in pgaio_io_wait_for_free() about raciness */ @@ -1288,7 +1305,7 @@ pgaio_shutdown(int code, Datum arg) uint64 generation = ioh->generation; pgaio_debug_io(DEBUG2, ioh, - "waiting for IO to complete during shutdown, %d in-flight IOs", + "waiting for IO to complete during shutdown, %u in-flight IOs", dclist_count(&pgaio_my_backend->in_flight_ios)); /* see comment in pgaio_io_wait_for_free() about raciness */ diff --git a/src/backend/storage/aio/aio_callback.c b/src/backend/storage/aio/aio_callback.c index 0ad9795bb7e..03c9bba0802 100644 --- a/src/backend/storage/aio/aio_callback.c +++ b/src/backend/storage/aio/aio_callback.c @@ -256,6 +256,9 @@ pgaio_io_call_complete_shared(PgAioHandle *ioh) pgaio_result_status_string(result.status), result.id, result.error_data, result.result); result = ce->cb->complete_shared(ioh, result, cb_data); + + /* the callback should never transition to unknown */ + Assert(result.status != PGAIO_RS_UNKNOWN); } ioh->distilled_result = result; @@ -290,6 +293,7 @@ pgaio_io_call_complete_local(PgAioHandle *ioh) /* start with distilled result from shared callback */ result = ioh->distilled_result; + Assert(result.status != PGAIO_RS_UNKNOWN); for (int i = ioh->num_callbacks; i > 0; i--) { @@ -306,6 +310,9 @@ pgaio_io_call_complete_local(PgAioHandle *ioh) pgaio_result_status_string(result.status), result.id, result.error_data, result.result); result = ce->cb->complete_local(ioh, result, cb_data); + + /* the callback should never transition to unknown */ + Assert(result.status != PGAIO_RS_UNKNOWN); } /* diff --git a/src/backend/storage/aio/method_io_uring.c b/src/backend/storage/aio/method_io_uring.c index 
c719ba2727a..0a8c054162f 100644 --- a/src/backend/storage/aio/method_io_uring.c +++ b/src/backend/storage/aio/method_io_uring.c @@ -29,6 +29,9 @@ #ifdef IOMETHOD_IO_URING_ENABLED +#include <sys/mman.h> +#include <unistd.h> + #include <liburing.h> #include "miscadmin.h" @@ -94,12 +97,32 @@ PgAioUringContext struct io_uring io_uring_ring; } PgAioUringContext; +/* + * Information about the capabilities that io_uring has. + * + * Depending on liburing and kernel version different features are + * supported. At least for the kernel a kernel version check does not suffice + * as various vendors do backport features to older kernels :(. + */ +typedef struct PgAioUringCaps +{ + bool checked; + /* -1 if io_uring_queue_init_mem() is unsupported */ + int mem_init_size; +} PgAioUringCaps; + + /* PgAioUringContexts for all backends */ static PgAioUringContext *pgaio_uring_contexts; /* the current backend's context */ static PgAioUringContext *pgaio_my_uring_context; +static PgAioUringCaps pgaio_uring_caps = +{ + .checked = false, + .mem_init_size = -1, +}; static uint32 pgaio_uring_procs(void) @@ -111,30 +134,184 @@ pgaio_uring_procs(void) return MaxBackends + NUM_AUXILIARY_PROCS - MAX_IO_WORKERS; } -static Size +/* + * Initializes pgaio_uring_caps, unless that's already done. + */ +static void +pgaio_uring_check_capabilities(void) +{ + if (pgaio_uring_caps.checked) + return; + + /* + * By default io_uring creates a shared memory mapping for each io_uring + * instance, leading to a large number of memory mappings. Unfortunately a + * large number of memory mappings slows things down, backend exit is + * particularly affected. To address that, newer kernels (6.5) support + * using user-provided memory for the memory, by putting the relevant + * memory into shared memory we don't need any additional mappings. + * + * To know whether this is supported, we unfortunately need to probe the + * kernel by trying to create a ring with userspace-provided memory. 
This + * also has a secondary benefit: We can determine precisely how much + * memory we need for each io_uring instance. + */ +#if defined(HAVE_LIBURING_QUEUE_INIT_MEM) && defined(IORING_SETUP_NO_MMAP) + { + struct io_uring test_ring; + size_t ring_size; + void *ring_ptr; + struct io_uring_params p = {0}; + int ret; + + /* + * Liburing does not yet provide an API to query how much memory a + * ring will need. So we over-estimate it here. As the memory is freed + * just below that's small temporary waste of memory. + * + * 1MB is more than enough for rings within io_max_concurrency's + * range. + */ + ring_size = 1024 * 1024; + + /* + * Hard to believe a system exists where 1MB would not be a multiple + * of the page size. But it's cheap to ensure... + */ + ring_size -= ring_size % sysconf(_SC_PAGESIZE); + + ring_ptr = mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (ring_ptr == MAP_FAILED) + elog(ERROR, + "mmap(%zu) to determine io_uring_queue_init_mem() support failed: %m", + ring_size); + + ret = io_uring_queue_init_mem(io_max_concurrency, &test_ring, &p, ring_ptr, ring_size); + if (ret > 0) + { + pgaio_uring_caps.mem_init_size = ret; + + elog(DEBUG1, + "can use combined memory mapping for io_uring, each ring needs %d bytes", + ret); + + /* clean up the created ring, it was just for a test */ + io_uring_queue_exit(&test_ring); + } + else + { + /* + * There are different reasons for ring creation to fail, but it's + * ok to treat that just as io_uring_queue_init_mem() not being + * supported. We'll report a more detailed error in + * pgaio_uring_shmem_init(). 
+ */ + errno = -ret; + elog(DEBUG1, + "cannot use combined memory mapping for io_uring, ring creation failed: %m"); + + } + + if (munmap(ring_ptr, ring_size) != 0) + elog(ERROR, "munmap() failed: %m"); + } +#else + { + elog(DEBUG1, + "can't use combined memory mapping for io_uring, kernel or liburing too old"); + } +#endif + + pgaio_uring_caps.checked = true; +} + +/* + * Memory for all PgAioUringContext instances + */ +static size_t pgaio_uring_context_shmem_size(void) { return mul_size(pgaio_uring_procs(), sizeof(PgAioUringContext)); } +/* + * Memory for the combined memory used by io_uring instances. Returns 0 if + * that is not supported by kernel/liburing. + */ +static size_t +pgaio_uring_ring_shmem_size(void) +{ + size_t sz = 0; + + if (pgaio_uring_caps.mem_init_size > 0) + { + /* + * Memory for rings needs to be allocated to the page boundary, + * reserve space. Luckily it does not need to be aligned to hugepage + * boundaries, even if huge pages are used. + */ + sz = add_size(sz, sysconf(_SC_PAGESIZE)); + sz = add_size(sz, mul_size(pgaio_uring_procs(), + pgaio_uring_caps.mem_init_size)); + } + + return sz; +} + static size_t pgaio_uring_shmem_size(void) { - return pgaio_uring_context_shmem_size(); + size_t sz; + + /* + * Kernel and liburing support for various features influences how much + * shmem we need, perform the necessary checks. 
+ */ + pgaio_uring_check_capabilities(); + + sz = pgaio_uring_context_shmem_size(); + sz = add_size(sz, pgaio_uring_ring_shmem_size()); + + return sz; } static void pgaio_uring_shmem_init(bool first_time) { - int TotalProcs = MaxBackends + NUM_AUXILIARY_PROCS - MAX_IO_WORKERS; + int TotalProcs = pgaio_uring_procs(); bool found; + char *shmem; + size_t ring_mem_remain = 0; + char *ring_mem_next = 0; - pgaio_uring_contexts = (PgAioUringContext *) - ShmemInitStruct("AioUring", pgaio_uring_shmem_size(), &found); - + /* + * We allocate memory for all PgAioUringContext instances and, if + * supported, the memory required for each of the io_uring instances, in + * one ShmemInitStruct(). + */ + shmem = ShmemInitStruct("AioUringContext", pgaio_uring_shmem_size(), &found); if (found) return; + pgaio_uring_contexts = (PgAioUringContext *) shmem; + shmem += pgaio_uring_context_shmem_size(); + + /* if supported, handle memory alignment / sizing for io_uring memory */ + if (pgaio_uring_caps.mem_init_size > 0) + { + ring_mem_remain = pgaio_uring_ring_shmem_size(); + ring_mem_next = (char *) shmem; + + /* align to page boundary, see also pgaio_uring_ring_shmem_size() */ + ring_mem_next = (char *) TYPEALIGN(sysconf(_SC_PAGESIZE), ring_mem_next); + + /* account for alignment */ + ring_mem_remain -= ring_mem_next - shmem; + shmem += ring_mem_next - shmem; + + shmem += ring_mem_remain; + } + for (int contextno = 0; contextno < TotalProcs; contextno++) { PgAioUringContext *context = &pgaio_uring_contexts[contextno]; @@ -158,7 +335,28 @@ pgaio_uring_shmem_init(bool first_time) * be worth using that - also need to evaluate if that causes * noticeable additional contention? */ - ret = io_uring_queue_init(io_max_concurrency, &context->io_uring_ring, 0); + + /* + * If supported (c.f. pgaio_uring_check_capabilities()), create ring + * with its data in shared memory. Otherwise fall back io_uring + * creating a memory mapping for each ring. 
+ */ +#if defined(HAVE_LIBURING_QUEUE_INIT_MEM) && defined(IORING_SETUP_NO_MMAP) + if (pgaio_uring_caps.mem_init_size > 0) + { + struct io_uring_params p = {0}; + + ret = io_uring_queue_init_mem(io_max_concurrency, &context->io_uring_ring, &p, ring_mem_next, ring_mem_remain); + + ring_mem_remain -= ret; + ring_mem_next += ret; + } + else +#endif + { + ret = io_uring_queue_init(io_max_concurrency, &context->io_uring_ring, 0); + } + if (ret < 0) { char *hint = NULL; @@ -400,9 +598,9 @@ pgaio_uring_wait_one(PgAioHandle *ioh, uint64 ref_generation) while (true) { pgaio_debug_io(DEBUG3, ioh, - "wait_one io_gen: %llu, ref_gen: %llu, cycle %d", - (long long unsigned) ioh->generation, - (long long unsigned) ref_generation, + "wait_one io_gen: %" PRIu64 ", ref_gen: %" PRIu64 ", cycle %d", + ioh->generation, + ref_generation, waited); if (pgaio_io_was_recycled(ioh, ref_generation, &state) || diff --git a/src/backend/storage/aio/method_worker.c b/src/backend/storage/aio/method_worker.c index 743cccc2acd..36be179678d 100644 --- a/src/backend/storage/aio/method_worker.c +++ b/src/backend/storage/aio/method_worker.c @@ -461,7 +461,12 @@ IoWorkerMain(const void *startup_data, size_t startup_data_len) int nwakeups = 0; int worker; - /* Try to get a job to do. */ + /* + * Try to get a job to do. + * + * The lwlock acquisition also provides the necessary memory barrier + * to ensure that we don't see an outdated data in the handle. 
+ */ LWLockAcquire(AioWorkerSubmissionQueueLock, LW_EXCLUSIVE); if ((io_index = pgaio_worker_submission_queue_consume()) == UINT32_MAX) { diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index f93131a645e..bd68d7e0ca9 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -4550,11 +4550,9 @@ DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum, if (RelFileLocatorBackendIsTemp(rlocator)) { if (rlocator.backend == MyProcNumber) - { - for (j = 0; j < nforks; j++) - DropRelationLocalBuffers(rlocator.locator, forkNum[j], - firstDelBlock[j]); - } + DropRelationLocalBuffers(rlocator.locator, forkNum, nforks, + firstDelBlock); + return; } @@ -7320,7 +7318,7 @@ buffer_readv_report(PgAioResult result, const PgAioTargetData *td, affected_count > 1 ? errdetail("Block %u held first zeroed page.", first + first_off) : 0, - errhint("See server log for details about the other %u invalid block(s).", + errhint("See server log for details about the other %d invalid block(s).", affected_count + checkfail_count - 1)); return; } diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index 63101d56a07..3da9c41ee1d 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -629,7 +629,7 @@ InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced) */ if (check_unreferenced && (LocalRefCount[bufid] != 0 || BUF_STATE_GET_REFCOUNT(buf_state) != 0)) - elog(ERROR, "block %u of %s is still referenced (local %u)", + elog(ERROR, "block %u of %s is still referenced (local %d)", bufHdr->tag.blockNum, relpathbackend(BufTagGetRelFileLocator(&bufHdr->tag), MyProcNumber, @@ -660,10 +660,11 @@ InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced) * See DropRelationBuffers in bufmgr.c for more notes. 
*/ void -DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, - BlockNumber firstDelBlock) +DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber *forkNum, + int nforks, BlockNumber *firstDelBlock) { int i; + int j; for (i = 0; i < NLocBuffer; i++) { @@ -672,12 +673,18 @@ DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, buf_state = pg_atomic_read_u32(&bufHdr->state); - if ((buf_state & BM_TAG_VALID) && - BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) && - BufTagGetForkNum(&bufHdr->tag) == forkNum && - bufHdr->tag.blockNum >= firstDelBlock) + if (!(buf_state & BM_TAG_VALID) || + !BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator)) + continue; + + for (j = 0; j < nforks; j++) { - InvalidateLocalBuffer(bufHdr, true); + if (BufTagGetForkNum(&bufHdr->tag) == forkNum[j] && + bufHdr->tag.blockNum >= firstDelBlock[j]) + { + InvalidateLocalBuffer(bufHdr, true); + break; + } } } } diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 0e8299dd556..a4ec7959f31 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -400,25 +400,22 @@ pg_fsync(int fd) * portable, even if it runs ok on the current system. * * We assert here that a descriptor for a file was opened with write - * permissions (either O_RDWR or O_WRONLY) and for a directory without - * write permissions (O_RDONLY). + * permissions (i.e., not O_RDONLY) and for a directory without write + * permissions (O_RDONLY). Notice that the assertion check is made even + * if fsync() is disabled. * - * Ignore any fstat errors and let the follow-up fsync() do its work. - * Doing this sanity check here counts for the case where fsync() is - * disabled. + * If fstat() fails, ignore it and let the follow-up fsync() complain. */ if (fstat(fd, &st) == 0) { int desc_flags = fcntl(fd, F_GETFL); - /* - * O_RDONLY is historically 0, so just make sure that for directories - * no write flags are used. 
- */ + desc_flags &= O_ACCMODE; + if (S_ISDIR(st.st_mode)) - Assert((desc_flags & (O_RDWR | O_WRONLY)) == 0); + Assert(desc_flags == O_RDONLY); else - Assert((desc_flags & (O_RDWR | O_WRONLY)) != 0); + Assert(desc_flags != O_RDONLY); } errno = 0; #endif diff --git a/src/backend/storage/ipc/dsm_registry.c b/src/backend/storage/ipc/dsm_registry.c index 1d4fd31ffed..828c2ff0c7f 100644 --- a/src/backend/storage/ipc/dsm_registry.c +++ b/src/backend/storage/ipc/dsm_registry.c @@ -15,6 +15,20 @@ * current backend. This function guarantees that only one backend * initializes the segment and that all other backends just attach it. * + * A DSA can be created in or retrieved from the registry by calling + * GetNamedDSA(). As with GetNamedDSMSegment(), if a DSA with the provided + * name does not yet exist, it is created. Otherwise, GetNamedDSA() + * ensures the DSA is attached to the current backend. This function + * guarantees that only one backend initializes the DSA and that all other + * backends just attach it. + * + * A dshash table can be created in or retrieved from the registry by + * calling GetNamedDSHash(). As with GetNamedDSMSegment(), if a hash + * table with the provided name does not yet exist, it is created. + * Otherwise, GetNamedDSHash() ensures the hash table is attached to the + * current backend. This function guarantees that only one backend + * initializes the table and that all other backends just attach it. 
+ * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * @@ -32,6 +46,12 @@ #include "storage/shmem.h" #include "utils/memutils.h" +#define DSMR_NAME_LEN 128 + +#define DSMR_DSA_TRANCHE_SUFFIX " DSA" +#define DSMR_DSA_TRANCHE_SUFFIX_LEN (sizeof(DSMR_DSA_TRANCHE_SUFFIX) - 1) +#define DSMR_DSA_TRANCHE_NAME_LEN (DSMR_NAME_LEN + DSMR_DSA_TRANCHE_SUFFIX_LEN) + typedef struct DSMRegistryCtxStruct { dsa_handle dsah; @@ -40,15 +60,48 @@ typedef struct DSMRegistryCtxStruct static DSMRegistryCtxStruct *DSMRegistryCtx; -typedef struct DSMRegistryEntry +typedef struct NamedDSMState { - char name[64]; dsm_handle handle; size_t size; +} NamedDSMState; + +typedef struct NamedDSAState +{ + dsa_handle handle; + int tranche; + char tranche_name[DSMR_DSA_TRANCHE_NAME_LEN]; +} NamedDSAState; + +typedef struct NamedDSHState +{ + NamedDSAState dsa; + dshash_table_handle handle; + int tranche; + char tranche_name[DSMR_NAME_LEN]; +} NamedDSHState; + +typedef enum DSMREntryType +{ + DSMR_ENTRY_TYPE_DSM, + DSMR_ENTRY_TYPE_DSA, + DSMR_ENTRY_TYPE_DSH, +} DSMREntryType; + +typedef struct DSMRegistryEntry +{ + char name[DSMR_NAME_LEN]; + DSMREntryType type; + union + { + NamedDSMState dsm; + NamedDSAState dsa; + NamedDSHState dsh; + } data; } DSMRegistryEntry; static const dshash_parameters dsh_params = { - offsetof(DSMRegistryEntry, handle), + offsetof(DSMRegistryEntry, type), sizeof(DSMRegistryEntry), dshash_strcmp, dshash_strhash, @@ -141,7 +194,7 @@ GetNamedDSMSegment(const char *name, size_t size, ereport(ERROR, (errmsg("DSM segment name cannot be empty"))); - if (strlen(name) >= offsetof(DSMRegistryEntry, handle)) + if (strlen(name) >= offsetof(DSMRegistryEntry, type)) ereport(ERROR, (errmsg("DSM segment name too long"))); @@ -158,32 +211,39 @@ GetNamedDSMSegment(const char *name, size_t size, entry = dshash_find_or_insert(dsm_registry_table, name, found); if (!(*found)) { + NamedDSMState *state = 
&entry->data.dsm; + dsm_segment *seg; + + entry->type = DSMR_ENTRY_TYPE_DSM; + /* Initialize the segment. */ - dsm_segment *seg = dsm_create(size, 0); + seg = dsm_create(size, 0); dsm_pin_segment(seg); dsm_pin_mapping(seg); - entry->handle = dsm_segment_handle(seg); - entry->size = size; + state->handle = dsm_segment_handle(seg); + state->size = size; ret = dsm_segment_address(seg); if (init_callback) (*init_callback) (ret); } - else if (entry->size != size) - { + else if (entry->type != DSMR_ENTRY_TYPE_DSM) ereport(ERROR, - (errmsg("requested DSM segment size does not match size of " - "existing segment"))); - } + (errmsg("requested DSM segment does not match type of existing entry"))); + else if (entry->data.dsm.size != size) + ereport(ERROR, + (errmsg("requested DSM segment size does not match size of existing segment"))); else { - dsm_segment *seg = dsm_find_mapping(entry->handle); + NamedDSMState *state = &entry->data.dsm; + dsm_segment *seg; /* If the existing segment is not already attached, attach it now. */ + seg = dsm_find_mapping(state->handle); if (seg == NULL) { - seg = dsm_attach(entry->handle); + seg = dsm_attach(state->handle); if (seg == NULL) elog(ERROR, "could not map dynamic shared memory segment"); @@ -198,3 +258,180 @@ GetNamedDSMSegment(const char *name, size_t size, return ret; } + +/* + * Initialize or attach a named DSA. + * + * This routine returns a pointer to the DSA. A new LWLock tranche ID will be + * generated if needed. Note that the lock tranche will be registered with the + * provided name. Also note that this should be called at most once for a + * given DSA in each backend. 
+ */ +dsa_area * +GetNamedDSA(const char *name, bool *found) +{ + DSMRegistryEntry *entry; + MemoryContext oldcontext; + dsa_area *ret; + + Assert(found); + + if (!name || *name == '\0') + ereport(ERROR, + (errmsg("DSA name cannot be empty"))); + + if (strlen(name) >= offsetof(DSMRegistryEntry, type)) + ereport(ERROR, + (errmsg("DSA name too long"))); + + /* Be sure any local memory allocated by DSM/DSA routines is persistent. */ + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + + /* Connect to the registry. */ + init_dsm_registry(); + + entry = dshash_find_or_insert(dsm_registry_table, name, found); + if (!(*found)) + { + NamedDSAState *state = &entry->data.dsa; + + entry->type = DSMR_ENTRY_TYPE_DSA; + + /* Initialize the LWLock tranche for the DSA. */ + state->tranche = LWLockNewTrancheId(); + strcpy(state->tranche_name, name); + LWLockRegisterTranche(state->tranche, state->tranche_name); + + /* Initialize the DSA. */ + ret = dsa_create(state->tranche); + dsa_pin(ret); + dsa_pin_mapping(ret); + + /* Store handle for other backends to use. */ + state->handle = dsa_get_handle(ret); + } + else if (entry->type != DSMR_ENTRY_TYPE_DSA) + ereport(ERROR, + (errmsg("requested DSA does not match type of existing entry"))); + else + { + NamedDSAState *state = &entry->data.dsa; + + if (dsa_is_attached(state->handle)) + ereport(ERROR, + (errmsg("requested DSA already attached to current process"))); + + /* Initialize existing LWLock tranche for the DSA. */ + LWLockRegisterTranche(state->tranche, state->tranche_name); + + /* Attach to existing DSA. */ + ret = dsa_attach(state->handle); + dsa_pin_mapping(ret); + } + + dshash_release_lock(dsm_registry_table, entry); + MemoryContextSwitchTo(oldcontext); + + return ret; +} + +/* + * Initialize or attach a named dshash table. + * + * This routine returns the address of the table. The tranche_id member of + * params is ignored; new tranche IDs will be generated if needed. 
Note that + * the DSA lock tranche will be registered with the provided name with " DSA" + * appended. The dshash lock tranche will be registered with the provided + * name. Also note that this should be called at most once for a given table + * in each backend. + */ +dshash_table * +GetNamedDSHash(const char *name, const dshash_parameters *params, bool *found) +{ + DSMRegistryEntry *entry; + MemoryContext oldcontext; + dshash_table *ret; + + Assert(params); + Assert(found); + + if (!name || *name == '\0') + ereport(ERROR, + (errmsg("DSHash name cannot be empty"))); + + if (strlen(name) >= offsetof(DSMRegistryEntry, type)) + ereport(ERROR, + (errmsg("DSHash name too long"))); + + /* Be sure any local memory allocated by DSM/DSA routines is persistent. */ + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + + /* Connect to the registry. */ + init_dsm_registry(); + + entry = dshash_find_or_insert(dsm_registry_table, name, found); + if (!(*found)) + { + NamedDSAState *dsa_state = &entry->data.dsh.dsa; + NamedDSHState *dsh_state = &entry->data.dsh; + dshash_parameters params_copy; + dsa_area *dsa; + + entry->type = DSMR_ENTRY_TYPE_DSH; + + /* Initialize the LWLock tranche for the DSA. */ + dsa_state->tranche = LWLockNewTrancheId(); + sprintf(dsa_state->tranche_name, "%s%s", name, DSMR_DSA_TRANCHE_SUFFIX); + LWLockRegisterTranche(dsa_state->tranche, dsa_state->tranche_name); + + /* Initialize the LWLock tranche for the dshash table. */ + dsh_state->tranche = LWLockNewTrancheId(); + strcpy(dsh_state->tranche_name, name); + LWLockRegisterTranche(dsh_state->tranche, dsh_state->tranche_name); + + /* Initialize the DSA for the hash table. */ + dsa = dsa_create(dsa_state->tranche); + dsa_pin(dsa); + dsa_pin_mapping(dsa); + + /* Initialize the dshash table. */ + memcpy(¶ms_copy, params, sizeof(dshash_parameters)); + params_copy.tranche_id = dsh_state->tranche; + ret = dshash_create(dsa, ¶ms_copy, NULL); + + /* Store handles for other backends to use. 
*/ + dsa_state->handle = dsa_get_handle(dsa); + dsh_state->handle = dshash_get_hash_table_handle(ret); + } + else if (entry->type != DSMR_ENTRY_TYPE_DSH) + ereport(ERROR, + (errmsg("requested DSHash does not match type of existing entry"))); + else + { + NamedDSAState *dsa_state = &entry->data.dsh.dsa; + NamedDSHState *dsh_state = &entry->data.dsh; + dsa_area *dsa; + + /* XXX: Should we verify params matches what table was created with? */ + + if (dsa_is_attached(dsa_state->handle)) + ereport(ERROR, + (errmsg("requested DSHash already attached to current process"))); + + /* Initialize existing LWLock tranches for the DSA and dshash table. */ + LWLockRegisterTranche(dsa_state->tranche, dsa_state->tranche_name); + LWLockRegisterTranche(dsh_state->tranche, dsh_state->tranche_name); + + /* Attach to existing DSA for the hash table. */ + dsa = dsa_attach(dsa_state->handle); + dsa_pin_mapping(dsa); + + /* Attach to existing dshash table. */ + ret = dshash_attach(dsa, params, dsh_state->handle, NULL); + } + + dshash_release_lock(dsm_registry_table, entry); + MemoryContextSwitchTo(oldcontext); + + return ret; +} diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c index 00c76d05356..2fa045e6b0f 100644 --- a/src/backend/storage/ipc/ipci.c +++ b/src/backend/storage/ipc/ipci.c @@ -51,7 +51,6 @@ #include "storage/sinvaladt.h" #include "utils/guc.h" #include "utils/injection_point.h" -#include "utils/memutils.h" /* GUCs */ int shared_memory_type = DEFAULT_SHARED_MEMORY_TYPE; @@ -151,7 +150,6 @@ CalculateShmemSize(int *num_semaphores) size = add_size(size, InjectionPointShmemSize()); size = add_size(size, SlotSyncShmemSize()); size = add_size(size, AioShmemSize()); - size = add_size(size, MemoryContextReportingShmemSize()); /* include additional requested shmem from preload libraries */ size = add_size(size, total_addin_request); @@ -345,7 +343,6 @@ CreateOrAttachShmemStructs(void) WaitEventCustomShmemInit(); InjectionPointShmemInit(); AioShmemInit(); - 
MemoryContextReportingShmemInit(); } /* diff --git a/src/backend/storage/ipc/procsignal.c b/src/backend/storage/ipc/procsignal.c index ce69e26d720..a9bb540b55a 100644 --- a/src/backend/storage/ipc/procsignal.c +++ b/src/backend/storage/ipc/procsignal.c @@ -691,9 +691,6 @@ procsignal_sigusr1_handler(SIGNAL_ARGS) if (CheckProcSignal(PROCSIG_LOG_MEMORY_CONTEXT)) HandleLogMemoryContextInterrupt(); - if (CheckProcSignal(PROCSIG_GET_MEMORY_CONTEXT)) - HandleGetMemoryContextInterrupt(); - if (CheckProcSignal(PROCSIG_PARALLEL_APPLY_MESSAGE)) HandleParallelApplyMessageInterrupt(); diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index c9ae3b45b76..ca3656fc76f 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -679,12 +679,10 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS) */ for (i = 0; i < shm_ent_page_count; i++) { - volatile uint64 touch pg_attribute_unused(); - page_ptrs[i] = startptr + (i * os_page_size); if (firstNumaTouch) - pg_numa_touch_mem_if_required(touch, page_ptrs[i]); + pg_numa_touch_mem_if_required(page_ptrs[i]); CHECK_FOR_INTERRUPTS(); } diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c index 7fa8d9247e0..4222bdab078 100644 --- a/src/backend/storage/ipc/standby.c +++ b/src/backend/storage/ipc/standby.c @@ -1376,7 +1376,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) if (xlrec.subxid_overflow) elog(DEBUG2, - "snapshot of %d running transactions overflowed (lsn %X/%X oldest xid %u latest complete %u next xid %u)", + "snapshot of %d running transactions overflowed (lsn %X/%08X oldest xid %u latest complete %u next xid %u)", CurrRunningXacts->xcnt, LSN_FORMAT_ARGS(recptr), CurrRunningXacts->oldestRunningXid, @@ -1384,7 +1384,7 @@ LogCurrentRunningXacts(RunningTransactions CurrRunningXacts) CurrRunningXacts->nextXid); else elog(DEBUG2, - "snapshot of %d+%d running transaction ids (lsn %X/%X oldest xid %u latest complete %u next xid %u)", + 
"snapshot of %d+%d running transaction ids (lsn %X/%08X oldest xid %u latest complete %u next xid %u)", CurrRunningXacts->xcnt, CurrRunningXacts->subxcnt, LSN_FORMAT_ARGS(recptr), CurrRunningXacts->oldestRunningXid, diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index f50962983c3..3f6bf70bd3c 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -717,7 +717,10 @@ XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid, * through, to avoid slowing down the normal case.) */ if (!first) + { + CHECK_FOR_INTERRUPTS(); pg_usleep(1000L); + } first = false; xid = SubTransGetTopmostTransaction(xid); } @@ -757,7 +760,10 @@ ConditionalXactLockTableWait(TransactionId xid, bool logLockFailure) /* See XactLockTableWait about this case */ if (!first) + { + CHECK_FOR_INTERRUPTS(); pg_usleep(1000L); + } first = false; xid = SubTransGetTopmostTransaction(xid); } diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c index 86b06b9223f..62f3471448e 100644 --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -51,7 +51,7 @@ /* GUC variables */ int max_locks_per_xact; /* used to set the lock table size */ -bool log_lock_failure = false; +bool log_lock_failures = false; #define NLOCKENTS() \ mul_size(max_locks_per_xact, add_size(MaxBackends, max_prepared_xacts)) @@ -3539,9 +3539,9 @@ AtPrepare_Locks(void) * but that probably costs more cycles. */ void -PostPrepare_Locks(TransactionId xid) +PostPrepare_Locks(FullTransactionId fxid) { - PGPROC *newproc = TwoPhaseGetDummyProc(xid, false); + PGPROC *newproc = TwoPhaseGetDummyProc(fxid, false); HASH_SEQ_STATUS status; LOCALLOCK *locallock; LOCK *lock; @@ -4324,11 +4324,11 @@ DumpAllLocks(void) * and PANIC anyway. 
*/ void -lock_twophase_recover(TransactionId xid, uint16 info, +lock_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata; - PGPROC *proc = TwoPhaseGetDummyProc(xid, false); + PGPROC *proc = TwoPhaseGetDummyProc(fxid, false); LOCKTAG *locktag; LOCKMODE lockmode; LOCKMETHODID lockmethodid; @@ -4505,7 +4505,7 @@ lock_twophase_recover(TransactionId xid, uint16 info, * starting up into hot standby mode. */ void -lock_twophase_standby_recover(TransactionId xid, uint16 info, +lock_twophase_standby_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata; @@ -4524,7 +4524,7 @@ lock_twophase_standby_recover(TransactionId xid, uint16 info, if (lockmode == AccessExclusiveLock && locktag->locktag_type == LOCKTAG_RELATION) { - StandbyAcquireAccessExclusiveLock(xid, + StandbyAcquireAccessExclusiveLock(XidFromFullTransactionId(fxid), locktag->locktag_field1 /* dboid */ , locktag->locktag_field2 /* reloid */ ); } @@ -4537,11 +4537,11 @@ lock_twophase_standby_recover(TransactionId xid, uint16 info, * Find and release the lock indicated by the 2PC record. */ void -lock_twophase_postcommit(TransactionId xid, uint16 info, +lock_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata; - PGPROC *proc = TwoPhaseGetDummyProc(xid, true); + PGPROC *proc = TwoPhaseGetDummyProc(fxid, true); LOCKTAG *locktag; LOCKMETHODID lockmethodid; LockMethod lockMethodTable; @@ -4563,10 +4563,10 @@ lock_twophase_postcommit(TransactionId xid, uint16 info, * This is actually just the same as the COMMIT case. 
*/ void -lock_twophase_postabort(TransactionId xid, uint16 info, +lock_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { - lock_twophase_postcommit(xid, info, recdata, len); + lock_twophase_postcommit(fxid, info, recdata, len); } /* diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index 5148ef982e3..46f44bc4511 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -178,8 +178,6 @@ static const char *const BuiltinTrancheNames[] = { [LWTRANCHE_XACT_SLRU] = "XactSLRU", [LWTRANCHE_PARALLEL_VACUUM_DSA] = "ParallelVacuumDSA", [LWTRANCHE_AIO_URING_COMPLETION] = "AioUringCompletion", - [LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE] = "MemoryContextReportingState", - [LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC] = "MemoryContextReportingPerProcess", }; StaticAssertDecl(lengthof(BuiltinTrancheNames) == diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index d82114ffca1..c07fb588355 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -191,7 +191,7 @@ * AtPrepare_PredicateLocks(void); * PostPrepare_PredicateLocks(TransactionId xid); * PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit); - * predicatelock_twophase_recover(TransactionId xid, uint16 info, + * predicatelock_twophase_recover(FullTransactionId fxid, uint16 info, * void *recdata, uint32 len); */ @@ -4856,7 +4856,7 @@ AtPrepare_PredicateLocks(void) * anyway. We only need to clean up our local state. */ void -PostPrepare_PredicateLocks(TransactionId xid) +PostPrepare_PredicateLocks(FullTransactionId fxid) { if (MySerializableXact == InvalidSerializableXact) return; @@ -4879,12 +4879,12 @@ PostPrepare_PredicateLocks(TransactionId xid) * commits or aborts. 
*/ void -PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit) +PredicateLockTwoPhaseFinish(FullTransactionId fxid, bool isCommit) { SERIALIZABLEXID *sxid; SERIALIZABLEXIDTAG sxidtag; - sxidtag.xid = xid; + sxidtag.xid = XidFromFullTransactionId(fxid); LWLockAcquire(SerializableXactHashLock, LW_SHARED); sxid = (SERIALIZABLEXID *) @@ -4906,10 +4906,11 @@ PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit) * Re-acquire a predicate lock belonging to a transaction that was prepared. */ void -predicatelock_twophase_recover(TransactionId xid, uint16 info, +predicatelock_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhasePredicateRecord *record; + TransactionId xid = XidFromFullTransactionId(fxid); Assert(len == sizeof(TwoPhasePredicateRecord)); diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index f194e6b3dcc..e9ef0fbfe32 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -50,7 +50,6 @@ #include "storage/procsignal.h" #include "storage/spin.h" #include "storage/standby.h" -#include "utils/memutils.h" #include "utils/timeout.h" #include "utils/timestamp.h" diff --git a/src/backend/tcop/backend_startup.c b/src/backend/tcop/backend_startup.c index a7d1fec981f..ad0af5edc1f 100644 --- a/src/backend/tcop/backend_startup.c +++ b/src/backend/tcop/backend_startup.c @@ -881,7 +881,7 @@ ProcessCancelRequestPacket(Port *port, void *pkt, int pktlen) { ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), - errmsg("invalid length of query cancel packet"))); + errmsg("invalid length of cancel request packet"))); return; } len = pktlen - offsetof(CancelRequestPacket, cancelAuthCode); @@ -889,7 +889,7 @@ ProcessCancelRequestPacket(Port *port, void *pkt, int pktlen) { ereport(COMMERROR, (errcode(ERRCODE_PROTOCOL_VIOLATION), - errmsg("invalid length of query cancel key"))); + errmsg("invalid length of cancel key in cancel request packet"))); return; } @@ -1077,7 
+1077,7 @@ check_log_connections(char **newval, void **extra, GucSource source) if (!SplitIdentifierString(rawstring, ',', &elemlist)) { - GUC_check_errdetail("Invalid list syntax in parameter \"log_connections\"."); + GUC_check_errdetail("Invalid list syntax in parameter \"%s\".", "log_connections"); pfree(rawstring); list_free(elemlist); return false; diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 37784b7816d..2f8c3d5f918 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -1682,7 +1682,7 @@ exec_bind_message(StringInfo input_message) { Query *query = lfirst_node(Query, lc); - if (query->queryId != UINT64CONST(0)) + if (query->queryId != INT64CONST(0)) { pgstat_report_query_id(query->queryId, false); break; @@ -2034,7 +2034,7 @@ exec_bind_message(StringInfo input_message) { PlannedStmt *plan = lfirst_node(PlannedStmt, lc); - if (plan->planId != UINT64CONST(0)) + if (plan->planId != INT64CONST(0)) { pgstat_report_plan_id(plan->planId, false); break; @@ -2174,7 +2174,7 @@ exec_execute_message(const char *portal_name, long max_rows) { PlannedStmt *stmt = lfirst_node(PlannedStmt, lc); - if (stmt->queryId != UINT64CONST(0)) + if (stmt->queryId != INT64CONST(0)) { pgstat_report_query_id(stmt->queryId, false); break; @@ -2185,7 +2185,7 @@ exec_execute_message(const char *portal_name, long max_rows) { PlannedStmt *stmt = lfirst_node(PlannedStmt, lc); - if (stmt->planId != UINT64CONST(0)) + if (stmt->planId != INT64CONST(0)) { pgstat_report_plan_id(stmt->planId, false); break; @@ -3533,9 +3533,6 @@ ProcessInterrupts(void) if (LogMemoryContextPending) ProcessLogMemoryContextInterrupt(); - if (PublishMemoryContextPending) - ProcessGetMemoryContextInterrupt(); - if (ParallelApplyMessagePending) ProcessParallelApplyMessages(); } diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 25fe3d58016..aff8510755f 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1343,7 +1343,7 @@ 
ProcessUtilitySlow(ParseState *pstate, */ switch (stmt->subtype) { - case 'T': /* ALTER DOMAIN DEFAULT */ + case AD_AlterDefault: /* * Recursively alter column default for table and, @@ -1353,30 +1353,30 @@ ProcessUtilitySlow(ParseState *pstate, AlterDomainDefault(stmt->typeName, stmt->def); break; - case 'N': /* ALTER DOMAIN DROP NOT NULL */ + case AD_DropNotNull: address = AlterDomainNotNull(stmt->typeName, false); break; - case 'O': /* ALTER DOMAIN SET NOT NULL */ + case AD_SetNotNull: address = AlterDomainNotNull(stmt->typeName, true); break; - case 'C': /* ADD CONSTRAINT */ + case AD_AddConstraint: address = AlterDomainAddConstraint(stmt->typeName, stmt->def, &secondaryObject); break; - case 'X': /* DROP CONSTRAINT */ + case AD_DropConstraint: address = AlterDomainDropConstraint(stmt->typeName, stmt->name, stmt->behavior, stmt->missing_ok); break; - case 'V': /* VALIDATE CONSTRAINT */ + case AD_ValidateConstraint: address = AlterDomainValidateConstraint(stmt->typeName, stmt->name); diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c index e1576e64b6d..a290cc4c975 100644 --- a/src/backend/utils/activity/backend_status.c +++ b/src/backend/utils/activity/backend_status.c @@ -320,8 +320,8 @@ pgstat_bestart_initial(void) lbeentry.st_state = STATE_STARTING; lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID; lbeentry.st_progress_command_target = InvalidOid; - lbeentry.st_query_id = UINT64CONST(0); - lbeentry.st_plan_id = UINT64CONST(0); + lbeentry.st_query_id = INT64CONST(0); + lbeentry.st_plan_id = INT64CONST(0); /* * we don't zero st_progress_param here to save cycles; nobody should @@ -599,8 +599,8 @@ pgstat_report_activity(BackendState state, const char *cmd_str) beentry->st_activity_start_timestamp = 0; /* st_xact_start_timestamp and wait_event_info are also disabled */ beentry->st_xact_start_timestamp = 0; - beentry->st_query_id = UINT64CONST(0); - beentry->st_plan_id = UINT64CONST(0); + 
beentry->st_query_id = INT64CONST(0); + beentry->st_plan_id = INT64CONST(0); proc->wait_event_info = 0; PGSTAT_END_WRITE_ACTIVITY(beentry); } @@ -662,8 +662,8 @@ pgstat_report_activity(BackendState state, const char *cmd_str) */ if (state == STATE_RUNNING) { - beentry->st_query_id = UINT64CONST(0); - beentry->st_plan_id = UINT64CONST(0); + beentry->st_query_id = INT64CONST(0); + beentry->st_plan_id = INT64CONST(0); } if (cmd_str != NULL) @@ -683,7 +683,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str) * -------- */ void -pgstat_report_query_id(uint64 query_id, bool force) +pgstat_report_query_id(int64 query_id, bool force) { volatile PgBackendStatus *beentry = MyBEEntry; @@ -702,7 +702,7 @@ pgstat_report_query_id(uint64 query_id, bool force) * command, so ignore the one provided unless it's an explicit call to * reset the identifier. */ - if (beentry->st_query_id != 0 && !force) + if (beentry->st_query_id != INT64CONST(0) && !force) return; /* @@ -722,7 +722,7 @@ pgstat_report_query_id(uint64 query_id, bool force) * -------- */ void -pgstat_report_plan_id(uint64 plan_id, bool force) +pgstat_report_plan_id(int64 plan_id, bool force) { volatile PgBackendStatus *beentry = MyBEEntry; @@ -1134,7 +1134,7 @@ pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen) * * Return current backend's query identifier. */ -uint64 +int64 pgstat_get_my_query_id(void) { if (!MyBEEntry) @@ -1154,7 +1154,7 @@ pgstat_get_my_query_id(void) * * Return current backend's plan identifier. */ -uint64 +int64 pgstat_get_my_plan_id(void) { if (!MyBEEntry) diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index 28587e2916b..69df741cbf6 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -744,7 +744,7 @@ PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state) * Load the saved counts into our local pgstats state. 
*/ void -pgstat_twophase_postcommit(TransactionId xid, uint16 info, +pgstat_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; @@ -780,7 +780,7 @@ pgstat_twophase_postcommit(TransactionId xid, uint16 info, * as aborted. */ void -pgstat_twophase_postabort(TransactionId xid, uint16 info, +pgstat_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len) { TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata; diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c index 2e33293b000..53e7d534270 100644 --- a/src/backend/utils/activity/pgstat_shmem.c +++ b/src/backend/utils/activity/pgstat_shmem.c @@ -183,7 +183,7 @@ StatsShmemInit(void) p += MAXALIGN(pgstat_dsa_init_size()); dsa = dsa_create_in_place(ctl->raw_dsa_area, pgstat_dsa_init_size(), - LWTRANCHE_PGSTATS_DSA, 0); + LWTRANCHE_PGSTATS_DSA, NULL); dsa_pin(dsa); /* @@ -255,7 +255,8 @@ pgstat_attach_shmem(void) dsa_pin_mapping(pgStatLocal.dsa); pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params, - pgStatLocal.shmem->hash_handle, 0); + pgStatLocal.shmem->hash_handle, + NULL); MemoryContextSwitchTo(oldcontext); } diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt index 930321905f1..4da68312b5f 100644 --- a/src/backend/utils/activity/wait_event_names.txt +++ b/src/backend/utils/activity/wait_event_names.txt @@ -161,7 +161,6 @@ WAL_RECEIVER_EXIT "Waiting for the WAL receiver to exit." WAL_RECEIVER_WAIT_START "Waiting for startup process to send initial data for streaming replication." WAL_SUMMARY_READY "Waiting for a new WAL summary to be generated." XACT_GROUP_UPDATE "Waiting for the group leader to update transaction status at transaction end." -MEM_CXT_PUBLISH "Waiting for a process to publish memory information." 
ABI_compatibility: @@ -402,6 +401,7 @@ SerialSLRU "Waiting to access the serializable transaction conflict SLRU cache." SubtransSLRU "Waiting to access the sub-transaction SLRU cache." XactSLRU "Waiting to access the transaction status SLRU cache." ParallelVacuumDSA "Waiting for parallel vacuum dynamic shared memory allocation." +AioUringCompletion "Waiting for another process to complete IO via io_uring." # No "ABI_compatibility" region here as WaitEventLWLock has its own C code. diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 4a233b63c32..ffeacf2b819 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -23,6 +23,7 @@ OBJS = \ arrayutils.o \ ascii.o \ bool.o \ + bytea.o \ cash.o \ char.o \ cryptohashfuncs.o \ diff --git a/src/backend/utils/adt/bytea.c b/src/backend/utils/adt/bytea.c new file mode 100644 index 00000000000..2e539c2504e --- /dev/null +++ b/src/backend/utils/adt/bytea.c @@ -0,0 +1,1143 @@ +/*------------------------------------------------------------------------- + * + * bytea.c + * Functions for the bytea type. 
+ *
+ * Portions Copyright (c) 2025, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/adt/bytea.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/detoast.h"
+#include "catalog/pg_collation_d.h"
+#include "catalog/pg_type_d.h"
+#include "common/int.h"
+#include "fmgr.h"
+#include "libpq/pqformat.h"
+#include "port/pg_bitutils.h"
+#include "utils/builtins.h"
+#include "utils/bytea.h"
+#include "utils/fmgrprotos.h"
+#include "utils/memutils.h"
+#include "utils/sortsupport.h"
+#include "utils/varlena.h"
+#include "varatt.h"
+
+/* GUC variable: selects the text format produced by byteaout() */
+int			bytea_output = BYTEA_OUTPUT_HEX;
+
+static bytea *bytea_catenate(bytea *t1, bytea *t2);
+static bytea *bytea_substring(Datum str, int S, int L,
+							  bool length_not_specified);
+static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
+
+/*
+ * bytea_catenate
+ *	Guts of byteacat(), broken out so it can be used by other functions
+ *
+ * Arguments can be in short-header form, but not compressed or out-of-line
+ *
+ * Returns a newly palloc'd bytea of VARHDRSZ plus both payload lengths,
+ * holding t1's bytes followed by t2's.  Neither input is written to.
+ */
+static bytea *
+bytea_catenate(bytea *t1, bytea *t2)
+{
+	bytea	   *result;
+	int			len1,
+				len2,
+				len;
+	char	   *ptr;
+
+	len1 = VARSIZE_ANY_EXHDR(t1);
+	len2 = VARSIZE_ANY_EXHDR(t2);
+
+	/* paranoia ... probably should throw error instead? */
+	if (len1 < 0)
+		len1 = 0;
+	if (len2 < 0)
+		len2 = 0;
+
+	len = len1 + len2 + VARHDRSZ;
+	result = (bytea *) palloc(len);
+
+	/* Set size of result string... */
+	SET_VARSIZE(result, len);
+
+	/* Fill data field of result string... */
+	ptr = VARDATA(result);
+	if (len1 > 0)
+		memcpy(ptr, VARDATA_ANY(t1), len1);
+	if (len2 > 0)
+		memcpy(ptr + len1, VARDATA_ANY(t2), len2);
+
+	return result;
+}
+
+#define PG_STR_GET_BYTEA(str_) \
+	DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
+/*
+ * bytea_substring
+ *	Common implementation behind bytea_substr(), bytea_substr_no_len()
+ *	and bytea_overlay().
+ *
+ * str is the bytea datum to slice, S the one-based start position and L the
+ * requested length.  L is ignored when length_not_specified is true; the
+ * slice then runs to the end of the value.  A negative L (when it is used)
+ * raises ERRCODE_SUBSTRING_ERROR.  The slice itself is produced by
+ * DatumGetByteaPSlice().
+ */
+static bytea *
+bytea_substring(Datum str,
+				int S,
+				int L,
+				bool length_not_specified)
+{
+	int32		S1;				/* adjusted start position */
+	int32		L1;				/* adjusted substring length */
+	int32		E;				/* end position */
+
+	/*
+	 * The logic here should generally match text_substring().
+	 */
+	S1 = Max(S, 1);
+
+	if (length_not_specified)
+	{
+		/*
+		 * Not passed a length - DatumGetByteaPSlice() grabs everything to the
+		 * end of the string if we pass it a negative value for length.
+		 */
+		L1 = -1;
+	}
+	else if (L < 0)
+	{
+		/* SQL99 says to throw an error for E < S, i.e., negative length */
+		ereport(ERROR,
+				(errcode(ERRCODE_SUBSTRING_ERROR),
+				 errmsg("negative substring length not allowed")));
+		L1 = -1;				/* silence stupider compilers */
+	}
+	else if (pg_add_s32_overflow(S, L, &E))
+	{
+		/*
+		 * L could be large enough for S + L to overflow, in which case the
+		 * substring must run to end of string.
+		 */
+		L1 = -1;
+	}
+	else
+	{
+		/*
+		 * A zero or negative value for the end position can happen if the
+		 * start was negative or one. SQL99 says to return a zero-length
+		 * string.
+		 */
+		if (E < 1)
+			return PG_STR_GET_BYTEA("");	/* "" run through byteain() */
+
+		L1 = E - S1;
+	}
+
+	/*
+	 * If the start position is past the end of the string, SQL99 says to
+	 * return a zero-length string -- DatumGetByteaPSlice() will do that for
+	 * us.  We need only convert S1 to zero-based starting position.
+	 */
+	return DatumGetByteaPSlice(str, S1 - 1, L1);
+}
+/*
+ * bytea_overlay
+ *	Guts of byteaoverlay() and byteaoverlay_no_len(): return t1 with the
+ *	sl bytes starting at one-based position sp replaced by t2.
+ *
+ * The result is assembled with bytea_substring() and bytea_catenate(): the
+ * part of t1 before sp, then t2, then the part of t1 from sp + sl onward.
+ * Raises an error if sp <= 0 or if sp + sl overflows a signed 32-bit int.
+ */
+static bytea *
+bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
+{
+	bytea	   *result;
+	bytea	   *s1;
+	bytea	   *s2;
+	int			sp_pl_sl;
+
+	/*
+	 * Check for possible integer-overflow cases.  For negative sp, throw a
+	 * "substring length" error because that's what should be expected
+	 * according to the spec's definition of OVERLAY().
+	 */
+	if (sp <= 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_SUBSTRING_ERROR),
+				 errmsg("negative substring length not allowed")));
+	if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
+		ereport(ERROR,
+				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+				 errmsg("integer out of range")));
+
+	s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);	/* t1 before sp */
+	s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);	/* t1 from sp + sl on */
+	result = bytea_catenate(s1, t2);
+	result = bytea_catenate(result, s2);
+
+	return result;
+}
+
+/*****************************************************************************
+ *	 USER I/O ROUTINES														 *
+ *****************************************************************************/
+
+#define VAL(CH)			((CH) - '0')	/* digit character -> its numeric value */
+#define DIG(VAL)		((VAL) + '0')	/* numeric value 0..9 -> digit character */
+
+/*
+ *		byteain			- converts from printable representation of byte array
+ *
+ *		Non-printable characters must be passed as '\nnn' (octal) and are
+ *		converted to internal form.  '\' must be passed as '\\'.
+ *		ereport(ERROR, ...) if bad form.
+ *
+ *		BUGS:
+ *				The input is scanned twice.
+ *				The error checking of input is minimal.
+ */
+Datum
+byteain(PG_FUNCTION_ARGS)
+{
+	char	   *inputText = PG_GETARG_CSTRING(0);
+	Node	   *escontext = fcinfo->context;
+	char	   *tp;
+	char	   *rp;
+	int			bc;
+	bytea	   *result;
+
+	/* Recognize hex input: a leading backslash followed by 'x' */
+	if (inputText[0] == '\\' && inputText[1] == 'x')
+	{
+		size_t		len = strlen(inputText);
+
+		bc = (len - 2) / 2 + VARHDRSZ;	/* maximum possible length */
+		result = palloc(bc);
+		bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result),
+							 escontext);
+		SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
+
+		PG_RETURN_BYTEA_P(result);
+	}
+
+	/*
+	 * Else, it's the traditional escaped style.  This first pass validates
+	 * every backslash sequence and counts the decoded bytes in bc: one per
+	 * plain character, per \nnn octal escape, and per doubled backslash.
+	 */
+	for (bc = 0, tp = inputText; *tp != '\0'; bc++)
+	{
+		if (tp[0] != '\\')
+			tp++;
+		else if ((tp[0] == '\\') &&
+				 (tp[1] >= '0' && tp[1] <= '3') &&
+				 (tp[2] >= '0' && tp[2] <= '7') &&
+				 (tp[3] >= '0' && tp[3] <= '7'))
+			tp += 4;
+		else if ((tp[0] == '\\') &&
+				 (tp[1] == '\\'))
+			tp += 2;
+		else
+		{
+			/*
+			 * one backslash, not followed by another or ### valid octal
+			 */
+			ereturn(escontext, (Datum) 0,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type %s", "bytea")));
+		}
+	}
+
+	bc += VARHDRSZ;
+
+	result = (bytea *) palloc(bc);
+	SET_VARSIZE(result, bc);
+
+	tp = inputText;				/* second pass: decode into the result */
+	rp = VARDATA(result);
+	while (*tp != '\0')
+	{
+		if (tp[0] != '\\')
+			*rp++ = *tp++;
+		else if ((tp[0] == '\\') &&
+				 (tp[1] >= '0' && tp[1] <= '3') &&
+				 (tp[2] >= '0' && tp[2] <= '7') &&
+				 (tp[3] >= '0' && tp[3] <= '7'))
+		{
+			/* bc is reused as scratch to assemble the three octal digits */
+			bc = VAL(tp[1]);
+			bc <<= 3;
+			bc += VAL(tp[2]);
+			bc <<= 3;
+			*rp++ = bc + VAL(tp[3]);
+
+			tp += 4;
+		}
+		else if ((tp[0] == '\\') &&
+				 (tp[1] == '\\'))
+		{
+			*rp++ = '\\';
+			tp += 2;
+		}
+		else
+		{
+			/*
+			 * We should never get here. The first pass should not allow it.
+			 */
+			ereturn(escontext, (Datum) 0,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type %s", "bytea")));
+		}
+	}
+
+	PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ *		byteaout		- converts to printable representation of byte array
+ *
+ *		In the traditional escaped format, non-printable characters are
+ *		printed as '\nnn' (octal) and '\' as '\\'.
+ *
+ *		The format is chosen by the bytea_output variable; any value other
+ *		than BYTEA_OUTPUT_HEX or BYTEA_OUTPUT_ESCAPE is reported via elog().
+ */
+Datum
+byteaout(PG_FUNCTION_ARGS)
+{
+	bytea	   *vlena = PG_GETARG_BYTEA_PP(0);
+	char	   *result;
+	char	   *rp;
+
+	if (bytea_output == BYTEA_OUTPUT_HEX)
+	{
+		/*
+		 * Print hex format.  The buffer is sized for two characters per
+		 * input byte, plus 2 for the leading backslash-x and 1 for the
+		 * terminating NUL stored at the bottom of this function.
+		 */
+		rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
+		*rp++ = '\\';
+		*rp++ = 'x';
+		rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
+	}
+	else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
+	{
+		/* Print traditional escaped format */
+		char	   *vp;
+		uint64		len;
+		int			i;
+
+		/* First pass: compute the exact output size, including the NUL. */
+		len = 1;				/* empty string has 1 char */
+		vp = VARDATA_ANY(vlena);
+		for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
+		{
+			if (*vp == '\\')
+				len += 2;
+			else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
+				len += 4;
+			else
+				len++;
+		}
+
+		/*
+		 * In principle len can't overflow uint32 if the input fit in 1GB, but
+		 * for safety let's check rather than relying on palloc's internal
+		 * check.
+		 */
+		if (len > MaxAllocSize)
+			ereport(ERROR,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg_internal("result of bytea output conversion is too large")));
+		rp = result = (char *) palloc(len);
+
+		vp = VARDATA_ANY(vlena);	/* second pass: emit the characters */
+		for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
+		{
+			if (*vp == '\\')
+			{
+				*rp++ = '\\';
+				*rp++ = '\\';
+			}
+			else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
+			{
+				int			val;	/* holds unprintable chars */
+
+				/* write backslash plus three octal digits, lowest digit last */
+				val = *vp;
+				rp[0] = '\\';
+				rp[3] = DIG(val & 07);
+				val >>= 3;
+				rp[2] = DIG(val & 07);
+				val >>= 3;
+				rp[1] = DIG(val & 03);
+				rp += 4;
+			}
+			else
+				*rp++ = *vp;
+		}
+	}
+	else
+	{
+		elog(ERROR, "unrecognized \"bytea_output\" setting: %d",
+			 bytea_output);
+		rp = result = NULL;		/* keep compiler quiet */
+	}
+	*rp = '\0';
+	PG_RETURN_CSTRING(result);
+}
+
+/*
+ *		bytearecv			- converts external binary format to bytea
+ *
+ * Takes every byte between buf->cursor and buf->len as the payload.
+ */
+Datum
+bytearecv(PG_FUNCTION_ARGS)
+{
+	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
+	bytea	   *result;
+	int			nbytes;
+
+	nbytes = buf->len - buf->cursor;
+	result = (bytea *) palloc(nbytes + VARHDRSZ);
+	SET_VARSIZE(result, nbytes + VARHDRSZ);
+	pq_copymsgbytes(buf, VARDATA(result), nbytes);
+	PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ *		byteasend			- converts bytea to binary format
+ *
+ * This is a special case: just copy the input...
+ */
+Datum
+byteasend(PG_FUNCTION_ARGS)
+{
+	bytea	   *vlena = PG_GETARG_BYTEA_P_COPY(0);
+
+	PG_RETURN_BYTEA_P(vlena);
+}
+/*
+ * bytea_string_agg_transfn
+ *	Transition function for string_agg() over bytea.
+ *
+ * Argument 0 is the StringInfo transition state (NULL until the first
+ * non-null value is seen), argument 1 the value to append and argument 2 the
+ * delimiter.  A null value leaves the state unchanged; a null delimiter
+ * appends nothing before the value.  Returns the state pointer, or NULL if
+ * no state has been created yet.
+ */
+Datum
+bytea_string_agg_transfn(PG_FUNCTION_ARGS)
+{
+	StringInfo	state;
+
+	state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
+
+	/* Append the value unless null, preceding it with the delimiter. */
+	if (!PG_ARGISNULL(1))
+	{
+		bytea	   *value = PG_GETARG_BYTEA_PP(1);
+		bool		isfirst = false;
+
+		/*
+		 * You might think we can just throw away the first delimiter, however
+		 * we must keep it as we may be a parallel worker doing partial
+		 * aggregation building a state to send to the main process.  We need
+		 * to keep the delimiter of every aggregation so that the combine
+		 * function can properly join up the strings of two separately
+		 * partially aggregated results.  The first delimiter is only stripped
+		 * off in the final function.  To know how much to strip off the front
+		 * of the string, we store the length of the first delimiter in the
+		 * StringInfo's cursor field, which we don't otherwise need here.
+		 */
+		if (state == NULL)
+		{
+			MemoryContext aggcontext;
+			MemoryContext oldcontext;
+
+			if (!AggCheckCallContext(fcinfo, &aggcontext))
+			{
+				/* cannot be called directly because of internal-type argument */
+				elog(ERROR, "bytea_string_agg_transfn called in non-aggregate context");
+			}
+
+			/*
+			 * Create state in aggregate context.  It'll stay there across
+			 * subsequent calls.
+			 */
+			oldcontext = MemoryContextSwitchTo(aggcontext);
+			state = makeStringInfo();
+			MemoryContextSwitchTo(oldcontext);
+
+			isfirst = true;
+		}
+
+		if (!PG_ARGISNULL(2))
+		{
+			bytea	   *delim = PG_GETARG_BYTEA_PP(2);
+
+			appendBinaryStringInfo(state, VARDATA_ANY(delim),
+								   VARSIZE_ANY_EXHDR(delim));
+			if (isfirst)
+				state->cursor = VARSIZE_ANY_EXHDR(delim);	/* bytes for finalfn to skip */
+		}
+
+		appendBinaryStringInfo(state, VARDATA_ANY(value),
+							   VARSIZE_ANY_EXHDR(value));
+	}
+
+	/*
+	 * The transition type for string_agg() is declared to be "internal",
+	 * which is a pass-by-value type the same size as a pointer.
+	 */
+	if (state)
+		PG_RETURN_POINTER(state);
+	PG_RETURN_NULL();
+}
+/*
+ * bytea_string_agg_finalfn
+ *	Final function for string_agg() over bytea.
+ *
+ * Returns the accumulated bytes with the first state->cursor bytes (the
+ * leading delimiter recorded by the transition function) removed, or NULL
+ * when the transition state is NULL.
+ */
+Datum
+bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
+{
+	StringInfo	state;
+
+	/* cannot be called directly because of internal-type argument */
+	Assert(AggCheckCallContext(fcinfo, NULL));
+
+	state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
+
+	if (state != NULL)
+	{
+		/* As per comment in transfn, strip data before the cursor position */
+		bytea	   *result;
+		int			strippedlen = state->len - state->cursor;
+
+		result = (bytea *) palloc(strippedlen + VARHDRSZ);
+		SET_VARSIZE(result, strippedlen + VARHDRSZ);
+		memcpy(VARDATA(result), &state->data[state->cursor], strippedlen);
+		PG_RETURN_BYTEA_P(result);
+	}
+	else
+		PG_RETURN_NULL();
+}
+
+/*-------------------------------------------------------------
+ * byteaoctetlen
+ *
+ * get the number of bytes contained in an instance of type 'bytea'
+ *
+ * The length is derived from toast_raw_datum_size() minus VARHDRSZ, so the
+ * argument is read as a raw Datum and never fetched as a bytea pointer.
+ *-------------------------------------------------------------
+ */
+Datum
+byteaoctetlen(PG_FUNCTION_ARGS)
+{
+	Datum		str = PG_GETARG_DATUM(0);
+
+	/* We need not detoast the input at all */
+	PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
+}
+
+/*
+ * byteacat -
+ *	  takes two bytea* and returns a bytea* that is the concatenation of
+ *	  the two.
+ *
+ * Cloned from textcat and modified as required.
+ * The work is done by bytea_catenate().
+ */
+Datum
+byteacat(PG_FUNCTION_ARGS)
+{
+	bytea	   *t1 = PG_GETARG_BYTEA_PP(0);
+	bytea	   *t2 = PG_GETARG_BYTEA_PP(1);
+
+	PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
+}
+
+/*
+ * byteaoverlay
+ *	Replace specified substring of first string with second
+ *
+ * The SQL standard defines OVERLAY() in terms of substring and concatenation.
+ * This code is a direct implementation of what the standard says.
+ */
+Datum
+byteaoverlay(PG_FUNCTION_ARGS)
+{
+	bytea	   *t1 = PG_GETARG_BYTEA_PP(0);
+	bytea	   *t2 = PG_GETARG_BYTEA_PP(1);
+	int			sp = PG_GETARG_INT32(2);	/* substring start position */
+	int			sl = PG_GETARG_INT32(3);	/* substring length */
+
+	PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
+}
+/*
+ * byteaoverlay_no_len
+ *	Three-argument form of byteaoverlay(): no length is supplied, so the
+ *	number of bytes replaced in t1 is the length of t2.
+ */
+Datum
+byteaoverlay_no_len(PG_FUNCTION_ARGS)
+{
+	bytea	   *t1 = PG_GETARG_BYTEA_PP(0);
+	bytea	   *t2 = PG_GETARG_BYTEA_PP(1);
+	int			sp = PG_GETARG_INT32(2);	/* substring start position */
+	int			sl;
+
+	sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
+	PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
+}
+
+/*
+ * bytea_substr()
+ * Return a substring starting at the specified position.
+ * Cloned from text_substr and modified as required.
+ *
+ * Input:
+ *	- string
+ *	- starting position (is one-based)
+ *	- string length (optional)
+ *
+ * If the starting position is zero or less, then return from the start of the string
+ * adjusting the length to be consistent with the "negative start" per SQL.
+ * If the length is less than zero, an ERROR is thrown. If no third argument
+ * (length) is provided, the length to the end of the string is assumed.
+ *
+ * This entry point always passes a length; the two-argument form is
+ * bytea_substr_no_len().  Both delegate to bytea_substring().
+ */
+Datum
+bytea_substr(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
+									  PG_GETARG_INT32(1),
+									  PG_GETARG_INT32(2),
+									  false));
+}
+
+/*
+ * bytea_substr_no_len -
+ *	  Wrapper to avoid opr_sanity failure due to
+ *	  one function accepting a different number of args.
+ */
+Datum
+bytea_substr_no_len(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
+									  PG_GETARG_INT32(1),
+									  -1,	/* placeholder; ignored because of the flag below */
+									  true));
+}
+
+/*
+ * bit_count
+ *
+ * Returns, as int8, the result of pg_popcount() over the payload bytes of
+ * the argument.
+ */
+Datum
+bytea_bit_count(PG_FUNCTION_ARGS)
+{
+	bytea	   *t1 = PG_GETARG_BYTEA_PP(0);
+
+	PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
+}
+
+/*
+ * byteapos -
+ *	  Return the position of the specified substring.
+ *	  Implements the SQL POSITION() function.
+ *	  Cloned from textpos and modified as required.
+ */
+Datum
+byteapos(PG_FUNCTION_ARGS)
+{
+	bytea	   *t1 = PG_GETARG_BYTEA_PP(0);	/* value searched in */
+	bytea	   *t2 = PG_GETARG_BYTEA_PP(1);	/* pattern searched for */
+	int			pos;
+	int			px,
+				p;
+	int			len1,
+				len2;
+	char	   *p1,
+			   *p2;
+
+	len1 = VARSIZE_ANY_EXHDR(t1);
+	len2 = VARSIZE_ANY_EXHDR(t2);
+
+	if (len2 <= 0)
+		PG_RETURN_INT32(1);		/* result for empty pattern */
+
+	p1 = VARDATA_ANY(t1);
+	p2 = VARDATA_ANY(t2);
+
+	pos = 0;					/* stays 0 when the pattern is not found */
+	px = (len1 - len2);			/* last offset at which t2 still fits in t1 */
+	for (p = 0; p <= px; p++)
+	{
+		/* compare the first byte before paying for a full memcmp() */
+		if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
+		{
+			pos = p + 1;		/* report a one-based position */
+			break;
+		};
+		p1++;
+	};
+
+	PG_RETURN_INT32(pos);
+}
+
+/*-------------------------------------------------------------
+ * byteaGetByte
+ *
+ * this routine treats "bytea" as an array of bytes.
+ * It returns the Nth byte (a number between 0 and 255).
+ *
+ * N is zero-based; a value outside 0..len-1 raises
+ * ERRCODE_ARRAY_SUBSCRIPT_ERROR.
+ *-------------------------------------------------------------
+ */
+Datum
+byteaGetByte(PG_FUNCTION_ARGS)
+{
+	bytea	   *v = PG_GETARG_BYTEA_PP(0);
+	int32		n = PG_GETARG_INT32(1);
+	int			len;
+	int			byte;
+
+	len = VARSIZE_ANY_EXHDR(v);
+
+	if (n < 0 || n >= len)
+		ereport(ERROR,
+				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+				 errmsg("index %d out of valid range, 0..%d",
+						n, len - 1)));
+
+	/* read through unsigned char so the value is never negative */
+	byte = ((unsigned char *) VARDATA_ANY(v))[n];
+
+	PG_RETURN_INT32(byte);
+}
+
+/*-------------------------------------------------------------
+ * byteaGetBit
+ *
+ * This routine treats a "bytea" type like an array of bits.
+ * It returns the value of the Nth bit (0 or 1).
+ *
+ *-------------------------------------------------------------
+ */
+Datum
+byteaGetBit(PG_FUNCTION_ARGS)
+{
+	bytea	   *v = PG_GETARG_BYTEA_PP(0);
+	int64		n = PG_GETARG_INT64(1);
+	int			byteNo,
+				bitNo;
+	int			len;
+	int			byte;
+
+	len = VARSIZE_ANY_EXHDR(v);
+
+	/* the bound is computed in int64 so that len * 8 cannot overflow int */
+	if (n < 0 || n >= (int64) len * 8)
+		ereport(ERROR,
+				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+				 errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
+						n, (int64) len * 8 - 1)));
+
+	/*
+	 * n/8 is now known < len, so safe to cast to int.  bitNo counts from the
+	 * least significant bit of the selected byte (mask is 1 << bitNo).
+	 */
+	byteNo = (int) (n / 8);
+	bitNo = (int) (n % 8);
+
+	byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
+
+	if (byte & (1 << bitNo))
+		PG_RETURN_INT32(1);
+	else
+		PG_RETURN_INT32(0);
+}
+
+/*-------------------------------------------------------------
+ * byteaSetByte
+ *
+ * Given an instance of type 'bytea' creates a new one with
+ * the Nth byte set to the given value.
+ *
+ * N is zero-based; a value outside 0..len-1 raises
+ * ERRCODE_ARRAY_SUBSCRIPT_ERROR.
+ *
+ *-------------------------------------------------------------
+ */
+Datum
+byteaSetByte(PG_FUNCTION_ARGS)
+{
+	bytea	   *res = PG_GETARG_BYTEA_P_COPY(0);
+	int32		n = PG_GETARG_INT32(1);
+	int32		newByte = PG_GETARG_INT32(2);
+	int			len;
+
+	len = VARSIZE(res) - VARHDRSZ;
+
+	if (n < 0 || n >= len)
+		ereport(ERROR,
+				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+				 errmsg("index %d out of valid range, 0..%d",
+						n, len - 1)));
+
+	/*
+	 * Now set the byte.  newByte is not range-checked; storing it through an
+	 * unsigned char pointer keeps only its low-order 8 bits.
+	 */
+	((unsigned char *) VARDATA(res))[n] = newByte;
+
+	PG_RETURN_BYTEA_P(res);
+}
+
+/*-------------------------------------------------------------
+ * byteaSetBit
+ *
+ * Given an instance of type 'bytea' creates a new one with
+ * the Nth bit set to the given value.
+ *
+ *-------------------------------------------------------------
+ */
+Datum
+byteaSetBit(PG_FUNCTION_ARGS)
+{
+	bytea	   *res = PG_GETARG_BYTEA_P_COPY(0);
+	int64		n = PG_GETARG_INT64(1);
+	int32		newBit = PG_GETARG_INT32(2);
+	int			len;
+	int			oldByte,
+				newByte;
+	int			byteNo,
+				bitNo;
+
+	len = VARSIZE(res) - VARHDRSZ;
+
+	/* the bound is computed in int64 so that len * 8 cannot overflow int */
+	if (n < 0 || n >= (int64) len * 8)
+		ereport(ERROR,
+				(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+				 errmsg("index %" PRId64 " out of valid range, 0..%" PRId64,
+						n, (int64) len * 8 - 1)));
+
+	/*
+	 * n/8 is now known < len, so safe to cast to int.  bitNo counts from the
+	 * least significant bit of the selected byte (mask is 1 << bitNo).
+	 */
+	byteNo = (int) (n / 8);
+	bitNo = (int) (n % 8);
+
+	/*
+	 * sanity check!  Only 0 and 1 are accepted as the new bit value.
+	 */
+	if (newBit != 0 && newBit != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("new bit must be 0 or 1")));
+
+	/*
+	 * Update the byte: clear the target bit with an AND mask, or set it with
+	 * an OR mask, then store the byte back.
+	 */
+	oldByte = ((unsigned char *) VARDATA(res))[byteNo];
+
+	if (newBit == 0)
+		newByte = oldByte & (~(1 << bitNo));
+	else
+		newByte = oldByte | (1 << bitNo);
+
+	((unsigned char *) VARDATA(res))[byteNo] = newByte;
+
+	PG_RETURN_BYTEA_P(res);
+}
+
+/*
+ * Return reversed bytea
+ *
+ * Allocates a result of the same length and copies the input bytes into it
+ * back to front; the input is only read.
+ */
+Datum
+bytea_reverse(PG_FUNCTION_ARGS)
+{
+	bytea	   *v = PG_GETARG_BYTEA_PP(0);
+	const char *p = VARDATA_ANY(v);
+	int			len = VARSIZE_ANY_EXHDR(v);
+	const char *endp = p + len;
+	bytea	   *result = palloc(len + VARHDRSZ);
+	char	   *dst = (char *) VARDATA(result) + len;	/* one past the last result byte */
+
+	SET_VARSIZE(result, len + VARHDRSZ);
+
+	/* walk the source forwards while filling the destination backwards */
+	while (p < endp)
+		*(--dst) = *p++;
+
+	PG_RETURN_BYTEA_P(result);
+}
+
+
+/*****************************************************************************
+ *	Comparison Functions used for bytea
+ *
+ * Note: btree indexes need these routines not to leak memory; therefore,
+ * be careful to free working copies of toasted datums.  Most places don't
+ * need to be so careful.
+ *****************************************************************************/ + +Datum +byteaeq(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + bool result; + Size len1, + len2; + + /* + * We can use a fast path for unequal lengths, which might save us from + * having to detoast one or both values. + */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = false; + else + { + bytea *barg1 = DatumGetByteaPP(arg1); + bytea *barg2 = DatumGetByteaPP(arg2); + + result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), + len1 - VARHDRSZ) == 0); + + PG_FREE_IF_COPY(barg1, 0); + PG_FREE_IF_COPY(barg2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +byteane(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + bool result; + Size len1, + len2; + + /* + * We can use a fast path for unequal lengths, which might save us from + * having to detoast one or both values. 
+ */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = true; + else + { + bytea *barg1 = DatumGetByteaPP(arg1); + bytea *barg2 = DatumGetByteaPP(arg2); + + result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), + len1 - VARHDRSZ) != 0); + + PG_FREE_IF_COPY(barg1, 0); + PG_FREE_IF_COPY(barg2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +bytealt(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2))); +} + +Datum +byteale(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2))); +} + +Datum +byteagt(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2))); +} + +Datum +byteage(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + 
PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2))); +} + +Datum +byteacmp(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + if ((cmp == 0) && (len1 != len2)) + cmp = (len1 < len2) ? -1 : 1; + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(cmp); +} + +Datum +bytea_larger(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + bytea *result; + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2); + + PG_RETURN_BYTEA_P(result); +} + +Datum +bytea_smaller(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + bytea *result; + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? 
arg1 : arg2); + + PG_RETURN_BYTEA_P(result); +} + +Datum +bytea_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + /* Use generic string SortSupport, forcing "C" collation */ + varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID); + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_VOID(); +} + +/* Cast bytea -> int2 */ +Datum +bytea_int2(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint16 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT16(result); +} + +/* Cast bytea -> int4 */ +Datum +bytea_int4(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint32 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT32(result); +} + +/* Cast bytea -> int8 */ +Datum +bytea_int8(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint64 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < 
len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT64(result); +} + +/* Cast int2 -> bytea; can just use int2send() */ +Datum +int2_bytea(PG_FUNCTION_ARGS) +{ + return int2send(fcinfo); +} + +/* Cast int4 -> bytea; can just use int4send() */ +Datum +int4_bytea(PG_FUNCTION_ARGS) +{ + return int4send(fcinfo); +} + +/* Cast int8 -> bytea; can just use int8send() */ +Datum +int8_bytea(PG_FUNCTION_ARGS) +{ + return int8send(fcinfo); +} diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c index 4227ab1a72b..344f58b92f7 100644 --- a/src/backend/utils/adt/date.c +++ b/src/backend/utils/adt/date.c @@ -1363,10 +1363,35 @@ timestamp_date(PG_FUNCTION_ARGS) { Timestamp timestamp = PG_GETARG_TIMESTAMP(0); DateADT result; + + result = timestamp2date_opt_overflow(timestamp, NULL); + PG_RETURN_DATEADT(result); +} + +/* + * Convert timestamp to date. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the timestamp is finite but out of the valid range for date, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate date infinity. + * + * Note: given the ranges of the types, overflow is only possible at + * the minimum end of the range, but we don't assume that in this code. 
+ */ +DateADT +timestamp2date_opt_overflow(Timestamp timestamp, int *overflow) +{ + DateADT result; struct pg_tm tt, *tm = &tt; fsec_t fsec; + if (overflow) + *overflow = 0; + if (TIMESTAMP_IS_NOBEGIN(timestamp)) DATE_NOBEGIN(result); else if (TIMESTAMP_IS_NOEND(timestamp)) @@ -1374,14 +1399,30 @@ timestamp_date(PG_FUNCTION_ARGS) else { if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + DATE_NOBEGIN(result); + } + else + { + *overflow = 1; /* not actually reachable */ + DATE_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE; } - PG_RETURN_DATEADT(result); + return result; } @@ -1408,11 +1449,36 @@ timestamptz_date(PG_FUNCTION_ARGS) { TimestampTz timestamp = PG_GETARG_TIMESTAMP(0); DateADT result; + + result = timestamptz2date_opt_overflow(timestamp, NULL); + PG_RETURN_DATEADT(result); +} + +/* + * Convert timestamptz to date. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the timestamptz is finite but out of the valid range for date, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate date infinity. + * + * Note: given the ranges of the types, overflow is only possible at + * the minimum end of the range, but we don't assume that in this code. 
+ */ +DateADT +timestamptz2date_opt_overflow(TimestampTz timestamp, int *overflow) +{ + DateADT result; struct pg_tm tt, *tm = &tt; fsec_t fsec; int tz; + if (overflow) + *overflow = 0; + if (TIMESTAMP_IS_NOBEGIN(timestamp)) DATE_NOBEGIN(result); else if (TIMESTAMP_IS_NOEND(timestamp)) @@ -1420,14 +1486,30 @@ timestamptz_date(PG_FUNCTION_ARGS) else { if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + DATE_NOBEGIN(result); + } + else + { + *overflow = 1; /* not actually reachable */ + DATE_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE; } - PG_RETURN_DATEADT(result); + return result; } diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c index 793d8a9adcc..680fee2a844 100644 --- a/src/backend/utils/adt/datetime.c +++ b/src/backend/utils/adt/datetime.c @@ -702,9 +702,18 @@ ParseFraction(char *cp, double *frac) } else { + /* + * On the other hand, let's reject anything that's not digits after + * the ".". strtod is happy with input like ".123e9", but that'd + * break callers' expectation that the result is in 0..1. (It's quite + * difficult to get here with such input, but not impossible.) + */ + if (strspn(cp + 1, "0123456789") != strlen(cp + 1)) + return DTERR_BAD_FORMAT; + errno = 0; *frac = strtod(cp, &cp); - /* check for parse failure */ + /* check for parse failure (probably redundant given prior check) */ if (*cp != '\0' || errno != 0) return DTERR_BAD_FORMAT; } @@ -2959,30 +2968,27 @@ DecodeNumberField(int len, char *str, int fmask, char *cp; /* + * This function was originally meant to cope only with DTK_NUMBER fields, + * but we now sometimes abuse it to parse (parts of) DTK_DATE fields, + * which can contain letters and other punctuation. 
Reject if it's not a + * valid DTK_NUMBER, that is digits and decimal point(s). (ParseFraction + * will reject if there's more than one decimal point.) + */ + if (strspn(str, "0123456789.") != len) + return DTERR_BAD_FORMAT; + + /* * Have a decimal point? Then this is a date or something with a seconds * field... */ if ((cp = strchr(str, '.')) != NULL) { - /* - * Can we use ParseFractionalSecond here? Not clear whether trailing - * junk should be rejected ... - */ - if (cp[1] == '\0') - { - /* avoid assuming that strtod will accept "." */ - *fsec = 0; - } - else - { - double frac; + int dterr; - errno = 0; - frac = strtod(cp, NULL); - if (errno != 0) - return DTERR_BAD_FORMAT; - *fsec = rint(frac * 1000000); - } + /* Convert the fraction and store at *fsec */ + dterr = ParseFractionalSecond(cp, fsec); + if (dterr) + return dterr; /* Now truncate off the fraction for further processing */ *cp = '\0'; len = strlen(str); diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c index 6d20ae07ae7..7b97d2be6ca 100644 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -4065,10 +4065,11 @@ float84ge(PG_FUNCTION_ARGS) * in the histogram. width_bucket() returns an integer indicating the * bucket number that 'operand' belongs to in an equiwidth histogram * with the specified characteristics. An operand smaller than the - * lower bound is assigned to bucket 0. An operand greater than the - * upper bound is assigned to an additional bucket (with number - * count+1). We don't allow "NaN" for any of the float8 inputs, and we - * don't allow either of the histogram bounds to be +/- infinity. + * lower bound is assigned to bucket 0. An operand greater than or equal + * to the upper bound is assigned to an additional bucket (with number + * count+1). We don't allow the histogram bounds to be NaN or +/- infinity, + * but we do allow those values for the operand (taking NaN to be larger + * than any other value, as we do in comparisons). 
*/ Datum width_bucket_float8(PG_FUNCTION_ARGS) @@ -4084,12 +4085,11 @@ width_bucket_float8(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), errmsg("count must be greater than zero"))); - if (isnan(operand) || isnan(bound1) || isnan(bound2)) + if (isnan(bound1) || isnan(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), - errmsg("operand, lower bound, and upper bound cannot be NaN"))); + errmsg("lower and upper bounds cannot be NaN"))); - /* Note that we allow "operand" to be infinite */ if (isinf(bound1) || isinf(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), @@ -4097,15 +4097,15 @@ width_bucket_float8(PG_FUNCTION_ARGS) if (bound1 < bound2) { - if (operand < bound1) - result = 0; - else if (operand >= bound2) + if (isnan(operand) || operand >= bound2) { if (pg_add_s32_overflow(count, 1, &result)) ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("integer out of range"))); } + else if (operand < bound1) + result = 0; else { if (!isinf(bound2 - bound1)) @@ -4135,7 +4135,7 @@ width_bucket_float8(PG_FUNCTION_ARGS) } else if (bound1 > bound2) { - if (operand > bound1) + if (isnan(operand) || operand > bound1) result = 0; else if (operand <= bound2) { diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 5bd1e01f7e4..1d05481181d 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -3590,14 +3590,15 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out, if (matched < 2) ereturn(escontext,, (errcode(ERRCODE_INVALID_DATETIME_FORMAT), - errmsg("invalid input string for \"Y,YYY\""))); + errmsg("invalid value \"%s\" for \"%s\"", + s, "Y,YYY"))); /* years += (millennia * 1000); */ if (pg_mul_s32_overflow(millennia, 1000, &millennia) || pg_add_s32_overflow(years, millennia, &years)) ereturn(escontext,, (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW), - errmsg("value for 
\"Y,YYY\" in source string is out of range"))); + errmsg("value for \"%s\" in source string is out of range", "Y,YYY"))); if (!from_char_set_int(&out->year, years, n, escontext)) return; diff --git a/src/backend/utils/adt/inet_net_pton.c b/src/backend/utils/adt/inet_net_pton.c index ef2236d9f04..3b0db2a3799 100644 --- a/src/backend/utils/adt/inet_net_pton.c +++ b/src/backend/utils/adt/inet_net_pton.c @@ -115,8 +115,7 @@ inet_cidr_pton_ipv4(const char *src, u_char *dst, size_t size) src++; /* skip x or X. */ while ((ch = *src++) != '\0' && isxdigit((unsigned char) ch)) { - if (isupper((unsigned char) ch)) - ch = tolower((unsigned char) ch); + ch = pg_ascii_tolower((unsigned char) ch); n = strchr(xdigits, ch) - xdigits; assert(n >= 0 && n <= 15); if (dirty == 0) diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 7f4cf614585..4216ac17f43 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -98,7 +98,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale) else if (locale->is_default) return pg_tolower(c); else - return tolower_l(c, locale->info.lt); + return char_tolower(c, locale); } @@ -209,7 +209,17 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) * way. 
*/ - if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU)) + if (locale->ctype_is_c || + (char_tolower_enabled(locale) && + pg_database_encoding_max_length() == 1)) + { + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + s = VARDATA_ANY(str); + slen = VARSIZE_ANY_EXHDR(str); + return SB_IMatchText(s, slen, p, plen, locale); + } + else { pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation, PointerGetDatum(pat))); @@ -224,14 +234,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) else return MB_MatchText(s, slen, p, plen, 0); } - else - { - p = VARDATA_ANY(pat); - plen = VARSIZE_ANY_EXHDR(pat); - s = VARDATA_ANY(str); - slen = VARSIZE_ANY_EXHDR(str); - return SB_IMatchText(s, slen, p, plen, locale); - } } /* diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 8fdc677371f..999f23f86d5 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -1495,13 +1495,8 @@ pattern_char_isalpha(char c, bool is_multibyte, { if (locale->ctype_is_c) return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); - else if (is_multibyte && IS_HIGHBIT_SET(c)) - return true; - else if (locale->provider != COLLPROVIDER_LIBC) - return IS_HIGHBIT_SET(c) || - (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); else - return isalpha_l((unsigned char) c, locale->info.lt); + return char_is_cased(c, locale); } diff --git a/src/backend/utils/adt/mcxtfuncs.c b/src/backend/utils/adt/mcxtfuncs.c index 7ec2c225016..fe6dce9cba3 100644 --- a/src/backend/utils/adt/mcxtfuncs.c +++ b/src/backend/utils/adt/mcxtfuncs.c @@ -15,27 +15,30 @@ #include "postgres.h" -#include "access/twophase.h" -#include "catalog/pg_authid_d.h" #include "funcapi.h" #include "mb/pg_wchar.h" -#include "miscadmin.h" #include "storage/proc.h" #include "storage/procarray.h" -#include "utils/acl.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/hsearch.h" -#include 
"utils/memutils.h" -#include "utils/wait_event_types.h" /* ---------- * The max bytes for showing identifiers of MemoryContext. * ---------- */ #define MEMORY_CONTEXT_IDENT_DISPLAY_SIZE 1024 -struct MemoryStatsBackendState *memCxtState = NULL; -struct MemoryStatsCtl *memCxtArea = NULL; + +/* + * MemoryContextId + * Used for storage of transient identifiers for + * pg_get_backend_memory_contexts. + */ +typedef struct MemoryContextId +{ + MemoryContext context; + int context_id; +} MemoryContextId; /* * int_list_to_array @@ -86,7 +89,7 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore, */ for (MemoryContext cur = context; cur != NULL; cur = cur->parent) { - MemoryStatsContextId *entry; + MemoryContextId *entry; bool found; entry = hash_search(context_id_lookup, &cur, HASH_FIND, &found); @@ -140,51 +143,36 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore, else nulls[1] = true; - type = ContextTypeToString(context->type); - - values[2] = CStringGetTextDatum(type); - values[3] = Int32GetDatum(list_length(path)); /* level */ - values[4] = int_list_to_array(path); - values[5] = Int64GetDatum(stat.totalspace); - values[6] = Int64GetDatum(stat.nblocks); - values[7] = Int64GetDatum(stat.freespace); - values[8] = Int64GetDatum(stat.freechunks); - values[9] = Int64GetDatum(stat.totalspace - stat.freespace); - - tuplestore_putvalues(tupstore, tupdesc, values, nulls); - list_free(path); -} - -/* - * ContextTypeToString - * Returns a textual representation of a context type - * - * This should cover the same types as MemoryContextIsValid. 
- */ -const char * -ContextTypeToString(NodeTag type) -{ - const char *context_type; - - switch (type) + switch (context->type) { case T_AllocSetContext: - context_type = "AllocSet"; + type = "AllocSet"; break; case T_GenerationContext: - context_type = "Generation"; + type = "Generation"; break; case T_SlabContext: - context_type = "Slab"; + type = "Slab"; break; case T_BumpContext: - context_type = "Bump"; + type = "Bump"; break; default: - context_type = "???"; + type = "???"; break; } - return context_type; + + values[2] = CStringGetTextDatum(type); + values[3] = Int32GetDatum(list_length(path)); /* level */ + values[4] = int_list_to_array(path); + values[5] = Int64GetDatum(stat.totalspace); + values[6] = Int64GetDatum(stat.nblocks); + values[7] = Int64GetDatum(stat.freespace); + values[8] = Int64GetDatum(stat.freechunks); + values[9] = Int64GetDatum(stat.totalspace - stat.freespace); + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + list_free(path); } /* @@ -201,7 +189,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) HTAB *context_id_lookup; ctl.keysize = sizeof(MemoryContext); - ctl.entrysize = sizeof(MemoryStatsContextId); + ctl.entrysize = sizeof(MemoryContextId); ctl.hcxt = CurrentMemoryContext; context_id_lookup = hash_create("pg_get_backend_memory_contexts", @@ -228,7 +216,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) foreach_ptr(MemoryContextData, cur, contexts) { - MemoryStatsContextId *entry; + MemoryContextId *entry; bool found; /* @@ -236,8 +224,8 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) * PutMemoryContextsStatsTupleStore needs this to populate the "path" * column with the parent context_ids. 
*/ - entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur, - HASH_ENTER, &found); + entry = (MemoryContextId *) hash_search(context_id_lookup, &cur, + HASH_ENTER, &found); entry->context_id = context_id++; Assert(!found); @@ -317,349 +305,3 @@ pg_log_backend_memory_contexts(PG_FUNCTION_ARGS) PG_RETURN_BOOL(true); } - -/* - * pg_get_process_memory_contexts - * Signal a backend or an auxiliary process to send its memory contexts, - * wait for the results and display them. - * - * By default, only superusers or users with ROLE_PG_READ_ALL_STATS are allowed - * to signal a process to return the memory contexts. This is because allowing - * any users to issue this request at an unbounded rate would cause lots of - * requests to be sent, which can lead to denial of service. Additional roles - * can be permitted with GRANT. - * - * On receipt of this signal, a backend or an auxiliary process sets the flag - * in the signal handler, which causes the next CHECK_FOR_INTERRUPTS() - * or process-specific interrupt handler to copy the memory context details - * to a dynamic shared memory space. - * - * We have defined a limit on DSA memory that could be allocated per process - - * if the process has more memory contexts than what can fit in the allocated - * size, the excess contexts are summarized and represented as cumulative total - * at the end of the buffer. - * - * After sending the signal, wait on a condition variable. The publishing - * backend, after copying the data to shared memory, sends signal on that - * condition variable. There is one condition variable per publishing backend. - * Once the condition variable is signalled, check if the latest memory context - * information is available and display. 
- * - * If the publishing backend does not respond before the condition variable - * times out, which is set to MEMSTATS_WAIT_TIMEOUT, retry given that there is - * time left within the timeout specified by the user, before giving up and - * returning previously published statistics, if any. If no previous statistics - * exist, return NULL. - */ -#define MEMSTATS_WAIT_TIMEOUT 100 -Datum -pg_get_process_memory_contexts(PG_FUNCTION_ARGS) -{ - int pid = PG_GETARG_INT32(0); - bool summary = PG_GETARG_BOOL(1); - double timeout = PG_GETARG_FLOAT8(2); - PGPROC *proc; - ProcNumber procNumber = INVALID_PROC_NUMBER; - bool proc_is_aux = false; - ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; - MemoryStatsEntry *memcxt_info; - TimestampTz start_timestamp; - - /* - * See if the process with given pid is a backend or an auxiliary process - * and remember the type for when we requery the process later. - */ - proc = BackendPidGetProc(pid); - if (proc == NULL) - { - proc = AuxiliaryPidGetProc(pid); - proc_is_aux = true; - } - - /* - * BackendPidGetProc() and AuxiliaryPidGetProc() return NULL if the pid - * isn't valid; this is however not a problem and leave with a WARNING. - * See comment in pg_log_backend_memory_contexts for a discussion on this. - */ - if (proc == NULL) - { - /* - * This is just a warning so a loop-through-resultset will not abort - * if one backend terminated on its own during the run. - */ - ereport(WARNING, - errmsg("PID %d is not a PostgreSQL server process", pid)); - PG_RETURN_NULL(); - } - - InitMaterializedSRF(fcinfo, 0); - - procNumber = GetNumberFromPGProc(proc); - - LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); - memCxtState[procNumber].summary = summary; - LWLockRelease(&memCxtState[procNumber].lw_lock); - - start_timestamp = GetCurrentTimestamp(); - - /* - * Send a signal to a PostgreSQL process, informing it we want it to - * produce information about its memory contexts. 
- */ - if (SendProcSignal(pid, PROCSIG_GET_MEMORY_CONTEXT, procNumber) < 0) - { - ereport(WARNING, - errmsg("could not send signal to process %d: %m", pid)); - PG_RETURN_NULL(); - } - - /* - * Even if the proc has published statistics, the may not be due to the - * current request, but previously published stats. Check if the stats - * are updated by comparing the timestamp, if the stats are newer than our - * previously recorded timestamp from before sending the procsignal, they - * must by definition be updated. Wait for the timeout specified by the - * user, following which display old statistics if available or return - * NULL. - */ - while (1) - { - long msecs; - - /* - * We expect to come out of sleep when the requested process has - * finished publishing the statistics, verified using the valid DSA - * pointer. - * - * Make sure that the information belongs to pid we requested - * information for, Otherwise loop back and wait for the server - * process to finish publishing statistics. - */ - LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); - - /* - * Note in procnumber.h file says that a procNumber can be re-used for - * a different backend immediately after a backend exits. In case an - * old process' data was there and not updated by the current process - * in the slot identified by the procNumber, the pid of the requested - * process and the proc_id might not match. - */ - if (memCxtState[procNumber].proc_id == pid) - { - /* - * Break if the latest stats have been read, indicated by - * statistics timestamp being newer than the current request - * timestamp. - */ - msecs = TimestampDifferenceMilliseconds(start_timestamp, - memCxtState[procNumber].stats_timestamp); - - if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer) - && msecs > 0) - break; - } - LWLockRelease(&memCxtState[procNumber].lw_lock); - - /* - * Recheck the state of the backend before sleeping on the condition - * variable to ensure the process is still alive. 
Only check the - * relevant process type based on the earlier PID check. - */ - if (proc_is_aux) - proc = AuxiliaryPidGetProc(pid); - else - proc = BackendPidGetProc(pid); - - /* - * The process ending during memory context processing is not an - * error. - */ - if (proc == NULL) - { - ereport(WARNING, - errmsg("PID %d is no longer a PostgreSQL server process", - pid)); - PG_RETURN_NULL(); - } - - msecs = TimestampDifferenceMilliseconds(start_timestamp, GetCurrentTimestamp()); - - /* - * If we haven't already exceeded the timeout value, sleep for the - * remainder of the timeout on the condition variable. - */ - if (msecs > 0 && msecs < (timeout * 1000)) - { - /* - * Wait for the timeout as defined by the user. If no updated - * statistics are available within the allowed time then display - * previously published statistics if there are any. If no - * previous statistics are available then return NULL. The timer - * is defined in milliseconds since that's what the condition - * variable sleep uses. - */ - if (ConditionVariableTimedSleep(&memCxtState[procNumber].memcxt_cv, - ((timeout * 1000) - msecs), WAIT_EVENT_MEM_CXT_PUBLISH)) - { - LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); - /* Displaying previously published statistics if available */ - if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer)) - break; - else - { - LWLockRelease(&memCxtState[procNumber].lw_lock); - PG_RETURN_NULL(); - } - } - } - else - { - LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); - /* Displaying previously published statistics if available */ - if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer)) - break; - else - { - LWLockRelease(&memCxtState[procNumber].lw_lock); - PG_RETURN_NULL(); - } - } - } - - /* - * We should only reach here with a valid DSA handle, either containing - * updated statistics or previously published statistics (identified by - * the timestamp. 
- */ - Assert(memCxtArea->memstats_dsa_handle != DSA_HANDLE_INVALID); - /* Attach to the dsa area if we have not already done so */ - if (MemoryStatsDsaArea == NULL) - { - MemoryContext oldcontext = CurrentMemoryContext; - - MemoryContextSwitchTo(TopMemoryContext); - MemoryStatsDsaArea = dsa_attach(memCxtArea->memstats_dsa_handle); - MemoryContextSwitchTo(oldcontext); - dsa_pin_mapping(MemoryStatsDsaArea); - } - - /* - * Backend has finished publishing the stats, project them. - */ - memcxt_info = (MemoryStatsEntry *) - dsa_get_address(MemoryStatsDsaArea, memCxtState[procNumber].memstats_dsa_pointer); - -#define PG_GET_PROCESS_MEMORY_CONTEXTS_COLS 12 - for (int i = 0; i < memCxtState[procNumber].total_stats; i++) - { - ArrayType *path_array; - int path_length; - Datum values[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS]; - bool nulls[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS]; - char *name; - char *ident; - Datum *path_datum = NULL; - int *path_int = NULL; - - memset(values, 0, sizeof(values)); - memset(nulls, 0, sizeof(nulls)); - - if (DsaPointerIsValid(memcxt_info[i].name)) - { - name = (char *) dsa_get_address(MemoryStatsDsaArea, memcxt_info[i].name); - values[0] = CStringGetTextDatum(name); - } - else - nulls[0] = true; - - if (DsaPointerIsValid(memcxt_info[i].ident)) - { - ident = (char *) dsa_get_address(MemoryStatsDsaArea, memcxt_info[i].ident); - values[1] = CStringGetTextDatum(ident); - } - else - nulls[1] = true; - - values[2] = CStringGetTextDatum(ContextTypeToString(memcxt_info[i].type)); - - path_length = memcxt_info[i].path_length; - path_datum = (Datum *) palloc(path_length * sizeof(Datum)); - if (DsaPointerIsValid(memcxt_info[i].path)) - { - path_int = (int *) dsa_get_address(MemoryStatsDsaArea, memcxt_info[i].path); - for (int j = 0; j < path_length; j++) - path_datum[j] = Int32GetDatum(path_int[j]); - path_array = construct_array_builtin(path_datum, path_length, INT4OID); - values[3] = PointerGetDatum(path_array); - } - else - nulls[3] = true; - - values[4] = 
Int32GetDatum(memcxt_info[i].levels); - values[5] = Int64GetDatum(memcxt_info[i].totalspace); - values[6] = Int64GetDatum(memcxt_info[i].nblocks); - values[7] = Int64GetDatum(memcxt_info[i].freespace); - values[8] = Int64GetDatum(memcxt_info[i].freechunks); - values[9] = Int64GetDatum(memcxt_info[i].totalspace - - memcxt_info[i].freespace); - values[10] = Int32GetDatum(memcxt_info[i].num_agg_stats); - values[11] = TimestampTzGetDatum(memCxtState[procNumber].stats_timestamp); - - tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, - values, nulls); - } - LWLockRelease(&memCxtState[procNumber].lw_lock); - - ConditionVariableCancelSleep(); - - PG_RETURN_NULL(); -} - -Size -MemoryContextReportingShmemSize(void) -{ - Size sz = 0; - Size TotalProcs = 0; - - TotalProcs = add_size(TotalProcs, NUM_AUXILIARY_PROCS); - TotalProcs = add_size(TotalProcs, MaxBackends); - sz = add_size(sz, mul_size(TotalProcs, sizeof(MemoryStatsBackendState))); - - sz = add_size(sz, sizeof(MemoryStatsCtl)); - - return sz; -} - -/* - * Initialize shared memory for displaying memory context statistics - */ -void -MemoryContextReportingShmemInit(void) -{ - bool found; - - memCxtArea = (MemoryStatsCtl *) - ShmemInitStruct("MemoryStatsCtl", - sizeof(MemoryStatsCtl), &found); - - if (!found) - { - LWLockInitialize(&memCxtArea->lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE); - memCxtArea->memstats_dsa_handle = DSA_HANDLE_INVALID; - } - - memCxtState = (MemoryStatsBackendState *) - ShmemInitStruct("MemoryStatsBackendState", - ((MaxBackends + NUM_AUXILIARY_PROCS) * sizeof(MemoryStatsBackendState)), - &found); - - if (found) - return; - - for (int i = 0; i < (MaxBackends + NUM_AUXILIARY_PROCS); i++) - { - ConditionVariableInit(&memCxtState[i].memcxt_cv); - LWLockInitialize(&memCxtState[i].lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC); - memCxtState[i].memstats_dsa_pointer = InvalidDsaPointer; - } -} diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build index 
244f48f4fd7..ed9bbd7b926 100644 --- a/src/backend/utils/adt/meson.build +++ b/src/backend/utils/adt/meson.build @@ -12,6 +12,7 @@ backend_sources += files( 'arrayutils.c', 'ascii.c', 'bool.c', + 'bytea.c', 'cash.c', 'char.c', 'cryptohashfuncs.c', diff --git a/src/backend/utils/adt/network.c b/src/backend/utils/adt/network.c index f03fcc1147b..9fd211b2d45 100644 --- a/src/backend/utils/adt/network.c +++ b/src/backend/utils/adt/network.c @@ -12,8 +12,6 @@ #include <netinet/in.h> #include <arpa/inet.h> -#include "access/stratnum.h" -#include "catalog/pg_opfamily.h" #include "catalog/pg_type.h" #include "common/hashfn.h" #include "common/ip.h" diff --git a/src/backend/utils/adt/network_spgist.c b/src/backend/utils/adt/network_spgist.c index a84747d9275..602276a35c3 100644 --- a/src/backend/utils/adt/network_spgist.c +++ b/src/backend/utils/adt/network_spgist.c @@ -37,7 +37,6 @@ #include "catalog/pg_type.h" #include "utils/fmgrprotos.h" #include "utils/inet.h" -#include "varatt.h" static int inet_spg_node_number(const inet *val, int commonbits); diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 40dcbc7b671..c9233565d57 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -1958,9 +1958,11 @@ generate_series_numeric_support(PG_FUNCTION_ARGS) * in the histogram. width_bucket() returns an integer indicating the * bucket number that 'operand' belongs to in an equiwidth histogram * with the specified characteristics. An operand smaller than the - * lower bound is assigned to bucket 0. An operand greater than the - * upper bound is assigned to an additional bucket (with number - * count+1). We don't allow "NaN" for any of the numeric arguments. + * lower bound is assigned to bucket 0. An operand greater than or equal + * to the upper bound is assigned to an additional bucket (with number + * count+1). 
We don't allow the histogram bounds to be NaN or +/- infinity, + * but we do allow those values for the operand (taking NaN to be larger + * than any other value, as we do in comparisons). */ Datum width_bucket_numeric(PG_FUNCTION_ARGS) @@ -1978,17 +1980,13 @@ width_bucket_numeric(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), errmsg("count must be greater than zero"))); - if (NUMERIC_IS_SPECIAL(operand) || - NUMERIC_IS_SPECIAL(bound1) || - NUMERIC_IS_SPECIAL(bound2)) + if (NUMERIC_IS_SPECIAL(bound1) || NUMERIC_IS_SPECIAL(bound2)) { - if (NUMERIC_IS_NAN(operand) || - NUMERIC_IS_NAN(bound1) || - NUMERIC_IS_NAN(bound2)) + if (NUMERIC_IS_NAN(bound1) || NUMERIC_IS_NAN(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), - errmsg("operand, lower bound, and upper bound cannot be NaN"))); - /* We allow "operand" to be infinite; cmp_numerics will cope */ + errmsg("lower and upper bounds cannot be NaN"))); + if (NUMERIC_IS_INF(bound1) || NUMERIC_IS_INF(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index a858f27cadc..97c2ac1faf9 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -41,11 +41,11 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/builtins.h" -#include "utils/formatting.h" #include "utils/guc_hooks.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/pg_locale.h" +#include "utils/relcache.h" #include "utils/syscache.h" #ifdef WIN32 @@ -79,31 +79,6 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context); extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); extern char *get_collation_actual_version_libc(const char *collcollate); -extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t 
strtitle_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - -extern size_t strlower_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - -extern size_t strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - /* GUC settings */ char *locale_messages; char *locale_monetary; @@ -1092,6 +1067,9 @@ create_pg_locale(Oid collid, MemoryContext context) Assert((result->collate_is_c && result->collate == NULL) || (!result->collate_is_c && result->collate != NULL)); + Assert((result->ctype_is_c && result->ctype == NULL) || + (!result->ctype_is_c && result->ctype != NULL)); + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, &isnull); if (!isnull) @@ -1256,77 +1234,31 @@ size_t pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strlower_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strlower_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return 
strlower_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strlower(dst, dstsize, src, srclen, locale); } size_t pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strtitle_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strtitle_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return strtitle_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strtitle(dst, dstsize, src, srclen, locale); } size_t pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strupper_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strupper_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return strupper_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strupper(dst, dstsize, src, srclen, locale); } size_t pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strfold_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strfold_icu(dst, dstsize, src, srclen, locale); -#endif - /* for libc, just use strlower */ - else if (locale->provider == COLLPROVIDER_LIBC) - return strlower_libc(dst, dstsize, src, srclen, locale); + 
if (locale->ctype->strfold) + return locale->ctype->strfold(dst, dstsize, src, srclen, locale); else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strlower(dst, dstsize, src, srclen, locale); } /* @@ -1464,6 +1396,41 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, } /* + * char_is_cased() + * + * Fuzzy test of whether the given char is case-varying or not. The argument + * is a single byte, so in a multibyte encoding, just assume any non-ASCII + * char is case-varying. + */ +bool +char_is_cased(char ch, pg_locale_t locale) +{ + return locale->ctype->char_is_cased(ch, locale); +} + +/* + * char_tolower_enabled() + * + * Does the provider support char_tolower()? + */ +bool +char_tolower_enabled(pg_locale_t locale) +{ + return (locale->ctype->char_tolower != NULL); +} + +/* + * char_tolower() + * + * Convert char (single-byte encoding) to lowercase. + */ +char +char_tolower(unsigned char ch, pg_locale_t locale) +{ + return locale->ctype->char_tolower(ch, locale); +} + +/* * Return required encoding ID for the given locale, or -1 if any encoding is * valid for the locale. 
*/ diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c index f51768830cd..0c9fbdb40f2 100644 --- a/src/backend/utils/adt/pg_locale_builtin.c +++ b/src/backend/utils/adt/pg_locale_builtin.c @@ -18,22 +18,12 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/builtins.h" -#include "utils/memutils.h" #include "utils/pg_locale.h" #include "utils/syscache.h" extern pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context); extern char *get_collation_actual_version_builtin(const char *collcollate); -extern size_t strlower_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); - struct WordBoundaryState { @@ -77,7 +67,7 @@ initcap_wbnext(void *state) return wbstate->len; } -size_t +static size_t strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -85,7 +75,7 @@ strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, locale->info.builtin.casemap_full); } -size_t +static size_t strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -103,7 +93,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, initcap_wbnext, &wbstate); } -size_t +static size_t strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -111,7 +101,7 @@ strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, locale->info.builtin.casemap_full); } -size_t +static size_t strfold_builtin(char *dest, size_t destsize, const char *src, 
ssize_t srclen, pg_locale_t locale) { @@ -119,6 +109,98 @@ strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, locale->info.builtin.casemap_full); } +static bool +wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isdigit(wc, !locale->info.builtin.casemap_full); +} + +static bool +wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isalpha(wc); +} + +static bool +wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isalnum(wc, !locale->info.builtin.casemap_full); +} + +static bool +wc_isupper_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isupper(wc); +} + +static bool +wc_islower_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_islower(wc); +} + +static bool +wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isgraph(wc); +} + +static bool +wc_isprint_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isprint(wc); +} + +static bool +wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_ispunct(wc, !locale->info.builtin.casemap_full); +} + +static bool +wc_isspace_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isspace(wc); +} + +static bool +char_is_cased_builtin(char ch, pg_locale_t locale) +{ + return IS_HIGHBIT_SET(ch) || + (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); +} + +static pg_wchar +wc_toupper_builtin(pg_wchar wc, pg_locale_t locale) +{ + return unicode_uppercase_simple(wc); +} + +static pg_wchar +wc_tolower_builtin(pg_wchar wc, pg_locale_t locale) +{ + return unicode_lowercase_simple(wc); +} + +static const struct ctype_methods ctype_methods_builtin = { + .strlower = strlower_builtin, + .strtitle = strtitle_builtin, + .strupper = strupper_builtin, + .strfold = strfold_builtin, + .wc_isdigit = wc_isdigit_builtin, + .wc_isalpha = wc_isalpha_builtin, + .wc_isalnum = wc_isalnum_builtin, + .wc_isupper = wc_isupper_builtin, + .wc_islower = wc_islower_builtin, + .wc_isgraph = wc_isgraph_builtin, + 
.wc_isprint = wc_isprint_builtin, + .wc_ispunct = wc_ispunct_builtin, + .wc_isspace = wc_isspace_builtin, + .char_is_cased = char_is_cased_builtin, + .wc_tolower = wc_tolower_builtin, + .wc_toupper = wc_toupper_builtin, +}; + pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context) { @@ -158,10 +240,11 @@ create_pg_locale_builtin(Oid collid, MemoryContext context) result->info.builtin.locale = MemoryContextStrdup(context, locstr); result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0); - result->provider = COLLPROVIDER_BUILTIN; result->deterministic = true; result->collate_is_c = true; result->ctype_is_c = (strcmp(locstr, "C") == 0); + if (!result->ctype_is_c) + result->ctype = &ctype_methods_builtin; return result; } diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c index a32c32a0744..96741e08269 100644 --- a/src/backend/utils/adt/pg_locale_icu.c +++ b/src/backend/utils/adt/pg_locale_icu.c @@ -48,19 +48,22 @@ #define TEXTBUFLEN 1024 extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context); -extern size_t strlower_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); #ifdef USE_ICU extern UCollator *pg_ucol_open(const char *loc_str); +static size_t strlower_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static size_t strtitle_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static size_t strupper_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static size_t strfold_icu(char *dest, 
size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static int strncoll_icu(const char *arg1, ssize_t len1, + const char *arg2, ssize_t len2, + pg_locale_t locale); static size_t strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale); @@ -118,6 +121,25 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity, const char *locale, UErrorCode *pErrorCode); +static bool +char_is_cased_icu(char ch, pg_locale_t locale) +{ + return IS_HIGHBIT_SET(ch) || + (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); +} + +static pg_wchar +toupper_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_toupper(wc); +} + +static pg_wchar +tolower_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_tolower(wc); +} + static const struct collate_methods collate_methods_icu = { .strncoll = strncoll_icu, .strnxfrm = strnxfrm_icu, @@ -136,6 +158,78 @@ static const struct collate_methods collate_methods_icu_utf8 = { .strxfrm_is_safe = true, }; +static bool +wc_isdigit_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isdigit(wc); +} + +static bool +wc_isalpha_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isalpha(wc); +} + +static bool +wc_isalnum_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isalnum(wc); +} + +static bool +wc_isupper_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isupper(wc); +} + +static bool +wc_islower_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_islower(wc); +} + +static bool +wc_isgraph_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isgraph(wc); +} + +static bool +wc_isprint_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isprint(wc); +} + +static bool +wc_ispunct_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_ispunct(wc); +} + +static bool +wc_isspace_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isspace(wc); +} + +static const struct ctype_methods ctype_methods_icu = { + .strlower = strlower_icu, + .strtitle = strtitle_icu, + .strupper = strupper_icu, + 
.strfold = strfold_icu, + .wc_isdigit = wc_isdigit_icu, + .wc_isalpha = wc_isalpha_icu, + .wc_isalnum = wc_isalnum_icu, + .wc_isupper = wc_isupper_icu, + .wc_islower = wc_islower_icu, + .wc_isgraph = wc_isgraph_icu, + .wc_isprint = wc_isprint_icu, + .wc_ispunct = wc_ispunct_icu, + .wc_isspace = wc_isspace_icu, + .char_is_cased = char_is_cased_icu, + .wc_toupper = toupper_icu, + .wc_tolower = tolower_icu, +}; #endif pg_locale_t @@ -198,7 +292,6 @@ create_pg_locale_icu(Oid collid, MemoryContext context) result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); result->info.icu.locale = MemoryContextStrdup(context, iculocstr); result->info.icu.ucol = collator; - result->provider = COLLPROVIDER_ICU; result->deterministic = deterministic; result->collate_is_c = false; result->ctype_is_c = false; @@ -206,6 +299,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context) result->collate = &collate_methods_icu_utf8; else result->collate = &collate_methods_icu; + result->ctype = &ctype_methods_icu; return result; #else @@ -379,7 +473,7 @@ make_icu_collator(const char *iculocstr, const char *icurules) } } -size_t +static size_t strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -399,7 +493,7 @@ strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_len; } -size_t +static size_t strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -419,7 +513,7 @@ strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_len; } -size_t +static size_t strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -439,7 +533,7 @@ strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_len; } -size_t +static size_t strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -474,8 +568,6 @@ 
strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2 int result; UErrorCode status; - Assert(locale->provider == COLLPROVIDER_ICU); - Assert(GetDatabaseEncoding() == PG_UTF8); status = U_ZERO_ERROR; @@ -503,8 +595,6 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t uchar_bsize; Size result_bsize; - Assert(locale->provider == COLLPROVIDER_ICU); - init_icu_converter(); ulen = uchar_length(icu_converter, src, srclen); @@ -549,8 +639,6 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize, uint32_t state[2]; UErrorCode status; - Assert(locale->provider == COLLPROVIDER_ICU); - Assert(GetDatabaseEncoding() == PG_UTF8); uiter_setUTF8(&iter, src, srclen); @@ -749,8 +837,6 @@ strncoll_icu(const char *arg1, ssize_t len1, *uchar2; int result; - Assert(locale->provider == COLLPROVIDER_ICU); - /* if encoding is UTF8, use more efficient strncoll_icu_utf8 */ #ifdef HAVE_UCOL_STRCOLLUTF8 Assert(GetDatabaseEncoding() != PG_UTF8); @@ -799,8 +885,6 @@ strnxfrm_prefix_icu(char *dest, size_t destsize, size_t uchar_bsize; Size result_bsize; - Assert(locale->provider == COLLPROVIDER_ICU); - /* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */ Assert(GetDatabaseEncoding() != PG_UTF8); diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index 199857e22db..e9f9fc1e369 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -34,6 +34,46 @@ #endif /* + * For the libc provider, to provide as much functionality as possible on a + * variety of platforms without going so far as to implement everything from + * scratch, we use several implementation strategies depending on the + * situation: + * + * 1. In C/POSIX collations, we use hard-wired code. We can't depend on + * the <ctype.h> functions since those will obey LC_CTYPE. Note that these + * collations don't give a fig about multibyte characters. + * + * 2. 
When working in UTF8 encoding, we use the <wctype.h> functions. + * This assumes that every platform uses Unicode codepoints directly + * as the wchar_t representation of Unicode. (XXX: ICU makes this assumption + * even for non-UTF8 encodings, which may be a problem.) On some platforms + * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF. + * + * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar + * values up to 255, and punt for values above that. This is 100% correct + * only in single-byte encodings such as LATINn. However, non-Unicode + * multibyte encodings are mostly Far Eastern character sets for which the + * properties being tested here aren't very relevant for higher code values + * anyway. The difficulty with using the <wctype.h> functions with + * non-Unicode multibyte encodings is that we can have no certainty that + * the platform's wchar_t representation matches what we do in pg_wchar + * conversions. + * + * As a special case, in the "default" collation, (2) and (3) force ASCII + * letters to follow ASCII upcase/downcase rules, while in a non-default + * collation we just let the library functions do what they will. The case + * where this matters is treatment of I/i in Turkish, and the behavior is + * meant to match the upper()/lower() SQL functions. + * + * We store the active collation setting in static variables. In principle + * it could be passed down to here via the regex library's "struct vars" data + * structure; but that would require somewhat invasive changes in the regex + * library, and right now there's no real benefit to be gained from that. + * + * NB: the coding here assumes pg_wchar is an unsigned type. + */ + +/* * Size of stack buffer to use for string transformations, used to avoid heap * allocations in typical cases. 
This should be large enough that most strings * will fit, but small enough that we feel comfortable putting it on the @@ -43,13 +83,6 @@ extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); -extern size_t strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - static int strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale); @@ -85,6 +118,251 @@ static size_t strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale); +static bool +wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isdigit_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isalpha_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isalnum_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isupper_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return islower_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isgraph_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isprint_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return ispunct_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isspace_l((unsigned char) wc, locale->info.lt); +} + +static bool 
+wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswdigit_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswalpha_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswalnum_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswupper_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswlower_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswgraph_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswprint_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswpunct_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswspace_l((wint_t) wc, locale->info.lt); +} + +static char +char_tolower_libc(unsigned char ch, pg_locale_t locale) +{ + Assert(pg_database_encoding_max_length() == 1); + return tolower_l(ch, locale->info.lt); +} + +static bool +char_is_cased_libc(char ch, pg_locale_t locale) +{ + bool is_multibyte = pg_database_encoding_max_length() > 1; + + if (is_multibyte && IS_HIGHBIT_SET(ch)) + return true; + else + return isalpha_l((unsigned char) ch, locale->info.lt); +} + +static pg_wchar +toupper_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() != PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_toupper((unsigned char) wc); + if (wc <= (pg_wchar) UCHAR_MAX) + return toupper_l((unsigned char) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +toupper_libc_mb(pg_wchar wc, pg_locale_t locale) +{ 
+ Assert(GetDatabaseEncoding() == PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_toupper((unsigned char) wc); + if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF) + return towupper_l((wint_t) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +tolower_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() != PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_tolower((unsigned char) wc); + if (wc <= (pg_wchar) UCHAR_MAX) + return tolower_l((unsigned char) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +tolower_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() == PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_tolower((unsigned char) wc); + if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF) + return towlower_l((wint_t) wc, locale->info.lt); + else + return wc; +} + +static const struct ctype_methods ctype_methods_libc_sb = { + .strlower = strlower_libc_sb, + .strtitle = strtitle_libc_sb, + .strupper = strupper_libc_sb, + .wc_isdigit = wc_isdigit_libc_sb, + .wc_isalpha = wc_isalpha_libc_sb, + .wc_isalnum = wc_isalnum_libc_sb, + .wc_isupper = wc_isupper_libc_sb, + .wc_islower = wc_islower_libc_sb, + .wc_isgraph = wc_isgraph_libc_sb, + .wc_isprint = wc_isprint_libc_sb, + .wc_ispunct = wc_ispunct_libc_sb, + .wc_isspace = wc_isspace_libc_sb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_sb, + .wc_tolower = tolower_libc_sb, + .max_chr = UCHAR_MAX, +}; + +/* + * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but + * single-byte semantics for pattern matching. 
+ */ +static const struct ctype_methods ctype_methods_libc_other_mb = { + .strlower = strlower_libc_mb, + .strtitle = strtitle_libc_mb, + .strupper = strupper_libc_mb, + .wc_isdigit = wc_isdigit_libc_sb, + .wc_isalpha = wc_isalpha_libc_sb, + .wc_isalnum = wc_isalnum_libc_sb, + .wc_isupper = wc_isupper_libc_sb, + .wc_islower = wc_islower_libc_sb, + .wc_isgraph = wc_isgraph_libc_sb, + .wc_isprint = wc_isprint_libc_sb, + .wc_ispunct = wc_ispunct_libc_sb, + .wc_isspace = wc_isspace_libc_sb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_sb, + .wc_tolower = tolower_libc_sb, + .max_chr = UCHAR_MAX, +}; + +static const struct ctype_methods ctype_methods_libc_utf8 = { + .strlower = strlower_libc_mb, + .strtitle = strtitle_libc_mb, + .strupper = strupper_libc_mb, + .wc_isdigit = wc_isdigit_libc_mb, + .wc_isalpha = wc_isalpha_libc_mb, + .wc_isalnum = wc_isalnum_libc_mb, + .wc_isupper = wc_isupper_libc_mb, + .wc_islower = wc_islower_libc_mb, + .wc_isgraph = wc_isgraph_libc_mb, + .wc_isprint = wc_isprint_libc_mb, + .wc_ispunct = wc_ispunct_libc_mb, + .wc_isspace = wc_isspace_libc_mb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_mb, + .wc_tolower = tolower_libc_mb, +}; + static const struct collate_methods collate_methods_libc = { .strncoll = strncoll_libc, .strnxfrm = strnxfrm_libc, @@ -119,36 +397,6 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = { }; #endif -size_t -strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strlower_libc_mb(dst, dstsize, src, srclen, locale); - else - return strlower_libc_sb(dst, dstsize, src, srclen, locale); -} - -size_t -strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strtitle_libc_mb(dst, dstsize, src, 
srclen, locale); - else - return strtitle_libc_sb(dst, dstsize, src, srclen, locale); -} - -size_t -strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strupper_libc_mb(dst, dstsize, src, srclen, locale); - else - return strupper_libc_sb(dst, dstsize, src, srclen, locale); -} - static size_t strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) @@ -465,7 +713,6 @@ create_pg_locale_libc(Oid collid, MemoryContext context) loc = make_libc_collator(collate, ctype); result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); - result->provider = COLLPROVIDER_LIBC; result->deterministic = true; result->collate_is_c = (strcmp(collate, "C") == 0) || (strcmp(collate, "POSIX") == 0); @@ -481,6 +728,15 @@ create_pg_locale_libc(Oid collid, MemoryContext context) #endif result->collate = &collate_methods_libc; } + if (!result->ctype_is_c) + { + if (GetDatabaseEncoding() == PG_UTF8) + result->ctype = &ctype_methods_libc_utf8; + else if (pg_database_encoding_max_length() > 1) + result->ctype = &ctype_methods_libc_other_mb; + else + result->ctype = &ctype_methods_libc_sb; + } return result; } @@ -576,8 +832,6 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, const char *arg2n; int result; - Assert(locale->provider == COLLPROVIDER_LIBC); - if (bufsize1 + bufsize2 > TEXTBUFLEN) buf = palloc(bufsize1 + bufsize2); @@ -632,8 +886,6 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t bufsize = srclen + 1; size_t result; - Assert(locale->provider == COLLPROVIDER_LIBC); - if (srclen == -1) return strxfrm_l(dest, src, destsize, locale->info.lt); @@ -742,7 +994,6 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2, int r; int result; - Assert(locale->provider == COLLPROVIDER_LIBC); Assert(GetDatabaseEncoding() == PG_UTF8); if (len1 == -1) diff 
--git a/src/backend/utils/adt/pg_lsn.c b/src/backend/utils/adt/pg_lsn.c index 16311590a14..12de2446f5b 100644 --- a/src/backend/utils/adt/pg_lsn.c +++ b/src/backend/utils/adt/pg_lsn.c @@ -83,7 +83,7 @@ pg_lsn_out(PG_FUNCTION_ARGS) char buf[MAXPG_LSNLEN + 1]; char *result; - snprintf(buf, sizeof buf, "%X/%X", LSN_FORMAT_ARGS(lsn)); + snprintf(buf, sizeof buf, "%X/%08X", LSN_FORMAT_ARGS(lsn)); result = pstrdup(buf); PG_RETURN_CSTRING(result); } diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 97af7c6554f..1c12ddbae49 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -640,10 +640,10 @@ pg_stat_get_activity(PG_FUNCTION_ARGS) values[28] = BoolGetDatum(false); /* GSS credentials not * delegated */ } - if (beentry->st_query_id == 0) + if (beentry->st_query_id == INT64CONST(0)) nulls[30] = true; else - values[30] = UInt64GetDatum(beentry->st_query_id); + values[30] = Int64GetDatum(beentry->st_query_id); } else { @@ -1510,7 +1510,7 @@ pg_stat_io_build_tuples(ReturnSetInfo *rsinfo, bktype_stats->bytes[io_obj][io_context][io_op]; /* Convert to numeric */ - snprintf(buf, sizeof buf, UINT64_FORMAT, byte); + snprintf(buf, sizeof buf, INT64_FORMAT, byte); values[byte_idx] = DirectFunctionCall3(numeric_in, CStringGetDatum(buf), ObjectIdGetDatum(0), diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index edee1f7880b..6e2864cbbda 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -773,8 +773,11 @@ similar_escape_internal(text *pat_text, text *esc_text) int plen, elen; bool afterescape = false; - bool incharclass = false; int nquotes = 0; + int charclass_depth = 0; /* Nesting level of character classes, + * encompassed by square brackets */ + int charclass_start = 0; /* State of the character class start, + * for carets */ p = VARDATA_ANY(pat_text); plen = VARSIZE_ANY_EXHDR(pat_text); @@ -904,7 +907,7 @@ similar_escape_internal(text 
*pat_text, text *esc_text) /* fast path */ if (afterescape) { - if (pchar == '"' && !incharclass) /* escape-double-quote? */ + if (pchar == '"' && charclass_depth < 1) /* escape-double-quote? */ { /* emit appropriate part separator, per notes above */ if (nquotes == 0) @@ -953,18 +956,41 @@ similar_escape_internal(text *pat_text, text *esc_text) /* SQL escape character; do not send to output */ afterescape = true; } - else if (incharclass) + else if (charclass_depth > 0) { if (pchar == '\\') *r++ = '\\'; *r++ = pchar; - if (pchar == ']') - incharclass = false; + + /* + * Ignore a closing bracket at the start of a character class. + * Such a bracket is taken literally rather than closing the + * class. "charclass_start" is 1 right at the beginning of a + * class and 2 after an initial caret. + */ + if (pchar == ']' && charclass_start > 2) + charclass_depth--; + else if (pchar == '[') + charclass_depth++; + + /* + * If there is a caret right after the opening bracket, it negates + * the character class, but a following closing bracket should + * still be treated as a normal character. That holds only for + * the first caret, so only the values 1 and 2 mean that closing + * brackets should be taken literally. 
+ */ + if (pchar == '^') + charclass_start++; + else + charclass_start = 3; /* definitely past the start */ } else if (pchar == '[') { + /* start of a character class */ *r++ = pchar; - incharclass = true; + charclass_depth++; + charclass_start = 1; } else if (pchar == '%') { diff --git a/src/backend/utils/adt/regproc.c b/src/backend/utils/adt/regproc.c index 5ee608a2b39..b8bbe95e82e 100644 --- a/src/backend/utils/adt/regproc.c +++ b/src/backend/utils/adt/regproc.c @@ -30,6 +30,7 @@ #include "catalog/pg_ts_config.h" #include "catalog/pg_ts_dict.h" #include "catalog/pg_type.h" +#include "commands/dbcommands.h" #include "lib/stringinfo.h" #include "mb/pg_wchar.h" #include "miscadmin.h" @@ -1764,6 +1765,123 @@ regnamespacesend(PG_FUNCTION_ARGS) } /* + * regdatabasein - converts database name to database OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_database entry. + */ +Datum +regdatabasein(PG_FUNCTION_ARGS) +{ + char *db_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Oid result; + List *names; + + /* Handle "-" or numeric OID */ + if (parseDashOrOid(db_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regdatabase values must be OIDs in bootstrap mode"); + + /* Normal case: see if the name matches any pg_database entry. 
*/ + names = stringToQualifiedNameList(db_name_or_oid, escontext); + if (names == NIL) + PG_RETURN_NULL(); + + if (list_length(names) != 1) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_NAME), + errmsg("invalid name syntax"))); + + result = get_database_oid(strVal(linitial(names)), true); + + if (!OidIsValid(result)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("database \"%s\" does not exist", + strVal(linitial(names))))); + + PG_RETURN_OID(result); +} + +/* + * to_regdatabase - converts database name to database OID + * + * If the name is not found, we return NULL. + */ +Datum +to_regdatabase(PG_FUNCTION_ARGS) +{ + char *db_name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!DirectInputFunctionCallSafe(regdatabasein, db_name, + InvalidOid, -1, + (Node *) &escontext, + &result)) + PG_RETURN_NULL(); + PG_RETURN_DATUM(result); +} + +/* + * regdatabaseout - converts database OID to database name + */ +Datum +regdatabaseout(PG_FUNCTION_ARGS) +{ + Oid dboid = PG_GETARG_OID(0); + char *result; + + if (dboid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + result = get_database_name(dboid); + + if (result) + { + /* pstrdup is not really necessary, but it avoids a compiler warning */ + result = pstrdup(quote_identifier(result)); + } + else + { + /* If OID doesn't match any database, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", dboid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regdatabaserecv - converts external binary format to regdatabase + */ +Datum +regdatabaserecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regdatabasesend - converts regdatabase to binary format + */ +Datum +regdatabasesend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} 
+ +/* * text_regclass: convert text to regclass * * This could be replaced by CoerceViaIO, except that we need to treat diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 6239900fa28..059fc5ebf60 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -30,7 +30,6 @@ #include "access/xact.h" #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" -#include "catalog/pg_proc.h" #include "commands/trigger.h" #include "executor/executor.h" #include "executor/spi.h" @@ -46,7 +45,6 @@ #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" -#include "utils/rangetypes.h" #include "utils/rel.h" #include "utils/rls.h" #include "utils/ruleutils.h" diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index a96b1b9c0bc..ce6a626eba2 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -103,7 +103,6 @@ #include "access/table.h" #include "access/tableam.h" #include "access/visibilitymap.h" -#include "catalog/pg_am.h" #include "catalog/pg_collation.h" #include "catalog/pg_operator.h" #include "catalog/pg_statistic.h" @@ -4620,6 +4619,7 @@ convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue, case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: + case REGDATABASEOID: *scaledvalue = convert_numeric_to_scalar(value, valuetypid, &failure); *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid, @@ -4752,6 +4752,7 @@ convert_numeric_to_scalar(Datum value, Oid typid, bool *failure) case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: + case REGDATABASEOID: /* we can treat OIDs as integers... 
*/ return (double) DatumGetObjectId(value); } diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 347089b7626..0a5848a4ab2 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -6477,7 +6477,7 @@ timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow) if (TIMESTAMP_NOT_FINITE(timestamp)) return timestamp; - /* We don't expect this to fail, but check it pro forma */ + /* timestamp2tm should not fail on valid timestamps, but cope */ if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) == 0) { tz = DetermineTimeZoneOffset(tm, session_timezone); @@ -6485,23 +6485,22 @@ timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow) result = dt2local(timestamp, -tz); if (IS_VALID_TIMESTAMP(result)) - { return result; + } + + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + TIMESTAMP_NOBEGIN(result); } - else if (overflow) + else { - if (result < MIN_TIMESTAMP) - { - *overflow = -1; - TIMESTAMP_NOBEGIN(result); - } - else - { - *overflow = 1; - TIMESTAMP_NOEND(result); - } - return result; + *overflow = 1; + TIMESTAMP_NOEND(result); } + return result; } ereport(ERROR, @@ -6531,27 +6530,81 @@ timestamptz_timestamp(PG_FUNCTION_ARGS) PG_RETURN_TIMESTAMP(timestamptz2timestamp(timestamp)); } +/* + * Convert timestamptz to timestamp, throwing error for overflow. + */ static Timestamp timestamptz2timestamp(TimestampTz timestamp) { + return timestamptz2timestamp_opt_overflow(timestamp, NULL); +} + +/* + * Convert timestamp with time zone to timestamp. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the timestamptz is finite but out of the valid range for timestamp, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate timestamp infinity. 
+ */ +Timestamp +timestamptz2timestamp_opt_overflow(TimestampTz timestamp, int *overflow) +{ Timestamp result; struct pg_tm tt, *tm = &tt; fsec_t fsec; int tz; + if (overflow) + *overflow = 0; + if (TIMESTAMP_NOT_FINITE(timestamp)) result = timestamp; else { if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + TIMESTAMP_NOBEGIN(result); + } + else + { + *overflow = 1; + TIMESTAMP_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } if (tm2timestamp(tm, fsec, NULL, &result) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + TIMESTAMP_NOBEGIN(result); + } + else + { + *overflow = 1; + TIMESTAMP_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } } return result; } diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 3e4d5568bde..ffae8c23abf 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -35,7 +35,6 @@ #include "port/pg_bswap.h" #include "regex/regex.h" #include "utils/builtins.h" -#include "utils/bytea.h" #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -43,10 +42,6 @@ #include "utils/sortsupport.h" #include "utils/varlena.h" - -/* GUC variable */ -int bytea_output = BYTEA_OUTPUT_HEX; - typedef struct varlena VarString; /* @@ -148,12 +143,6 @@ static int text_position_get_match_pos(TextPositionState *state); static void text_position_cleanup(TextPositionState *state); static void check_collation_set(Oid collid); static int text_cmp(text *arg1, text *arg2, Oid collid); -static bytea *bytea_catenate(bytea *t1, bytea *t2); -static bytea *bytea_substring(Datum str, - int S, - int L, - bool length_not_specified); -static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl); static void 
appendStringInfoText(StringInfo str, const text *t); static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate); static void split_text_accum_result(SplitTextOutputData *tstate, @@ -279,307 +268,6 @@ text_to_cstring_buffer(const text *src, char *dst, size_t dst_len) * USER I/O ROUTINES * *****************************************************************************/ - -#define VAL(CH) ((CH) - '0') -#define DIG(VAL) ((VAL) + '0') - -/* - * byteain - converts from printable representation of byte array - * - * Non-printable characters must be passed as '\nnn' (octal) and are - * converted to internal form. '\' must be passed as '\\'. - * ereport(ERROR, ...) if bad form. - * - * BUGS: - * The input is scanned twice. - * The error checking of input is minimal. - */ -Datum -byteain(PG_FUNCTION_ARGS) -{ - char *inputText = PG_GETARG_CSTRING(0); - Node *escontext = fcinfo->context; - char *tp; - char *rp; - int bc; - bytea *result; - - /* Recognize hex input */ - if (inputText[0] == '\\' && inputText[1] == 'x') - { - size_t len = strlen(inputText); - - bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */ - result = palloc(bc); - bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result), - escontext); - SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ - - PG_RETURN_BYTEA_P(result); - } - - /* Else, it's the traditional escaped style */ - for (bc = 0, tp = inputText; *tp != '\0'; bc++) - { - if (tp[0] != '\\') - tp++; - else if ((tp[0] == '\\') && - (tp[1] >= '0' && tp[1] <= '3') && - (tp[2] >= '0' && tp[2] <= '7') && - (tp[3] >= '0' && tp[3] <= '7')) - tp += 4; - else if ((tp[0] == '\\') && - (tp[1] == '\\')) - tp += 2; - else - { - /* - * one backslash, not followed by another or ### valid octal - */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "bytea"))); - } - } - - bc += VARHDRSZ; - - result = (bytea *) palloc(bc); - SET_VARSIZE(result, bc); - - tp = 
inputText; - rp = VARDATA(result); - while (*tp != '\0') - { - if (tp[0] != '\\') - *rp++ = *tp++; - else if ((tp[0] == '\\') && - (tp[1] >= '0' && tp[1] <= '3') && - (tp[2] >= '0' && tp[2] <= '7') && - (tp[3] >= '0' && tp[3] <= '7')) - { - bc = VAL(tp[1]); - bc <<= 3; - bc += VAL(tp[2]); - bc <<= 3; - *rp++ = bc + VAL(tp[3]); - - tp += 4; - } - else if ((tp[0] == '\\') && - (tp[1] == '\\')) - { - *rp++ = '\\'; - tp += 2; - } - else - { - /* - * We should never get here. The first pass should not allow it. - */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "bytea"))); - } - } - - PG_RETURN_BYTEA_P(result); -} - -/* - * byteaout - converts to printable representation of byte array - * - * In the traditional escaped format, non-printable characters are - * printed as '\nnn' (octal) and '\' as '\\'. - */ -Datum -byteaout(PG_FUNCTION_ARGS) -{ - bytea *vlena = PG_GETARG_BYTEA_PP(0); - char *result; - char *rp; - - if (bytea_output == BYTEA_OUTPUT_HEX) - { - /* Print hex format */ - rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); - *rp++ = '\\'; - *rp++ = 'x'; - rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); - } - else if (bytea_output == BYTEA_OUTPUT_ESCAPE) - { - /* Print traditional escaped format */ - char *vp; - uint64 len; - int i; - - len = 1; /* empty string has 1 char */ - vp = VARDATA_ANY(vlena); - for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) - { - if (*vp == '\\') - len += 2; - else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) - len += 4; - else - len++; - } - - /* - * In principle len can't overflow uint32 if the input fit in 1GB, but - * for safety let's check rather than relying on palloc's internal - * check. 
- */ - if (len > MaxAllocSize) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg_internal("result of bytea output conversion is too large"))); - rp = result = (char *) palloc(len); - - vp = VARDATA_ANY(vlena); - for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) - { - if (*vp == '\\') - { - *rp++ = '\\'; - *rp++ = '\\'; - } - else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) - { - int val; /* holds unprintable chars */ - - val = *vp; - rp[0] = '\\'; - rp[3] = DIG(val & 07); - val >>= 3; - rp[2] = DIG(val & 07); - val >>= 3; - rp[1] = DIG(val & 03); - rp += 4; - } - else - *rp++ = *vp; - } - } - else - { - elog(ERROR, "unrecognized \"bytea_output\" setting: %d", - bytea_output); - rp = result = NULL; /* keep compiler quiet */ - } - *rp = '\0'; - PG_RETURN_CSTRING(result); -} - -/* - * bytearecv - converts external binary format to bytea - */ -Datum -bytearecv(PG_FUNCTION_ARGS) -{ - StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); - bytea *result; - int nbytes; - - nbytes = buf->len - buf->cursor; - result = (bytea *) palloc(nbytes + VARHDRSZ); - SET_VARSIZE(result, nbytes + VARHDRSZ); - pq_copymsgbytes(buf, VARDATA(result), nbytes); - PG_RETURN_BYTEA_P(result); -} - -/* - * byteasend - converts bytea to binary format - * - * This is a special case: just copy the input... - */ -Datum -byteasend(PG_FUNCTION_ARGS) -{ - bytea *vlena = PG_GETARG_BYTEA_P_COPY(0); - - PG_RETURN_BYTEA_P(vlena); -} - -Datum -bytea_string_agg_transfn(PG_FUNCTION_ARGS) -{ - StringInfo state; - - state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); - - /* Append the value unless null, preceding it with the delimiter. */ - if (!PG_ARGISNULL(1)) - { - bytea *value = PG_GETARG_BYTEA_PP(1); - bool isfirst = false; - - /* - * You might think we can just throw away the first delimiter, however - * we must keep it as we may be a parallel worker doing partial - * aggregation building a state to send to the main process. 
We need - * to keep the delimiter of every aggregation so that the combine - * function can properly join up the strings of two separately - * partially aggregated results. The first delimiter is only stripped - * off in the final function. To know how much to strip off the front - * of the string, we store the length of the first delimiter in the - * StringInfo's cursor field, which we don't otherwise need here. - */ - if (state == NULL) - { - state = makeStringAggState(fcinfo); - isfirst = true; - } - - if (!PG_ARGISNULL(2)) - { - bytea *delim = PG_GETARG_BYTEA_PP(2); - - appendBinaryStringInfo(state, VARDATA_ANY(delim), - VARSIZE_ANY_EXHDR(delim)); - if (isfirst) - state->cursor = VARSIZE_ANY_EXHDR(delim); - } - - appendBinaryStringInfo(state, VARDATA_ANY(value), - VARSIZE_ANY_EXHDR(value)); - } - - /* - * The transition type for string_agg() is declared to be "internal", - * which is a pass-by-value type the same size as a pointer. - */ - if (state) - PG_RETURN_POINTER(state); - PG_RETURN_NULL(); -} - -Datum -bytea_string_agg_finalfn(PG_FUNCTION_ARGS) -{ - StringInfo state; - - /* cannot be called directly because of internal-type argument */ - Assert(AggCheckCallContext(fcinfo, NULL)); - - state = PG_ARGISNULL(0) ? 
NULL : (StringInfo) PG_GETARG_POINTER(0); - - if (state != NULL) - { - /* As per comment in transfn, strip data before the cursor position */ - bytea *result; - int strippedlen = state->len - state->cursor; - - result = (bytea *) palloc(strippedlen + VARHDRSZ); - SET_VARSIZE(result, strippedlen + VARHDRSZ); - memcpy(VARDATA(result), &state->data[state->cursor], strippedlen); - PG_RETURN_BYTEA_P(result); - } - else - PG_RETURN_NULL(); -} - /* * textin - converts cstring to internal representation */ @@ -2959,467 +2647,6 @@ bttext_pattern_sortsupport(PG_FUNCTION_ARGS) } -/*------------------------------------------------------------- - * byteaoctetlen - * - * get the number of bytes contained in an instance of type 'bytea' - *------------------------------------------------------------- - */ -Datum -byteaoctetlen(PG_FUNCTION_ARGS) -{ - Datum str = PG_GETARG_DATUM(0); - - /* We need not detoast the input at all */ - PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); -} - -/* - * byteacat - - * takes two bytea* and returns a bytea* that is the concatenation of - * the two. - * - * Cloned from textcat and modified as required. - */ -Datum -byteacat(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - - PG_RETURN_BYTEA_P(bytea_catenate(t1, t2)); -} - -/* - * bytea_catenate - * Guts of byteacat(), broken out so it can be used by other functions - * - * Arguments can be in short-header form, but not compressed or out-of-line - */ -static bytea * -bytea_catenate(bytea *t1, bytea *t2) -{ - bytea *result; - int len1, - len2, - len; - char *ptr; - - len1 = VARSIZE_ANY_EXHDR(t1); - len2 = VARSIZE_ANY_EXHDR(t2); - - /* paranoia ... probably should throw error instead? */ - if (len1 < 0) - len1 = 0; - if (len2 < 0) - len2 = 0; - - len = len1 + len2 + VARHDRSZ; - result = (bytea *) palloc(len); - - /* Set size of result string... */ - SET_VARSIZE(result, len); - - /* Fill data field of result string... 
*/ - ptr = VARDATA(result); - if (len1 > 0) - memcpy(ptr, VARDATA_ANY(t1), len1); - if (len2 > 0) - memcpy(ptr + len1, VARDATA_ANY(t2), len2); - - return result; -} - -#define PG_STR_GET_BYTEA(str_) \ - DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_))) - -/* - * bytea_substr() - * Return a substring starting at the specified position. - * Cloned from text_substr and modified as required. - * - * Input: - * - string - * - starting position (is one-based) - * - string length (optional) - * - * If the starting position is zero or less, then return from the start of the string - * adjusting the length to be consistent with the "negative start" per SQL. - * If the length is less than zero, an ERROR is thrown. If no third argument - * (length) is provided, the length to the end of the string is assumed. - */ -Datum -bytea_substr(PG_FUNCTION_ARGS) -{ - PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), - PG_GETARG_INT32(1), - PG_GETARG_INT32(2), - false)); -} - -/* - * bytea_substr_no_len - - * Wrapper to avoid opr_sanity failure due to - * one function accepting a different number of args. - */ -Datum -bytea_substr_no_len(PG_FUNCTION_ARGS) -{ - PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), - PG_GETARG_INT32(1), - -1, - true)); -} - -static bytea * -bytea_substring(Datum str, - int S, - int L, - bool length_not_specified) -{ - int32 S1; /* adjusted start position */ - int32 L1; /* adjusted substring length */ - int32 E; /* end position */ - - /* - * The logic here should generally match text_substring(). - */ - S1 = Max(S, 1); - - if (length_not_specified) - { - /* - * Not passed a length - DatumGetByteaPSlice() grabs everything to the - * end of the string if we pass it a negative value for length. 
- */ - L1 = -1; - } - else if (L < 0) - { - /* SQL99 says to throw an error for E < S, i.e., negative length */ - ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), - errmsg("negative substring length not allowed"))); - L1 = -1; /* silence stupider compilers */ - } - else if (pg_add_s32_overflow(S, L, &E)) - { - /* - * L could be large enough for S + L to overflow, in which case the - * substring must run to end of string. - */ - L1 = -1; - } - else - { - /* - * A zero or negative value for the end position can happen if the - * start was negative or one. SQL99 says to return a zero-length - * string. - */ - if (E < 1) - return PG_STR_GET_BYTEA(""); - - L1 = E - S1; - } - - /* - * If the start position is past the end of the string, SQL99 says to - * return a zero-length string -- DatumGetByteaPSlice() will do that for - * us. We need only convert S1 to zero-based starting position. - */ - return DatumGetByteaPSlice(str, S1 - 1, L1); -} - -/* - * byteaoverlay - * Replace specified substring of first string with second - * - * The SQL standard defines OVERLAY() in terms of substring and concatenation. - * This code is a direct implementation of what the standard says. - */ -Datum -byteaoverlay(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int sp = PG_GETARG_INT32(2); /* substring start position */ - int sl = PG_GETARG_INT32(3); /* substring length */ - - PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); -} - -Datum -byteaoverlay_no_len(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int sp = PG_GETARG_INT32(2); /* substring start position */ - int sl; - - sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */ - PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); -} - -static bytea * -bytea_overlay(bytea *t1, bytea *t2, int sp, int sl) -{ - bytea *result; - bytea *s1; - bytea *s2; - int sp_pl_sl; - - /* - * Check for possible integer-overflow cases. 
For negative sp, throw a - * "substring length" error because that's what should be expected - * according to the spec's definition of OVERLAY(). - */ - if (sp <= 0) - ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), - errmsg("negative substring length not allowed"))); - if (pg_add_s32_overflow(sp, sl, &sp_pl_sl)) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("integer out of range"))); - - s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false); - s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true); - result = bytea_catenate(s1, t2); - result = bytea_catenate(result, s2); - - return result; -} - -/* - * bit_count - */ -Datum -bytea_bit_count(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - - PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1))); -} - -/* - * byteapos - - * Return the position of the specified substring. - * Implements the SQL POSITION() function. - * Cloned from textpos and modified as required. - */ -Datum -byteapos(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int pos; - int px, - p; - int len1, - len2; - char *p1, - *p2; - - len1 = VARSIZE_ANY_EXHDR(t1); - len2 = VARSIZE_ANY_EXHDR(t2); - - if (len2 <= 0) - PG_RETURN_INT32(1); /* result for empty pattern */ - - p1 = VARDATA_ANY(t1); - p2 = VARDATA_ANY(t2); - - pos = 0; - px = (len1 - len2); - for (p = 0; p <= px; p++) - { - if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0)) - { - pos = p + 1; - break; - }; - p1++; - }; - - PG_RETURN_INT32(pos); -} - -/*------------------------------------------------------------- - * byteaGetByte - * - * this routine treats "bytea" as an array of bytes. - * It returns the Nth byte (a number between 0 and 255). 
- *------------------------------------------------------------- - */ -Datum -byteaGetByte(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int32 n = PG_GETARG_INT32(1); - int len; - int byte; - - len = VARSIZE_ANY_EXHDR(v); - - if (n < 0 || n >= len) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %d out of valid range, 0..%d", - n, len - 1))); - - byte = ((unsigned char *) VARDATA_ANY(v))[n]; - - PG_RETURN_INT32(byte); -} - -/*------------------------------------------------------------- - * byteaGetBit - * - * This routine treats a "bytea" type like an array of bits. - * It returns the value of the Nth bit (0 or 1). - * - *------------------------------------------------------------- - */ -Datum -byteaGetBit(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int64 n = PG_GETARG_INT64(1); - int byteNo, - bitNo; - int len; - int byte; - - len = VARSIZE_ANY_EXHDR(v); - - if (n < 0 || n >= (int64) len * 8) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, - n, (int64) len * 8 - 1))); - - /* n/8 is now known < len, so safe to cast to int */ - byteNo = (int) (n / 8); - bitNo = (int) (n % 8); - - byte = ((unsigned char *) VARDATA_ANY(v))[byteNo]; - - if (byte & (1 << bitNo)) - PG_RETURN_INT32(1); - else - PG_RETURN_INT32(0); -} - -/*------------------------------------------------------------- - * byteaSetByte - * - * Given an instance of type 'bytea' creates a new one with - * the Nth byte set to the given value. 
- * - *------------------------------------------------------------- - */ -Datum -byteaSetByte(PG_FUNCTION_ARGS) -{ - bytea *res = PG_GETARG_BYTEA_P_COPY(0); - int32 n = PG_GETARG_INT32(1); - int32 newByte = PG_GETARG_INT32(2); - int len; - - len = VARSIZE(res) - VARHDRSZ; - - if (n < 0 || n >= len) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %d out of valid range, 0..%d", - n, len - 1))); - - /* - * Now set the byte. - */ - ((unsigned char *) VARDATA(res))[n] = newByte; - - PG_RETURN_BYTEA_P(res); -} - -/*------------------------------------------------------------- - * byteaSetBit - * - * Given an instance of type 'bytea' creates a new one with - * the Nth bit set to the given value. - * - *------------------------------------------------------------- - */ -Datum -byteaSetBit(PG_FUNCTION_ARGS) -{ - bytea *res = PG_GETARG_BYTEA_P_COPY(0); - int64 n = PG_GETARG_INT64(1); - int32 newBit = PG_GETARG_INT32(2); - int len; - int oldByte, - newByte; - int byteNo, - bitNo; - - len = VARSIZE(res) - VARHDRSZ; - - if (n < 0 || n >= (int64) len * 8) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, - n, (int64) len * 8 - 1))); - - /* n/8 is now known < len, so safe to cast to int */ - byteNo = (int) (n / 8); - bitNo = (int) (n % 8); - - /* - * sanity check! - */ - if (newBit != 0 && newBit != 1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("new bit must be 0 or 1"))); - - /* - * Update the byte. 
- */ - oldByte = ((unsigned char *) VARDATA(res))[byteNo]; - - if (newBit == 0) - newByte = oldByte & (~(1 << bitNo)); - else - newByte = oldByte | (1 << bitNo); - - ((unsigned char *) VARDATA(res))[byteNo] = newByte; - - PG_RETURN_BYTEA_P(res); -} - -/* - * Return reversed bytea - */ -Datum -bytea_reverse(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - const char *p = VARDATA_ANY(v); - int len = VARSIZE_ANY_EXHDR(v); - const char *endp = p + len; - bytea *result = palloc(len + VARHDRSZ); - char *dst = (char *) VARDATA(result) + len; - - SET_VARSIZE(result, len + VARHDRSZ); - - while (p < endp) - *(--dst) = *p++; - - PG_RETURN_BYTEA_P(result); -} - - /* text_name() * Converts a text type to a Name type. */ @@ -3849,331 +3076,6 @@ SplitGUCList(char *rawstring, char separator, return true; } - -/***************************************************************************** - * Comparison Functions used for bytea - * - * Note: btree indexes need these routines not to leak memory; therefore, - * be careful to free working copies of toasted datums. Most places don't - * need to be so careful. - *****************************************************************************/ - -Datum -byteaeq(PG_FUNCTION_ARGS) -{ - Datum arg1 = PG_GETARG_DATUM(0); - Datum arg2 = PG_GETARG_DATUM(1); - bool result; - Size len1, - len2; - - /* - * We can use a fast path for unequal lengths, which might save us from - * having to detoast one or both values. 
- */ - len1 = toast_raw_datum_size(arg1); - len2 = toast_raw_datum_size(arg2); - if (len1 != len2) - result = false; - else - { - bytea *barg1 = DatumGetByteaPP(arg1); - bytea *barg2 = DatumGetByteaPP(arg2); - - result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), - len1 - VARHDRSZ) == 0); - - PG_FREE_IF_COPY(barg1, 0); - PG_FREE_IF_COPY(barg2, 1); - } - - PG_RETURN_BOOL(result); -} - -Datum -byteane(PG_FUNCTION_ARGS) -{ - Datum arg1 = PG_GETARG_DATUM(0); - Datum arg2 = PG_GETARG_DATUM(1); - bool result; - Size len1, - len2; - - /* - * We can use a fast path for unequal lengths, which might save us from - * having to detoast one or both values. - */ - len1 = toast_raw_datum_size(arg1); - len2 = toast_raw_datum_size(arg2); - if (len1 != len2) - result = true; - else - { - bytea *barg1 = DatumGetByteaPP(arg1); - bytea *barg2 = DatumGetByteaPP(arg2); - - result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), - len1 - VARHDRSZ) != 0); - - PG_FREE_IF_COPY(barg1, 0); - PG_FREE_IF_COPY(barg2, 1); - } - - PG_RETURN_BOOL(result); -} - -Datum -bytealt(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2))); -} - -Datum -byteale(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2))); -} - -Datum -byteagt(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - 
int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2))); -} - -Datum -byteage(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2))); -} - -Datum -byteacmp(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - if ((cmp == 0) && (len1 != len2)) - cmp = (len1 < len2) ? -1 : 1; - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_INT32(cmp); -} - -Datum -bytea_larger(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - bytea *result; - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2); - - PG_RETURN_BYTEA_P(result); -} - -Datum -bytea_smaller(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - bytea *result; - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? 
arg1 : arg2); - - PG_RETURN_BYTEA_P(result); -} - -Datum -bytea_sortsupport(PG_FUNCTION_ARGS) -{ - SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); - MemoryContext oldcontext; - - oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); - - /* Use generic string SortSupport, forcing "C" collation */ - varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID); - - MemoryContextSwitchTo(oldcontext); - - PG_RETURN_VOID(); -} - -/* Cast bytea -> int2 */ -Datum -bytea_int2(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint16 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("smallint out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT16(result); -} - -/* Cast bytea -> int4 */ -Datum -bytea_int4(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint32 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("integer out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT32(result); -} - -/* Cast bytea -> int8 */ -Datum -bytea_int8(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint64 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("bigint out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < 
len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT64(result); -} - -/* Cast int2 -> bytea; can just use int2send() */ -Datum -int2_bytea(PG_FUNCTION_ARGS) -{ - return int2send(fcinfo); -} - -/* Cast int4 -> bytea; can just use int4send() */ -Datum -int4_bytea(PG_FUNCTION_ARGS) -{ - return int4send(fcinfo); -} - -/* Cast int8 -> bytea; can just use int8send() */ -Datum -int8_bytea(PG_FUNCTION_ARGS) -{ - return int8send(fcinfo); -} - /* * appendStringInfoText * diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index a4150bff2ea..2bd39b6ac4b 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -529,14 +529,36 @@ xmltext(PG_FUNCTION_ARGS) #ifdef USE_LIBXML text *arg = PG_GETARG_TEXT_PP(0); text *result; - xmlChar *xmlbuf = NULL; + volatile xmlChar *xmlbuf = NULL; + PgXmlErrorContext *xmlerrcxt; + + /* Otherwise, we gotta spin up some error handling. */ + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); - xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg)); + PG_TRY(); + { + xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg)); - Assert(xmlbuf); + if (xmlbuf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlChar"); + + result = cstring_to_text_with_len((const char *) xmlbuf, + xmlStrlen((const xmlChar *) xmlbuf)); + } + PG_CATCH(); + { + if (xmlbuf) + xmlFree((xmlChar *) xmlbuf); + + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlFree((xmlChar *) xmlbuf); + pg_xml_done(xmlerrcxt, false); - result = cstring_to_text_with_len((const char *) xmlbuf, xmlStrlen(xmlbuf)); - xmlFree(xmlbuf); PG_RETURN_XML_P(result); #else NO_XML_SUPPORT(); @@ -770,7 +792,10 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) if (oldroot != NULL) xmlFreeNode(oldroot); - xmlAddChildList(root, content_nodes); + if (xmlAddChildList(root, content_nodes) == 
NULL || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not append xml node list"); /* * We use this node to insert newlines in the dump. Note: in at @@ -931,7 +956,10 @@ xmlelement(XmlExpr *xexpr, xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate xmlTextWriter"); - xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name); + if (xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not start xml element"); forboth(arg, named_arg_strings, narg, xexpr->arg_names) { @@ -939,19 +967,30 @@ xmlelement(XmlExpr *xexpr, char *argname = strVal(lfirst(narg)); if (str) - xmlTextWriterWriteAttribute(writer, - (xmlChar *) argname, - (xmlChar *) str); + { + if (xmlTextWriterWriteAttribute(writer, + (xmlChar *) argname, + (xmlChar *) str) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not write xml attribute"); + } } foreach(arg, arg_strings) { char *str = (char *) lfirst(arg); - xmlTextWriterWriteRaw(writer, (xmlChar *) str); + if (xmlTextWriterWriteRaw(writer, (xmlChar *) str) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not write raw xml text"); } - xmlTextWriterEndElement(writer); + if (xmlTextWriterEndElement(writer) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not end xml element"); /* we MUST do this now to flush data out to the buffer ... 
*/ xmlFreeTextWriter(writer); @@ -4220,20 +4259,27 @@ xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt) } else { - xmlChar *str; + volatile xmlChar *str = NULL; - str = xmlXPathCastNodeToString(cur); PG_TRY(); { + char *escaped; + + str = xmlXPathCastNodeToString(cur); + if (str == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlChar"); + /* Here we rely on XML having the same representation as TEXT */ - char *escaped = escape_xml((char *) str); + escaped = escape_xml((char *) str); result = (xmltype *) cstring_to_text(escaped); pfree(escaped); } PG_FINALLY(); { - xmlFree(str); + if (str) + xmlFree((xmlChar *) str); } PG_END_TRY(); } diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 657648996c2..d1b25214376 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -317,6 +317,7 @@ GetCCHashEqFuncs(Oid keytype, CCHashFN *hashfunc, RegProcedure *eqfunc, CCFastEq case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: + case REGDATABASEOID: *hashfunc = int4hashfast; *fasteqfunc = int4eqfast; *eqfunc = F_OIDEQ; diff --git a/src/backend/utils/cache/funccache.c b/src/backend/utils/cache/funccache.c index 150c502a612..afc048a051e 100644 --- a/src/backend/utils/cache/funccache.c +++ b/src/backend/utils/cache/funccache.c @@ -491,6 +491,7 @@ cached_function_compile(FunctionCallInfo fcinfo, CachedFunctionHashKey hashkey; bool function_valid = false; bool hashkey_valid = false; + bool new_function = false; /* * Lookup the pg_proc tuple by Oid; we'll need it in any case @@ -570,13 +571,15 @@ recheck: /* * Create the new function struct, if not done already. The function - * structs are never thrown away, so keep them in TopMemoryContext. + * cache entry will be kept for the life of the backend, so put it in + * TopMemoryContext. 
*/ Assert(cacheEntrySize >= sizeof(CachedFunction)); if (function == NULL) { function = (CachedFunction *) MemoryContextAllocZero(TopMemoryContext, cacheEntrySize); + new_function = true; } else { @@ -585,17 +588,36 @@ recheck: } /* - * Fill in the CachedFunction part. fn_hashkey and use_count remain - * zeroes for now. + * However, if function compilation fails, we'd like not to leak the + * function struct, so use a PG_TRY block to prevent that. (It's up + * to the compile callback function to avoid its own internal leakage + * in such cases.) Unfortunately, freeing the struct is only safe if + * we just allocated it: otherwise there are probably fn_extra + * pointers to it. */ - function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); - function->fn_tid = procTup->t_self; - function->dcallback = dcallback; + PG_TRY(); + { + /* + * Do the hard, language-specific part. + */ + ccallback(fcinfo, procTup, &hashkey, function, forValidator); + } + PG_CATCH(); + { + if (new_function) + pfree(function); + PG_RE_THROW(); + } + PG_END_TRY(); /* - * Do the hard, language-specific part. + * Fill in the CachedFunction part. (We do this last to prevent the + * function from looking valid before it's fully built.) fn_hashkey + * will be set by cfunc_hashtable_insert; use_count remains zero. */ - ccallback(fcinfo, procTup, &hashkey, function, forValidator); + function->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data); + function->fn_tid = procTup->t_self; + function->dcallback = dcallback; /* * Add the completed struct to the hash table. diff --git a/src/backend/utils/fmgr/dfmgr.c b/src/backend/utils/fmgr/dfmgr.c index 603632581d0..4bb84ff7087 100644 --- a/src/backend/utils/fmgr/dfmgr.c +++ b/src/backend/utils/fmgr/dfmgr.c @@ -99,6 +99,14 @@ load_external_function(const char *filename, const char *funcname, void *lib_handle; void *retval; + /* + * If the value starts with "$libdir/", strip that. 
This is because many + * extensions have hardcoded '$libdir/foo' as their library name, which + * prevents using the path. + */ + if (strncmp(filename, "$libdir/", 8) == 0) + filename += 8; + /* Expand the possibly-abbreviated filename to an exact path name */ fullname = expand_dynamic_library_name(filename); @@ -456,14 +464,6 @@ expand_dynamic_library_name(const char *name) Assert(name); - /* - * If the value starts with "$libdir/", strip that. This is because many - * extensions have hardcoded '$libdir/foo' as their library name, which - * prevents using the path. - */ - if (strncmp(name, "$libdir/", 8) == 0) - name += 8; - have_slash = (first_dir_separator(name) != NULL); if (!have_slash) diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 92b0446b80c..d31cb45a058 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -39,7 +39,6 @@ volatile sig_atomic_t TransactionTimeoutPending = false; volatile sig_atomic_t IdleSessionTimeoutPending = false; volatile sig_atomic_t ProcSignalBarrierPending = false; volatile sig_atomic_t LogMemoryContextPending = false; -volatile sig_atomic_t PublishMemoryContextPending = false; volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false; volatile uint32 InterruptHoldoffCount = 0; volatile uint32 QueryCancelHoldoffCount = 0; diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 89d72cdd5ff..c86ceefda94 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -663,13 +663,6 @@ BaseInit(void) * drop ephemeral slots, which in turn triggers stats reporting. */ ReplicationSlotInitialize(); - - /* - * The before shmem exit callback frees the DSA memory occupied by the - * latest memory context statistics that could be published by this proc - * if requested. 
- */ - before_shmem_exit(AtProcExit_memstats_cleanup, 0); } diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 308016d7763..886ecbad871 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -39,6 +39,7 @@ #include "mb/pg_wchar.h" #include "utils/fmgrprotos.h" #include "utils/memutils.h" +#include "utils/relcache.h" #include "varatt.h" /* diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 2f8cbd86759..511dc32d519 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -1028,7 +1028,7 @@ struct config_bool ConfigureNamesBool[] = }, { {"enable_distinct_reordering", PGC_USERSET, QUERY_TUNING_METHOD, - gettext_noop("Enables reordering of DISTINCT pathkeys."), + gettext_noop("Enables reordering of DISTINCT keys."), NULL, GUC_EXPLAIN }, @@ -1602,11 +1602,11 @@ struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, { - {"log_lock_failure", PGC_SUSET, LOGGING_WHAT, + {"log_lock_failures", PGC_SUSET, LOGGING_WHAT, gettext_noop("Logs lock failures."), NULL }, - &log_lock_failure, + &log_lock_failures, false, NULL, NULL, NULL }, @@ -4837,7 +4837,7 @@ struct config_string ConfigureNamesString[] = { {"ssl_groups", PGC_SIGHUP, CONN_AUTH_SSL, gettext_noop("Sets the group(s) to use for Diffie-Hellman key exchange."), - gettext_noop("Multiple groups can be specified using colon-separated list."), + gettext_noop("Multiple groups can be specified using a colon-separated list."), GUC_SUPERUSER_ONLY }, &SSLECDHCurve, diff --git a/src/backend/utils/misc/injection_point.c b/src/backend/utils/misc/injection_point.c index f58ebc8ee52..83b887b6978 100644 --- a/src/backend/utils/misc/injection_point.c +++ b/src/backend/utils/misc/injection_point.c @@ -584,3 +584,49 @@ IsInjectionPointAttached(const char *name) return false; /* silence compiler */ #endif } + +/* + * Retrieve a list of all the injection points currently attached. 
+ * + * This list is palloc'd in the current memory context. + */ +List * +InjectionPointList(void) +{ +#ifdef USE_INJECTION_POINTS + List *inj_points = NIL; + uint32 max_inuse; + + LWLockAcquire(InjectionPointLock, LW_SHARED); + + max_inuse = pg_atomic_read_u32(&ActiveInjectionPoints->max_inuse); + + for (uint32 idx = 0; idx < max_inuse; idx++) + { + InjectionPointEntry *entry; + InjectionPointData *inj_point; + uint64 generation; + + entry = &ActiveInjectionPoints->entries[idx]; + generation = pg_atomic_read_u64(&entry->generation); + + /* skip free slots */ + if (generation % 2 == 0) + continue; + + inj_point = (InjectionPointData *) palloc0(sizeof(InjectionPointData)); + inj_point->name = pstrdup(entry->name); + inj_point->library = pstrdup(entry->library); + inj_point->function = pstrdup(entry->function); + inj_points = lappend(inj_points, inj_point); + } + + LWLockRelease(InjectionPointLock); + + return inj_points; + +#else + elog(ERROR, "Injection points are not supported by this build"); + return NIL; /* keep compiler quiet */ +#endif +} diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 34826d01380..341f88adc87 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -97,6 +97,7 @@ #password_encryption = scram-sha-256 # scram-sha-256 or md5 #scram_iterations = 4096 #md5_password_warnings = on +#oauth_validator_libraries = '' # comma-separated list of trusted validator modules # GSSAPI using Kerberos #krb_server_keyfile = 'FILE:${sysconfdir}/krb5.keytab' @@ -121,9 +122,6 @@ #ssl_passphrase_command = '' #ssl_passphrase_command_supports_reload = off -# OAuth -#oauth_validator_libraries = '' # comma-separated list of trusted validator modules - #------------------------------------------------------------------------------ # RESOURCE USAGE (except WAL) @@ -180,13 +178,11 @@ #temp_file_limit = -1 # limits per-process temp file space # in 
kilobytes, or -1 for no limit +#file_copy_method = copy # copy, clone (if supported by OS) + #max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated # for NOTIFY / LISTEN queue -#file_copy_method = copy # the default is the first option - # copy - # clone (if system support is available) - # - Kernel Resources - #max_files_per_process = 1000 # min 64 @@ -628,7 +624,7 @@ # %% = '%' # e.g. '<%u%%%d> ' #log_lock_waits = off # log lock waits >= deadlock_timeout -#log_lock_failure = off # log lock failures +#log_lock_failures = off # log lock failures #log_recovery_conflict_waits = off # log standby recovery conflict waits # >= deadlock_timeout #log_parameter_max_length = -1 # when logging statements, limit logged diff --git a/src/backend/utils/mmgr/alignedalloc.c b/src/backend/utils/mmgr/alignedalloc.c index 85aee389d6b..7eea695de62 100644 --- a/src/backend/utils/mmgr/alignedalloc.c +++ b/src/backend/utils/mmgr/alignedalloc.c @@ -45,6 +45,7 @@ AlignedAllocFree(void *pointer) GetMemoryChunkContext(unaligned)->name, chunk); #endif + /* Recursively pfree the unaligned chunk */ pfree(unaligned); } @@ -96,18 +97,32 @@ AlignedAllocRealloc(void *pointer, Size size, int flags) Assert(old_size >= redirchunk->requested_size); #endif + /* + * To keep things simple, we always allocate a new aligned chunk and copy + * data into it. Because of the above inaccuracy, this may end in copying + * more data than was in the original allocation request size, but that + * should be OK. + */ ctx = GetMemoryChunkContext(unaligned); newptr = MemoryContextAllocAligned(ctx, size, alignto, flags); - /* - * We may memcpy beyond the end of the original allocation request size, - * so we must mark the entire allocation as defined. 
- */ - if (likely(newptr != NULL)) + /* Cope cleanly with OOM */ + if (unlikely(newptr == NULL)) { - VALGRIND_MAKE_MEM_DEFINED(pointer, old_size); - memcpy(newptr, pointer, Min(size, old_size)); + VALGRIND_MAKE_MEM_NOACCESS(redirchunk, sizeof(MemoryChunk)); + return MemoryContextAllocationFailure(ctx, size, flags); } + + /* + * We may memcpy more than the original allocation request size, which + * would result in trying to copy trailing bytes that the original + * MemoryContextAllocAligned call marked NOACCESS. So we must mark the + * entire old_size as defined. That's slightly annoying, but probably not + * worth improving. + */ + VALGRIND_MAKE_MEM_DEFINED(pointer, old_size); + memcpy(newptr, pointer, Min(size, old_size)); + pfree(unaligned); return newptr; diff --git a/src/backend/utils/mmgr/dsa.c b/src/backend/utils/mmgr/dsa.c index 17d4f7a7a06..be43e9351c3 100644 --- a/src/backend/utils/mmgr/dsa.c +++ b/src/backend/utils/mmgr/dsa.c @@ -532,6 +532,21 @@ dsa_attach(dsa_handle handle) } /* + * Returns whether the area with the given handle was already attached by the + * current process. The area must have been created with dsa_create (not + * dsa_create_in_place). + */ +bool +dsa_is_attached(dsa_handle handle) +{ + /* + * An area handle is really a DSM segment handle for the first segment, so + * we can just search for that. + */ + return dsm_find_mapping(handle) != NULL; +} + +/* * Attach to an area that was created with dsa_create_in_place. 
The caller * must somehow know the location in memory that was used when the area was * created, though it may be mapped at a different virtual address in this diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index 7d28ca706eb..15fa4d0a55e 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -23,11 +23,6 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" -#include "nodes/pg_list.h" -#include "storage/lwlock.h" -#include "storage/ipc.h" -#include "utils/dsa.h" -#include "utils/hsearch.h" #include "utils/memdebug.h" #include "utils/memutils.h" #include "utils/memutils_internal.h" @@ -140,17 +135,6 @@ static const MemoryContextMethods mcxt_methods[] = { }; #undef BOGUS_MCTX -/* - * This is passed to MemoryContextStatsInternal to determine whether - * to print context statistics or not and where to print them logs or - * stderr. - */ -typedef enum PrintDestination -{ - PRINT_STATS_TO_STDERR = 0, - PRINT_STATS_TO_LOGS, - PRINT_STATS_NONE -} PrintDestination; /* * CurrentMemoryContext @@ -172,31 +156,16 @@ MemoryContext CurTransactionContext = NULL; /* This is a transient link to the active portal's memory context: */ MemoryContext PortalContext = NULL; -dsa_area *MemoryStatsDsaArea = NULL; static void MemoryContextDeleteOnly(MemoryContext context); static void MemoryContextCallResetCallbacks(MemoryContext context); static void MemoryContextStatsInternal(MemoryContext context, int level, int max_level, int max_children, MemoryContextCounters *totals, - PrintDestination print_location, - int *num_contexts); + bool print_to_stderr); static void MemoryContextStatsPrint(MemoryContext context, void *passthru, const char *stats_string, bool print_to_stderr); -static void PublishMemoryContext(MemoryStatsEntry *memcxt_info, - int curr_id, MemoryContext context, - List *path, - MemoryContextCounters stat, - int num_contexts, dsa_area *area, - int max_levels); -static void compute_contexts_count_and_ids(List *contexts, HTAB 
*context_id_lookup, - int *stats_count, - bool summary); -static List *compute_context_path(MemoryContext c, HTAB *context_id_lookup); -static void free_memorycontextstate_dsa(dsa_area *area, int total_stats, - dsa_pointer prev_dsa_pointer); -static void end_memorycontext_reporting(void); /* * You should not do memory allocations within a critical section, because @@ -862,19 +831,11 @@ MemoryContextStatsDetail(MemoryContext context, bool print_to_stderr) { MemoryContextCounters grand_totals; - int num_contexts; - PrintDestination print_location; memset(&grand_totals, 0, sizeof(grand_totals)); - if (print_to_stderr) - print_location = PRINT_STATS_TO_STDERR; - else - print_location = PRINT_STATS_TO_LOGS; - - /* num_contexts report number of contexts aggregated in the output */ MemoryContextStatsInternal(context, 1, max_level, max_children, - &grand_totals, print_location, &num_contexts); + &grand_totals, print_to_stderr); if (print_to_stderr) fprintf(stderr, @@ -909,14 +870,13 @@ MemoryContextStatsDetail(MemoryContext context, * One recursion level for MemoryContextStats * * Print stats for this context if possible, but in any case accumulate counts - * into *totals (if not NULL). The callers should make sure that print_location - * is set to PRINT_STATS_TO_STDERR or PRINT_STATS_TO_LOGS or PRINT_STATS_NONE. + * into *totals (if not NULL). 
*/ static void MemoryContextStatsInternal(MemoryContext context, int level, int max_level, int max_children, MemoryContextCounters *totals, - PrintDestination print_location, int *num_contexts) + bool print_to_stderr) { MemoryContext child; int ichild; @@ -924,39 +884,10 @@ MemoryContextStatsInternal(MemoryContext context, int level, Assert(MemoryContextIsValid(context)); /* Examine the context itself */ - switch (print_location) - { - case PRINT_STATS_TO_STDERR: - context->methods->stats(context, - MemoryContextStatsPrint, - &level, - totals, true); - break; - - case PRINT_STATS_TO_LOGS: - context->methods->stats(context, - MemoryContextStatsPrint, - &level, - totals, false); - break; - - case PRINT_STATS_NONE: - - /* - * Do not print the statistics if print_location is - * PRINT_STATS_NONE, only compute totals. This is used in - * reporting of memory context statistics via a sql function. Last - * parameter is not relevant. - */ - context->methods->stats(context, - NULL, - NULL, - totals, false); - break; - } - - /* Increment the context count for each of the recursive call */ - *num_contexts = *num_contexts + 1; + context->methods->stats(context, + MemoryContextStatsPrint, + &level, + totals, print_to_stderr); /* * Examine children. @@ -976,7 +907,7 @@ MemoryContextStatsInternal(MemoryContext context, int level, MemoryContextStatsInternal(child, level + 1, max_level, max_children, totals, - print_location, num_contexts); + print_to_stderr); } } @@ -995,13 +926,7 @@ MemoryContextStatsInternal(MemoryContext context, int level, child = MemoryContextTraverseNext(child, context); } - /* - * Add the count of children contexts which are traversed in the - * non-recursive manner. 
- */ - *num_contexts = *num_contexts + ichild; - - if (print_location == PRINT_STATS_TO_STDERR) + if (print_to_stderr) { for (int i = 0; i < level; i++) fprintf(stderr, " "); @@ -1014,7 +939,7 @@ MemoryContextStatsInternal(MemoryContext context, int level, local_totals.freechunks, local_totals.totalspace - local_totals.freespace); } - else if (print_location == PRINT_STATS_TO_LOGS) + else ereport(LOG_SERVER_ONLY, (errhidestmt(true), errhidecontext(true), @@ -1356,22 +1281,6 @@ HandleLogMemoryContextInterrupt(void) } /* - * HandleGetMemoryContextInterrupt - * Handle receipt of an interrupt indicating a request to publish memory - * contexts statistics. - * - * All the actual work is deferred to ProcessGetMemoryContextInterrupt() as - * this cannot be performed in a signal handler. - */ -void -HandleGetMemoryContextInterrupt(void) -{ - InterruptPending = true; - PublishMemoryContextPending = true; - /* latch will be set by procsignal_sigusr1_handler */ -} - -/* * ProcessLogMemoryContextInterrupt * Perform logging of memory contexts of this backend process. * @@ -1408,539 +1317,6 @@ ProcessLogMemoryContextInterrupt(void) MemoryContextStatsDetail(TopMemoryContext, 100, 100, false); } -/* - * ProcessGetMemoryContextInterrupt - * Generate information about memory contexts used by the process. - * - * Performs a breadth first search on the memory context tree, thus parents - * statistics are reported before their children in the monitoring function - * output. - * - * Statistics for all the processes are shared via the same dynamic shared - * area. Statistics written by each process are tracked independently in - * per-process DSA pointers. These pointers are stored in static shared memory. - * - * We calculate maximum number of context's statistics that can be displayed - * using a pre-determined limit for memory available per process for this - * utility maximum size of statistics for each context. 
The remaining context - * statistics if any are captured as a cumulative total at the end of - * individual context's statistics. - * - * If summary is true, we capture the level 1 and level 2 contexts - * statistics. For that we traverse the memory context tree recursively in - * depth first search manner to cover all the children of a parent context, to - * be able to display a cumulative total of memory consumption by a parent at - * level 2 and all its children. - */ -void -ProcessGetMemoryContextInterrupt(void) -{ - List *contexts; - HASHCTL ctl; - HTAB *context_id_lookup; - int context_id = 0; - MemoryStatsEntry *meminfo; - bool summary = false; - int max_stats; - int idx = MyProcNumber; - int stats_count = 0; - int stats_num = 0; - MemoryContextCounters stat; - int num_individual_stats = 0; - - PublishMemoryContextPending = false; - - /* - * The hash table is used for constructing "path" column of the view, - * similar to its local backend counterpart. - */ - ctl.keysize = sizeof(MemoryContext); - ctl.entrysize = sizeof(MemoryStatsContextId); - ctl.hcxt = CurrentMemoryContext; - - context_id_lookup = hash_create("pg_get_remote_backend_memory_contexts", - 256, - &ctl, - HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); - - /* List of contexts to process in the next round - start at the top. */ - contexts = list_make1(TopMemoryContext); - - /* Compute the number of stats that can fit in the defined limit */ - max_stats = - MEMORY_CONTEXT_REPORT_MAX_PER_BACKEND / MAX_MEMORY_CONTEXT_STATS_SIZE; - LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE); - summary = memCxtState[idx].summary; - LWLockRelease(&memCxtState[idx].lw_lock); - - /* - * Traverse the memory context tree to find total number of contexts. If - * summary is requested report the total number of contexts at level 1 and - * 2 from the top. Also, populate the hash table of context ids. 
- */ - compute_contexts_count_and_ids(contexts, context_id_lookup, &stats_count, - summary); - - /* - * Allocate memory in this process's DSA for storing statistics of the - * memory contexts upto max_stats, for contexts that don't fit within a - * limit, a cumulative total is written as the last record in the DSA - * segment. - */ - stats_num = Min(stats_count, max_stats); - - LWLockAcquire(&memCxtArea->lw_lock, LW_EXCLUSIVE); - - /* - * Create a DSA and send handle to the client process after storing the - * context statistics. If number of contexts exceed a predefined limit - * (1MB), a cumulative total is stored for such contexts. - */ - if (memCxtArea->memstats_dsa_handle == DSA_HANDLE_INVALID) - { - MemoryContext oldcontext = CurrentMemoryContext; - dsa_handle handle; - - MemoryContextSwitchTo(TopMemoryContext); - - MemoryStatsDsaArea = dsa_create(memCxtArea->lw_lock.tranche); - - handle = dsa_get_handle(MemoryStatsDsaArea); - MemoryContextSwitchTo(oldcontext); - - dsa_pin_mapping(MemoryStatsDsaArea); - - /* - * Pin the DSA area, this is to make sure the area remains attachable - * even if the backend that created it exits. This is done so that the - * statistics are published even if the process exits while a client - * is waiting. Also, other processes that publish statistics will use - * the same area. - */ - dsa_pin(MemoryStatsDsaArea); - - /* Set the handle in shared memory */ - memCxtArea->memstats_dsa_handle = handle; - } - - /* - * If DSA exists, created by another process publishing statistics, attach - * to it. - */ - else if (MemoryStatsDsaArea == NULL) - { - MemoryContext oldcontext = CurrentMemoryContext; - - MemoryContextSwitchTo(TopMemoryContext); - MemoryStatsDsaArea = dsa_attach(memCxtArea->memstats_dsa_handle); - MemoryContextSwitchTo(oldcontext); - dsa_pin_mapping(MemoryStatsDsaArea); - } - LWLockRelease(&memCxtArea->lw_lock); - - /* - * Hold the process lock to protect writes to process specific memory. 
Two - * processes publishing statistics do not block each other. - */ - LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE); - memCxtState[idx].proc_id = MyProcPid; - - if (DsaPointerIsValid(memCxtState[idx].memstats_dsa_pointer)) - { - /* - * Free any previous allocations, free the name, ident and path - * pointers before freeing the pointer that contains them. - */ - free_memorycontextstate_dsa(MemoryStatsDsaArea, memCxtState[idx].total_stats, - memCxtState[idx].memstats_dsa_pointer); - } - - /* - * Assigning total stats before allocating memory so that memory cleanup - * can run if any subsequent dsa_allocate call to allocate name/ident/path - * fails. - */ - memCxtState[idx].total_stats = stats_num; - memCxtState[idx].memstats_dsa_pointer = - dsa_allocate0(MemoryStatsDsaArea, stats_num * sizeof(MemoryStatsEntry)); - - meminfo = (MemoryStatsEntry *) - dsa_get_address(MemoryStatsDsaArea, memCxtState[idx].memstats_dsa_pointer); - - if (summary) - { - int cxt_id = 0; - List *path = NIL; - - /* Copy TopMemoryContext statistics to DSA */ - memset(&stat, 0, sizeof(stat)); - (*TopMemoryContext->methods->stats) (TopMemoryContext, NULL, NULL, - &stat, true); - path = lcons_int(1, path); - PublishMemoryContext(meminfo, cxt_id, TopMemoryContext, path, stat, - 1, MemoryStatsDsaArea, 100); - cxt_id = cxt_id + 1; - - /* - * Copy statistics for each of TopMemoryContexts children. This - * includes statistics of at most 100 children per node, with each - * child node limited to a depth of 100 in its subtree. 
- */ - for (MemoryContext c = TopMemoryContext->firstchild; c != NULL; - c = c->nextchild) - { - MemoryContextCounters grand_totals; - int num_contexts = 0; - - path = NIL; - memset(&grand_totals, 0, sizeof(grand_totals)); - - MemoryContextStatsInternal(c, 1, 100, 100, &grand_totals, - PRINT_STATS_NONE, &num_contexts); - - path = compute_context_path(c, context_id_lookup); - - /* - * Register the stats entry first, that way the cleanup handler - * can reach it in case of allocation failures of one or more - * members. - */ - memCxtState[idx].total_stats = cxt_id++; - PublishMemoryContext(meminfo, cxt_id, c, path, - grand_totals, num_contexts, MemoryStatsDsaArea, 100); - } - memCxtState[idx].total_stats = cxt_id; - - /* Notify waiting backends and return */ - end_memorycontext_reporting(); - - hash_destroy(context_id_lookup); - - return; - } - - foreach_ptr(MemoryContextData, cur, contexts) - { - List *path = NIL; - - /* - * Figure out the transient context_id of this context and each of its - * ancestors, to compute a path for this context. - */ - path = compute_context_path(cur, context_id_lookup); - - /* Examine the context stats */ - memset(&stat, 0, sizeof(stat)); - (*cur->methods->stats) (cur, NULL, NULL, &stat, true); - - /* Account for saving one statistics slot for cumulative reporting */ - if (context_id < (max_stats - 1) || stats_count <= max_stats) - { - /* Copy statistics to DSA memory */ - PublishMemoryContext(meminfo, context_id, cur, path, stat, 1, MemoryStatsDsaArea, 100); - } - else - { - meminfo[max_stats - 1].totalspace += stat.totalspace; - meminfo[max_stats - 1].nblocks += stat.nblocks; - meminfo[max_stats - 1].freespace += stat.freespace; - meminfo[max_stats - 1].freechunks += stat.freechunks; - } - - /* - * DSA max limit per process is reached, write aggregate of the - * remaining statistics. - * - * We can store contexts from 0 to max_stats - 1. 
When stats_count is - * greater than max_stats, we stop reporting individual statistics - * when context_id equals max_stats - 2. As we use max_stats - 1 array - * slot for reporting cumulative statistics or "Remaining Totals". - */ - if (stats_count > max_stats && context_id == (max_stats - 2)) - { - char *nameptr; - int namelen = strlen("Remaining Totals"); - - num_individual_stats = context_id + 1; - meminfo[max_stats - 1].name = dsa_allocate(MemoryStatsDsaArea, namelen + 1); - nameptr = dsa_get_address(MemoryStatsDsaArea, meminfo[max_stats - 1].name); - strlcpy(nameptr, "Remaining Totals", namelen + 1); - meminfo[max_stats - 1].ident = InvalidDsaPointer; - meminfo[max_stats - 1].path = InvalidDsaPointer; - meminfo[max_stats - 1].type = 0; - } - context_id++; - } - - /* - * Statistics are not aggregated, i.e individual statistics reported when - * stats_count <= max_stats. - */ - if (stats_count <= max_stats) - { - memCxtState[idx].total_stats = context_id; - } - /* Report number of aggregated memory contexts */ - else - { - meminfo[max_stats - 1].num_agg_stats = context_id - - num_individual_stats; - - /* - * Total stats equals num_individual_stats + 1 record for cumulative - * statistics. - */ - memCxtState[idx].total_stats = num_individual_stats + 1; - } - - /* Notify waiting backends and return */ - end_memorycontext_reporting(); - - hash_destroy(context_id_lookup); -} - -/* - * Update timestamp and signal all the waiting client backends after copying - * all the statistics. - */ -static void -end_memorycontext_reporting(void) -{ - memCxtState[MyProcNumber].stats_timestamp = GetCurrentTimestamp(); - LWLockRelease(&memCxtState[MyProcNumber].lw_lock); - ConditionVariableBroadcast(&memCxtState[MyProcNumber].memcxt_cv); -} - -/* - * compute_context_path - * - * Append the transient context_id of this context and each of its ancestors - * to a list, in order to compute a path. 
- */ -static List * -compute_context_path(MemoryContext c, HTAB *context_id_lookup) -{ - bool found; - List *path = NIL; - MemoryContext cur_context; - - for (cur_context = c; cur_context != NULL; cur_context = cur_context->parent) - { - MemoryStatsContextId *cur_entry; - - cur_entry = hash_search(context_id_lookup, &cur_context, HASH_FIND, &found); - - if (!found) - elog(ERROR, "hash table corrupted, can't construct path value"); - - path = lcons_int(cur_entry->context_id, path); - } - - return path; -} - -/* - * Return the number of contexts allocated currently by the backend - * Assign context ids to each of the contexts. - */ -static void -compute_contexts_count_and_ids(List *contexts, HTAB *context_id_lookup, - int *stats_count, bool summary) -{ - foreach_ptr(MemoryContextData, cur, contexts) - { - MemoryStatsContextId *entry; - bool found; - - entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur, - HASH_ENTER, &found); - Assert(!found); - - /* - * context id starts with 1 so increment the stats_count before - * assigning. - */ - entry->context_id = ++(*stats_count); - - /* Append the children of the current context to the main list. */ - for (MemoryContext c = cur->firstchild; c != NULL; c = c->nextchild) - { - if (summary) - { - entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &c, - HASH_ENTER, &found); - Assert(!found); - - entry->context_id = ++(*stats_count); - } - - contexts = lappend(contexts, c); - } - - /* - * In summary mode only the first two level (from top) contexts are - * displayed. 
- */ - if (summary) - break; - } -} - -/* - * PublishMemoryContext - * - * Copy the memory context statistics of a single context to a DSA memory - */ -static void -PublishMemoryContext(MemoryStatsEntry *memcxt_info, int curr_id, - MemoryContext context, List *path, - MemoryContextCounters stat, int num_contexts, - dsa_area *area, int max_levels) -{ - const char *ident = context->ident; - const char *name = context->name; - int *path_list; - - /* - * To be consistent with logging output, we label dynahash contexts with - * just the hash table name as with MemoryContextStatsPrint(). - */ - if (context->ident && strncmp(context->name, "dynahash", 8) == 0) - { - name = context->ident; - ident = NULL; - } - - if (name != NULL) - { - int namelen = strlen(name); - char *nameptr; - - if (strlen(name) >= MEMORY_CONTEXT_IDENT_SHMEM_SIZE) - namelen = pg_mbcliplen(name, namelen, - MEMORY_CONTEXT_IDENT_SHMEM_SIZE - 1); - - memcxt_info[curr_id].name = dsa_allocate(area, namelen + 1); - nameptr = (char *) dsa_get_address(area, memcxt_info[curr_id].name); - strlcpy(nameptr, name, namelen + 1); - } - else - memcxt_info[curr_id].name = InvalidDsaPointer; - - /* Trim and copy the identifier if it is not set to NULL */ - if (ident != NULL) - { - int idlen = strlen(context->ident); - char *identptr; - - /* - * Some identifiers such as SQL query string can be very long, - * truncate oversize identifiers. 
- */ - if (idlen >= MEMORY_CONTEXT_IDENT_SHMEM_SIZE) - idlen = pg_mbcliplen(ident, idlen, - MEMORY_CONTEXT_IDENT_SHMEM_SIZE - 1); - - memcxt_info[curr_id].ident = dsa_allocate(area, idlen + 1); - identptr = (char *) dsa_get_address(area, memcxt_info[curr_id].ident); - strlcpy(identptr, ident, idlen + 1); - } - else - memcxt_info[curr_id].ident = InvalidDsaPointer; - - /* Allocate DSA memory for storing path information */ - if (path == NIL) - memcxt_info[curr_id].path = InvalidDsaPointer; - else - { - int levels = Min(list_length(path), max_levels); - - memcxt_info[curr_id].path_length = levels; - memcxt_info[curr_id].path = dsa_allocate0(area, levels * sizeof(int)); - memcxt_info[curr_id].levels = list_length(path); - path_list = (int *) dsa_get_address(area, memcxt_info[curr_id].path); - - foreach_int(i, path) - { - path_list[foreach_current_index(i)] = i; - if (--levels == 0) - break; - } - } - memcxt_info[curr_id].type = context->type; - memcxt_info[curr_id].totalspace = stat.totalspace; - memcxt_info[curr_id].nblocks = stat.nblocks; - memcxt_info[curr_id].freespace = stat.freespace; - memcxt_info[curr_id].freechunks = stat.freechunks; - memcxt_info[curr_id].num_agg_stats = num_contexts; -} - -/* - * free_memorycontextstate_dsa - * - * Worker for freeing resources from a MemoryStatsEntry. Callers are - * responsible for ensuring that the DSA pointer is valid. 
- */ -static void -free_memorycontextstate_dsa(dsa_area *area, int total_stats, - dsa_pointer prev_dsa_pointer) -{ - MemoryStatsEntry *meminfo; - - meminfo = (MemoryStatsEntry *) dsa_get_address(area, prev_dsa_pointer); - Assert(meminfo != NULL); - for (int i = 0; i < total_stats; i++) - { - if (DsaPointerIsValid(meminfo[i].name)) - dsa_free(area, meminfo[i].name); - - if (DsaPointerIsValid(meminfo[i].ident)) - dsa_free(area, meminfo[i].ident); - - if (DsaPointerIsValid(meminfo[i].path)) - dsa_free(area, meminfo[i].path); - } - - dsa_free(area, memCxtState[MyProcNumber].memstats_dsa_pointer); - memCxtState[MyProcNumber].memstats_dsa_pointer = InvalidDsaPointer; -} - -/* - * Free the memory context statistics stored by this process - * in DSA area. - */ -void -AtProcExit_memstats_cleanup(int code, Datum arg) -{ - int idx = MyProcNumber; - - if (memCxtArea->memstats_dsa_handle == DSA_HANDLE_INVALID) - return; - - LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE); - - if (!DsaPointerIsValid(memCxtState[idx].memstats_dsa_pointer)) - { - LWLockRelease(&memCxtState[idx].lw_lock); - return; - } - - /* If the dsa mapping could not be found, attach to the area */ - if (MemoryStatsDsaArea == NULL) - MemoryStatsDsaArea = dsa_attach(memCxtArea->memstats_dsa_handle); - - /* - * Free the memory context statistics, free the name, ident and path - * pointers before freeing the pointer that contains these pointers and - * integer statistics. 
- */ - free_memorycontextstate_dsa(MemoryStatsDsaArea, memCxtState[idx].total_stats, - memCxtState[idx].memstats_dsa_pointer); - - dsa_detach(MemoryStatsDsaArea); - LWLockRelease(&memCxtState[idx].lw_lock); -} - void * palloc(Size size) { diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl index 15dd10ce40a..b7ef7ed8d06 100644 --- a/src/bin/initdb/t/001_initdb.pl +++ b/src/bin/initdb/t/001_initdb.pl @@ -76,7 +76,8 @@ command_like( 'checksums are enabled in control file'); command_ok([ 'initdb', '--sync-only', $datadir ], 'sync only'); -command_ok([ 'initdb', '--sync-only', '--no-sync-data-files', $datadir ], '--no-sync-data-files'); +command_ok([ 'initdb', '--sync-only', '--no-sync-data-files', $datadir ], + '--no-sync-data-files'); command_fails([ 'initdb', $datadir ], 'existing data directory'); if ($supports_syncfs) diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl index 2a3af2666f5..72693660fb6 100644 --- a/src/bin/pg_amcheck/t/004_verify_heapam.pl +++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl @@ -529,7 +529,7 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++) $tup->{t_infomask2} |= HEAP_NATTS_MASK; push @expected, - qr/${$header}number of attributes 2047 exceeds maximum expected for table 3/; + qr/${$header}number of attributes 2047 exceeds maximum 3 expected for table/; } elsif ($offnum == 10) { @@ -552,7 +552,7 @@ for (my $tupidx = 0; $tupidx < $ROWCOUNT; $tupidx++) $tup->{t_hoff} = 32; push @expected, - qr/${$header}number of attributes 67 exceeds maximum expected for table 3/; + qr/${$header}number of attributes 67 exceeds maximum 3 expected for table/; } elsif ($offnum == 12) { diff --git a/src/bin/pg_basebackup/meson.build b/src/bin/pg_basebackup/meson.build index 8a1c96b4f5c..3a7fc10eab0 100644 --- a/src/bin/pg_basebackup/meson.build +++ b/src/bin/pg_basebackup/meson.build @@ -93,9 +93,9 @@ tests += { 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), 'tap': 
{ - 'env': {'GZIP_PROGRAM': gzip.found() ? gzip.path() : '', - 'TAR': tar.found() ? tar.path() : '', - 'LZ4': program_lz4.found() ? program_lz4.path() : '', + 'env': {'GZIP_PROGRAM': gzip.found() ? gzip.full_path() : '', + 'TAR': tar.found() ? tar.full_path() : '', + 'LZ4': program_lz4.found() ? program_lz4.full_path() : '', }, 'tests': [ 't/010_pg_basebackup.pl', diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index eb7354200bc..55621f35fb6 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -487,7 +487,7 @@ reached_end_position(XLogRecPtr segendpos, uint32 timeline, if (r < 0) pg_fatal("could not read from ready pipe: %m"); - if (sscanf(xlogend, "%X/%X", &hi, &lo) != 2) + if (sscanf(xlogend, "%X/%08X", &hi, &lo) != 2) pg_fatal("could not parse write-ahead log location \"%s\"", xlogend); xlogendptr = ((uint64) hi) << 32 | lo; @@ -629,7 +629,7 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier, param->wal_compress_level = wal_compress_level; /* Convert the starting position */ - if (sscanf(startpos, "%X/%X", &hi, &lo) != 2) + if (sscanf(startpos, "%X/%08X", &hi, &lo) != 2) pg_fatal("could not parse write-ahead log location \"%s\"", startpos); param->startptr = ((uint64) hi) << 32 | lo; @@ -2255,7 +2255,7 @@ BaseBackup(char *compression_algorithm, char *compression_detail, * value directly in the variable, and then set the flag that says * it's there. 
*/ - if (sscanf(xlogend, "%X/%X", &hi, &lo) != 2) + if (sscanf(xlogend, "%X/%08X", &hi, &lo) != 2) pg_fatal("could not parse write-ahead log location \"%s\"", xlogend); xlogendptr = ((uint64) hi) << 32 | lo; diff --git a/src/bin/pg_basebackup/pg_createsubscriber.c b/src/bin/pg_basebackup/pg_createsubscriber.c index f65acc7cb11..025b893a41e 100644 --- a/src/bin/pg_basebackup/pg_createsubscriber.c +++ b/src/bin/pg_basebackup/pg_createsubscriber.c @@ -46,7 +46,7 @@ struct CreateSubscriberOptions SimpleStringList replslot_names; /* list of replication slot names */ int recovery_timeout; /* stop recovery after this time */ bool all_dbs; /* all option */ - SimpleStringList objecttypes_to_remove; /* list of object types to remove */ + SimpleStringList objecttypes_to_clean; /* list of object types to cleanup */ }; /* per-database publication/subscription info */ @@ -71,8 +71,8 @@ struct LogicalRepInfos { struct LogicalRepInfo *dbinfo; bool two_phase; /* enable-two-phase option */ - bits32 objecttypes_to_remove; /* flags indicating which object types - * to remove on subscriber */ + bits32 objecttypes_to_clean; /* flags indicating which object types + * to clean up on subscriber */ }; static void cleanup_objects_atexit(void); @@ -247,19 +247,19 @@ usage(void) printf(_(" %s [OPTION]...\n"), progname); printf(_("\nOptions:\n")); printf(_(" -a, --all create subscriptions for all databases except template\n" - " databases or databases that don't allow connections\n")); + " databases and databases that don't allow connections\n")); printf(_(" -d, --database=DBNAME database in which to create a subscription\n")); printf(_(" -D, --pgdata=DATADIR location for the subscriber data directory\n")); printf(_(" -n, --dry-run dry run, just show what would be done\n")); printf(_(" -p, --subscriber-port=PORT subscriber port number (default %s)\n"), DEFAULT_SUB_PORT); printf(_(" -P, --publisher-server=CONNSTR publisher connection string\n")); - printf(_(" -R, --remove=OBJECTTYPE remove all 
objects of the specified type from specified\n" - " databases on the subscriber; accepts: publications\n")); printf(_(" -s, --socketdir=DIR socket directory to use (default current dir.)\n")); printf(_(" -t, --recovery-timeout=SECS seconds to wait for recovery to end\n")); printf(_(" -T, --enable-two-phase enable two-phase commit for all subscriptions\n")); printf(_(" -U, --subscriber-username=NAME user name for subscriber connection\n")); printf(_(" -v, --verbose output verbose messages\n")); + printf(_(" --clean=OBJECTTYPE drop all objects of the specified type from specified\n" + " databases on the subscriber; accepts: \"%s\"\n"), "publications"); printf(_(" --config-file=FILENAME use specified main server configuration\n" " file when running target cluster\n")); printf(_(" --publication=NAME publication name\n")); @@ -973,7 +973,7 @@ check_publisher(const struct LogicalRepInfo *dbinfo) pg_log_warning("two_phase option will not be enabled for replication slots"); pg_log_warning_detail("Subscriptions will be created with the two_phase option disabled. 
" "Prepared transactions will be replicated at COMMIT PREPARED."); - pg_log_warning_hint("You can use --enable-two-phase switch to enable two_phase."); + pg_log_warning_hint("You can use the command-line option --enable-two-phase to enable two_phase."); } /* @@ -1262,7 +1262,7 @@ setup_recovery(const struct LogicalRepInfo *dbinfo, const char *datadir, const c { appendPQExpBufferStr(recoveryconfcontents, "# dry run mode"); appendPQExpBuffer(recoveryconfcontents, - "recovery_target_lsn = '%X/%X'\n", + "recovery_target_lsn = '%X/%08X'\n", LSN_FORMAT_ARGS((XLogRecPtr) InvalidXLogRecPtr)); } else @@ -1730,7 +1730,7 @@ static void check_and_drop_publications(PGconn *conn, struct LogicalRepInfo *dbinfo) { PGresult *res; - bool drop_all_pubs = dbinfos.objecttypes_to_remove & OBJECTTYPE_PUBLICATIONS; + bool drop_all_pubs = dbinfos.objecttypes_to_clean & OBJECTTYPE_PUBLICATIONS; Assert(conn != NULL); @@ -1876,7 +1876,7 @@ set_replication_progress(PGconn *conn, const struct LogicalRepInfo *dbinfo, cons if (dry_run) { suboid = InvalidOid; - lsnstr = psprintf("%X/%X", LSN_FORMAT_ARGS((XLogRecPtr) InvalidXLogRecPtr)); + lsnstr = psprintf("%X/%08X", LSN_FORMAT_ARGS((XLogRecPtr) InvalidXLogRecPtr)); } else { @@ -2026,7 +2026,6 @@ main(int argc, char **argv) {"dry-run", no_argument, NULL, 'n'}, {"subscriber-port", required_argument, NULL, 'p'}, {"publisher-server", required_argument, NULL, 'P'}, - {"remove", required_argument, NULL, 'R'}, {"socketdir", required_argument, NULL, 's'}, {"recovery-timeout", required_argument, NULL, 't'}, {"enable-two-phase", no_argument, NULL, 'T'}, @@ -2038,6 +2037,7 @@ main(int argc, char **argv) {"publication", required_argument, NULL, 2}, {"replication-slot", required_argument, NULL, 3}, {"subscription", required_argument, NULL, 4}, + {"clean", required_argument, NULL, 5}, {NULL, 0, NULL, 0} }; @@ -2109,7 +2109,7 @@ main(int argc, char **argv) get_restricted_token(); - while ((c = getopt_long(argc, argv, "ad:D:np:P:R:s:t:TU:v", + while ((c = 
getopt_long(argc, argv, "ad:D:np:P:s:t:TU:v", long_options, &option_index)) != -1) { switch (c) @@ -2139,12 +2139,6 @@ main(int argc, char **argv) case 'P': opt.pub_conninfo_str = pg_strdup(optarg); break; - case 'R': - if (!simple_string_list_member(&opt.objecttypes_to_remove, optarg)) - simple_string_list_append(&opt.objecttypes_to_remove, optarg); - else - pg_fatal("object type \"%s\" is specified more than once for -R/--remove", optarg); - break; case 's': opt.socket_dir = pg_strdup(optarg); canonicalize_path(opt.socket_dir); @@ -2191,6 +2185,12 @@ main(int argc, char **argv) else pg_fatal("subscription \"%s\" specified more than once for --subscription", optarg); break; + case 5: + if (!simple_string_list_member(&opt.objecttypes_to_clean, optarg)) + simple_string_list_append(&opt.objecttypes_to_clean, optarg); + else + pg_fatal("object type \"%s\" specified more than once for --clean", optarg); + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -2214,7 +2214,7 @@ main(int argc, char **argv) if (bad_switch) { - pg_log_error("%s cannot be used with -a/--all", bad_switch); + pg_log_error("options %s and -a/--all cannot be used together", bad_switch); pg_log_error_hint("Try \"%s --help\" for more information.", progname); exit(1); } @@ -2334,14 +2334,14 @@ main(int argc, char **argv) } /* Verify the object types specified for removal from the subscriber */ - for (SimpleStringListCell *cell = opt.objecttypes_to_remove.head; cell; cell = cell->next) + for (SimpleStringListCell *cell = opt.objecttypes_to_clean.head; cell; cell = cell->next) { if (pg_strcasecmp(cell->val, "publications") == 0) - dbinfos.objecttypes_to_remove |= OBJECTTYPE_PUBLICATIONS; + dbinfos.objecttypes_to_clean |= OBJECTTYPE_PUBLICATIONS; else { - pg_log_error("invalid object type \"%s\" specified for -R/--remove", cell->val); - pg_log_error_hint("The valid option is: \"publications\""); + 
pg_log_error("invalid object type \"%s\" specified for --clean", cell->val); + pg_log_error_hint("The valid value is: \"%s\"", "publications"); exit(1); } } diff --git a/src/bin/pg_basebackup/pg_receivewal.c b/src/bin/pg_basebackup/pg_receivewal.c index e816cf58101..289ca14dcfe 100644 --- a/src/bin/pg_basebackup/pg_receivewal.c +++ b/src/bin/pg_basebackup/pg_receivewal.c @@ -188,14 +188,14 @@ stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished) /* we assume that we get called once at the end of each segment */ if (verbose && segment_finished) - pg_log_info("finished segment at %X/%X (timeline %u)", + pg_log_info("finished segment at %X/%08X (timeline %u)", LSN_FORMAT_ARGS(xlogpos), timeline); if (!XLogRecPtrIsInvalid(endpos) && endpos < xlogpos) { if (verbose) - pg_log_info("stopped log streaming at %X/%X (timeline %u)", + pg_log_info("stopped log streaming at %X/%08X (timeline %u)", LSN_FORMAT_ARGS(xlogpos), timeline); time_to_stop = true; @@ -211,7 +211,7 @@ stop_streaming(XLogRecPtr xlogpos, uint32 timeline, bool segment_finished) * timeline, but it's close enough for reporting purposes. 
*/ if (verbose && prevtimeline != 0 && prevtimeline != timeline) - pg_log_info("switched to timeline %u at %X/%X", + pg_log_info("switched to timeline %u at %X/%08X", timeline, LSN_FORMAT_ARGS(prevpos)); @@ -575,7 +575,7 @@ StreamLog(void) * Start the replication */ if (verbose) - pg_log_info("starting log streaming at %X/%X (timeline %u)", + pg_log_info("starting log streaming at %X/%08X (timeline %u)", LSN_FORMAT_ARGS(stream.startpos), stream.timeline); @@ -689,7 +689,7 @@ main(int argc, char **argv) basedir = pg_strdup(optarg); break; case 'E': - if (sscanf(optarg, "%X/%X", &hi, &lo) != 2) + if (sscanf(optarg, "%X/%08X", &hi, &lo) != 2) pg_fatal("could not parse end position \"%s\"", optarg); endpos = ((uint64) hi) << 32 | lo; break; diff --git a/src/bin/pg_basebackup/pg_recvlogical.c b/src/bin/pg_basebackup/pg_recvlogical.c index e6810efe5f0..8a5dd24e6c9 100644 --- a/src/bin/pg_basebackup/pg_recvlogical.c +++ b/src/bin/pg_basebackup/pg_recvlogical.c @@ -41,8 +41,8 @@ typedef enum /* Global Options */ static char *outfile = NULL; static int verbose = 0; -static bool two_phase = false; -static bool failover = false; +static bool two_phase = false; /* enable-two-phase option */ +static bool failover = false; /* enable-failover option */ static int noloop = 0; static int standby_message_timeout = 10 * 1000; /* 10 sec = default */ static int fsync_interval = 10 * 1000; /* 10 sec = default */ @@ -89,9 +89,9 @@ usage(void) printf(_(" --drop-slot drop the replication slot (for the slot's name see --slot)\n")); printf(_(" --start start streaming in a replication slot (for the slot's name see --slot)\n")); printf(_("\nOptions:\n")); + printf(_(" --enable-failover enable replication slot synchronization to standby servers when\n" + " creating a replication slot\n")); printf(_(" -E, --endpos=LSN exit after receiving the specified LSN\n")); - printf(_(" --failover enable replication slot synchronization to standby servers when\n" - " creating a slot\n")); printf(_(" -f, 
--file=FILE receive log into this file, - for stdout\n")); printf(_(" -F --fsync-interval=SECS\n" " time between fsyncs to the output file (default: %d)\n"), (fsync_interval / 1000)); @@ -105,7 +105,8 @@ usage(void) printf(_(" -s, --status-interval=SECS\n" " time between status packets sent to server (default: %d)\n"), (standby_message_timeout / 1000)); printf(_(" -S, --slot=SLOTNAME name of the logical replication slot\n")); - printf(_(" -t, --two-phase enable decoding of prepared transactions when creating a slot\n")); + printf(_(" -t, --enable-two-phase enable decoding of prepared transactions when creating a slot\n")); + printf(_(" --two-phase (same as --enable-two-phase, deprecated)\n")); printf(_(" -v, --verbose output verbose messages\n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" -?, --help show this help, then exit\n")); @@ -143,7 +144,7 @@ sendFeedback(PGconn *conn, TimestampTz now, bool force, bool replyRequested) return true; if (verbose) - pg_log_info("confirming write up to %X/%X, flush to %X/%X (slot %s)", + pg_log_info("confirming write up to %X/%08X, flush to %X/%08X (slot %s)", LSN_FORMAT_ARGS(output_written_lsn), LSN_FORMAT_ARGS(output_fsync_lsn), replication_slot); @@ -237,13 +238,13 @@ StreamLogicalLog(void) * Start the replication */ if (verbose) - pg_log_info("starting log streaming at %X/%X (slot %s)", + pg_log_info("starting log streaming at %X/%08X (slot %s)", LSN_FORMAT_ARGS(startpos), replication_slot); /* Initiate the replication stream at specified location */ query = createPQExpBuffer(); - appendPQExpBuffer(query, "START_REPLICATION SLOT \"%s\" LOGICAL %X/%X", + appendPQExpBuffer(query, "START_REPLICATION SLOT \"%s\" LOGICAL %X/%08X", replication_slot, LSN_FORMAT_ARGS(startpos)); /* print options if there are any */ @@ -698,9 +699,10 @@ main(int argc, char **argv) {"file", required_argument, NULL, 'f'}, {"fsync-interval", required_argument, NULL, 'F'}, {"no-loop", no_argument, NULL, 'n'}, - 
{"failover", no_argument, NULL, 5}, + {"enable-failover", no_argument, NULL, 5}, + {"enable-two-phase", no_argument, NULL, 't'}, + {"two-phase", no_argument, NULL, 't'}, /* deprecated */ {"verbose", no_argument, NULL, 'v'}, - {"two-phase", no_argument, NULL, 't'}, {"version", no_argument, NULL, 'V'}, {"help", no_argument, NULL, '?'}, /* connection options */ @@ -798,12 +800,12 @@ main(int argc, char **argv) break; /* replication options */ case 'I': - if (sscanf(optarg, "%X/%X", &hi, &lo) != 2) + if (sscanf(optarg, "%X/%08X", &hi, &lo) != 2) pg_fatal("could not parse start position \"%s\"", optarg); startpos = ((uint64) hi) << 32 | lo; break; case 'E': - if (sscanf(optarg, "%X/%X", &hi, &lo) != 2) + if (sscanf(optarg, "%X/%08X", &hi, &lo) != 2) pg_fatal("could not parse end position \"%s\"", optarg); endpos = ((uint64) hi) << 32 | lo; break; @@ -928,14 +930,14 @@ main(int argc, char **argv) { if (two_phase) { - pg_log_error("--two-phase may only be specified with --create-slot"); + pg_log_error("%s may only be specified with --create-slot", "--enable-two-phase"); pg_log_error_hint("Try \"%s --help\" for more information.", progname); exit(1); } if (failover) { - pg_log_error("--failover may only be specified with --create-slot"); + pg_log_error("%s may only be specified with --create-slot", "--enable-failover"); pg_log_error_hint("Try \"%s --help\" for more information.", progname); exit(1); } @@ -1073,12 +1075,12 @@ prepareToTerminate(PGconn *conn, XLogRecPtr endpos, StreamStopReason reason, pg_log_info("received interrupt signal, exiting"); break; case STREAM_STOP_KEEPALIVE: - pg_log_info("end position %X/%X reached by keepalive", + pg_log_info("end position %X/%08X reached by keepalive", LSN_FORMAT_ARGS(endpos)); break; case STREAM_STOP_END_OF_WAL: Assert(!XLogRecPtrIsInvalid(lsn)); - pg_log_info("end position %X/%X reached by WAL record at %X/%X", + pg_log_info("end position %X/%08X reached by WAL record at %X/%08X", LSN_FORMAT_ARGS(endpos), 
LSN_FORMAT_ARGS(lsn)); break; case STREAM_STOP_NONE: diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c index 6b6e32dfbdf..d6b7f117fa3 100644 --- a/src/bin/pg_basebackup/receivelog.c +++ b/src/bin/pg_basebackup/receivelog.c @@ -571,7 +571,7 @@ ReceiveXlogStream(PGconn *conn, StreamCtl *stream) return true; /* Initiate the replication stream at specified location */ - snprintf(query, sizeof(query), "START_REPLICATION %s%X/%X TIMELINE %u", + snprintf(query, sizeof(query), "START_REPLICATION %s%X/%08X TIMELINE %u", slotcmd, LSN_FORMAT_ARGS(stream->startpos), stream->timeline); @@ -628,7 +628,7 @@ ReceiveXlogStream(PGconn *conn, StreamCtl *stream) } if (stream->startpos > stoppos) { - pg_log_error("server stopped streaming timeline %u at %X/%X, but reported next timeline %u to begin at %X/%X", + pg_log_error("server stopped streaming timeline %u at %X/%08X, but reported next timeline %u to begin at %X/%08X", stream->timeline, LSN_FORMAT_ARGS(stoppos), newtimeline, LSN_FORMAT_ARGS(stream->startpos)); goto error; @@ -720,7 +720,7 @@ ReadEndOfStreamingResult(PGresult *res, XLogRecPtr *startpos, uint32 *timeline) } *timeline = atoi(PQgetvalue(res, 0, 0)); - if (sscanf(PQgetvalue(res, 0, 1), "%X/%X", &startpos_xlogid, + if (sscanf(PQgetvalue(res, 0, 1), "%X/%08X", &startpos_xlogid, &startpos_xrecoff) != 2) { pg_log_error("could not parse next timeline's starting point \"%s\"", diff --git a/src/bin/pg_basebackup/streamutil.c b/src/bin/pg_basebackup/streamutil.c index c7b8a4c3a4b..e5a7cb6e5b1 100644 --- a/src/bin/pg_basebackup/streamutil.c +++ b/src/bin/pg_basebackup/streamutil.c @@ -445,7 +445,7 @@ RunIdentifySystem(PGconn *conn, char **sysid, TimeLineID *starttli, /* Get LSN start position if necessary */ if (startpos != NULL) { - if (sscanf(PQgetvalue(res, 0, 2), "%X/%X", &hi, &lo) != 2) + if (sscanf(PQgetvalue(res, 0, 2), "%X/%08X", &hi, &lo) != 2) { pg_log_error("could not parse write-ahead log location \"%s\"", PQgetvalue(res, 0, 2)); 
@@ -551,7 +551,7 @@ GetSlotInformation(PGconn *conn, const char *slot_name, uint32 hi, lo; - if (sscanf(PQgetvalue(res, 0, 1), "%X/%X", &hi, &lo) != 2) + if (sscanf(PQgetvalue(res, 0, 1), "%X/%08X", &hi, &lo) != 2) { pg_log_error("could not parse restart_lsn \"%s\" for replication slot \"%s\"", PQgetvalue(res, 0, 1), slot_name); diff --git a/src/bin/pg_basebackup/t/030_pg_recvlogical.pl b/src/bin/pg_basebackup/t/030_pg_recvlogical.pl index c82e78847b3..1b7a6f6f43f 100644 --- a/src/bin/pg_basebackup/t/030_pg_recvlogical.pl +++ b/src/bin/pg_basebackup/t/030_pg_recvlogical.pl @@ -110,7 +110,7 @@ $node->command_fails( '--dbname' => $node->connstr('postgres'), '--start', '--endpos' => $nextlsn, - '--two-phase', '--no-loop', + '--enable-two-phase', '--no-loop', '--file' => '-', ], 'incorrect usage'); @@ -142,12 +142,13 @@ $node->command_ok( '--slot' => 'test', '--dbname' => $node->connstr('postgres'), '--create-slot', - '--failover', + '--enable-failover', ], 'slot with failover created'); my $result = $node->safe_psql('postgres', - "SELECT failover FROM pg_catalog.pg_replication_slots WHERE slot_name = 'test'"); + "SELECT failover FROM pg_catalog.pg_replication_slots WHERE slot_name = 'test'" +); is($result, 't', "failover is enabled for the new slot"); done_testing(); diff --git a/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl b/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl index 2d532fee567..229fef5b3b5 100644 --- a/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl +++ b/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl @@ -331,7 +331,7 @@ $node_p->safe_psql($db1, $node_p->wait_for_replay_catchup($node_s); # Create user-defined publications, wait for streaming replication to sync them -# to the standby, then verify that '--remove' +# to the standby, then verify that '--clean' # removes them. 
$node_p->safe_psql( $db1, qq( @@ -399,7 +399,7 @@ command_fails_like( '--database' => $db1, '--all', ], - qr/--database cannot be used with -a\/--all/, + qr/options --database and -a\/--all cannot be used together/, 'fail if --database is used with --all'); # run pg_createsubscriber with '--publication' and '--all' and verify @@ -416,7 +416,7 @@ command_fails_like( '--all', '--publication' => 'pub1', ], - qr/--publication cannot be used with -a\/--all/, + qr/options --publication and -a\/--all cannot be used together/, 'fail if --publication is used with --all'); # run pg_createsubscriber with '--all' option @@ -446,7 +446,7 @@ is(scalar(() = $stderr =~ /creating subscription/g), # Run pg_createsubscriber on node S. --verbose is used twice # to show more information. # In passing, also test the --enable-two-phase option and -# --remove option +# --clean option command_ok( [ 'pg_createsubscriber', @@ -463,7 +463,7 @@ command_ok( '--database' => $db1, '--database' => $db2, '--enable-two-phase', - '--remove' => 'publications', + '--clean' => 'publications', ], 'run pg_createsubscriber on node S'); diff --git a/src/bin/pg_combinebackup/backup_label.c b/src/bin/pg_combinebackup/backup_label.c index e89d4603f09..e774bc78a62 100644 --- a/src/bin/pg_combinebackup/backup_label.c +++ b/src/bin/pg_combinebackup/backup_label.c @@ -247,7 +247,7 @@ parse_lsn(char *s, char *e, XLogRecPtr *lsn, char **c) unsigned lo; *e = '\0'; - success = (sscanf(s, "%X/%X%n", &hi, &lo, &nchars) == 2); + success = (sscanf(s, "%X/%08X%n", &hi, &lo, &nchars) == 2); *e = save; if (success) diff --git a/src/bin/pg_combinebackup/pg_combinebackup.c b/src/bin/pg_combinebackup/pg_combinebackup.c index 28e58cd8ef4..f5cef99f627 100644 --- a/src/bin/pg_combinebackup/pg_combinebackup.c +++ b/src/bin/pg_combinebackup/pg_combinebackup.c @@ -569,7 +569,7 @@ check_backup_label_files(int n_backups, char **backup_dirs) pg_fatal("backup at \"%s\" starts on timeline %u, but expected %u", backup_dirs[i], start_tli, 
check_tli); if (i < n_backups - 1 && start_lsn != check_lsn) - pg_fatal("backup at \"%s\" starts at LSN %X/%X, but expected %X/%X", + pg_fatal("backup at \"%s\" starts at LSN %X/%08X, but expected %X/%08X", backup_dirs[i], LSN_FORMAT_ARGS(start_lsn), LSN_FORMAT_ARGS(check_lsn)); diff --git a/src/bin/pg_combinebackup/t/010_hardlink.pl b/src/bin/pg_combinebackup/t/010_hardlink.pl index a0ee419090c..4f92d6676bd 100644 --- a/src/bin/pg_combinebackup/t/010_hardlink.pl +++ b/src/bin/pg_combinebackup/t/010_hardlink.pl @@ -56,7 +56,7 @@ $primary->command_ok( '--pgdata' => $backup1path, '--no-sync', '--checkpoint' => 'fast', - '--wal-method' => 'none' + '--wal-method' => 'none' ], "full backup"); @@ -74,7 +74,7 @@ $primary->command_ok( '--pgdata' => $backup2path, '--no-sync', '--checkpoint' => 'fast', - '--wal-method' => 'none', + '--wal-method' => 'none', '--incremental' => $backup1path . '/backup_manifest' ], "incremental backup"); @@ -112,45 +112,45 @@ done_testing(); # of the given data file. sub check_data_file { - my ($data_file, $last_segment_nlinks) = @_; - - my @data_file_segments = ($data_file); - - # Start checking for additional segments - my $segment_number = 1; - - while (1) - { - my $next_segment = $data_file . '.' . $segment_number; - - # If the file exists and is a regular file, add it to the list - if (-f $next_segment) - { - push @data_file_segments, $next_segment; - $segment_number++; - } - # Stop the loop if the file doesn't exist - else - { - last; - } - } - - # All segments of the given data file should contain 2 hard links, except - # for the last one, which should match the given number of links. 
- my $last_segment = pop @data_file_segments; - - for my $segment (@data_file_segments) - { - # Get the file's stat information of each segment - my $nlink_count = get_hard_link_count($segment); - ok($nlink_count == 2, "File '$segment' has 2 hard links"); - } - - # Get the file's stat information of the last segment - my $nlink_count = get_hard_link_count($last_segment); - ok($nlink_count == $last_segment_nlinks, - "File '$last_segment' has $last_segment_nlinks hard link(s)"); + my ($data_file, $last_segment_nlinks) = @_; + + my @data_file_segments = ($data_file); + + # Start checking for additional segments + my $segment_number = 1; + + while (1) + { + my $next_segment = $data_file . '.' . $segment_number; + + # If the file exists and is a regular file, add it to the list + if (-f $next_segment) + { + push @data_file_segments, $next_segment; + $segment_number++; + } + # Stop the loop if the file doesn't exist + else + { + last; + } + } + + # All segments of the given data file should contain 2 hard links, except + # for the last one, which should match the given number of links. + my $last_segment = pop @data_file_segments; + + for my $segment (@data_file_segments) + { + # Get the file's stat information of each segment + my $nlink_count = get_hard_link_count($segment); + ok($nlink_count == 2, "File '$segment' has 2 hard links"); + } + + # Get the file's stat information of the last segment + my $nlink_count = get_hard_link_count($last_segment); + ok($nlink_count == $last_segment_nlinks, + "File '$last_segment' has $last_segment_nlinks hard link(s)"); } @@ -159,11 +159,11 @@ sub check_data_file # that file. 
sub get_hard_link_count { - my ($file) = @_; + my ($file) = @_; - # Get file stats - my @stats = stat($file); - my $nlink = $stats[3]; # Number of hard links + # Get file stats + my @stats = stat($file); + my $nlink = $stats[3]; # Number of hard links - return $nlink; + return $nlink; } diff --git a/src/bin/pg_combinebackup/write_manifest.c b/src/bin/pg_combinebackup/write_manifest.c index 313f8929df5..819a3fd0b7a 100644 --- a/src/bin/pg_combinebackup/write_manifest.c +++ b/src/bin/pg_combinebackup/write_manifest.c @@ -155,7 +155,7 @@ finalize_manifest(manifest_writer *mwriter, for (wal_range = first_wal_range; wal_range != NULL; wal_range = wal_range->next) appendStringInfo(&mwriter->buf, - "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%X\", \"End-LSN\": \"%X/%X\" }", + "%s{ \"Timeline\": %u, \"Start-LSN\": \"%X/%08X\", \"End-LSN\": \"%X/%08X\" }", wal_range == first_wal_range ? "" : ",\n", wal_range->tli, LSN_FORMAT_ARGS(wal_range->start_lsn), diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 7bb801bb886..10de058ce91 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -245,9 +245,9 @@ main(int argc, char *argv[]) dbState(ControlFile->state)); printf(_("pg_control last modified: %s\n"), pgctime_str); - printf(_("Latest checkpoint location: %X/%X\n"), + printf(_("Latest checkpoint location: %X/%08X\n"), LSN_FORMAT_ARGS(ControlFile->checkPoint)); - printf(_("Latest checkpoint's REDO location: %X/%X\n"), + printf(_("Latest checkpoint's REDO location: %X/%08X\n"), LSN_FORMAT_ARGS(ControlFile->checkPointCopy.redo)); printf(_("Latest checkpoint's REDO WAL file: %s\n"), xlogfilename); @@ -282,15 +282,15 @@ main(int argc, char *argv[]) ControlFile->checkPointCopy.newestCommitTsXid); printf(_("Time of latest checkpoint: %s\n"), ckpttime_str); - printf(_("Fake LSN counter for unlogged rels: %X/%X\n"), + printf(_("Fake LSN counter for unlogged rels: %X/%08X\n"), 
LSN_FORMAT_ARGS(ControlFile->unloggedLSN)); - printf(_("Minimum recovery ending location: %X/%X\n"), + printf(_("Minimum recovery ending location: %X/%08X\n"), LSN_FORMAT_ARGS(ControlFile->minRecoveryPoint)); printf(_("Min recovery ending loc's timeline: %u\n"), ControlFile->minRecoveryPointTLI); - printf(_("Backup start location: %X/%X\n"), + printf(_("Backup start location: %X/%08X\n"), LSN_FORMAT_ARGS(ControlFile->backupStartPoint)); - printf(_("Backup end location: %X/%X\n"), + printf(_("Backup end location: %X/%08X\n"), LSN_FORMAT_ARGS(ControlFile->backupEndPoint)); printf(_("End-of-backup record required: %s\n"), ControlFile->backupEndRequired ? _("yes") : _("no")); diff --git a/src/bin/pg_dump/meson.build b/src/bin/pg_dump/meson.build index d8e9e101254..4a4ebbd8ec9 100644 --- a/src/bin/pg_dump/meson.build +++ b/src/bin/pg_dump/meson.build @@ -91,9 +91,9 @@ tests += { 'bd': meson.current_build_dir(), 'tap': { 'env': { - 'GZIP_PROGRAM': gzip.found() ? gzip.path() : '', - 'LZ4': program_lz4.found() ? program_lz4.path() : '', - 'ZSTD': program_zstd.found() ? program_zstd.path() : '', + 'GZIP_PROGRAM': gzip.found() ? gzip.full_path() : '', + 'LZ4': program_lz4.found() ? program_lz4.full_path() : '', + 'ZSTD': program_zstd.found() ? program_zstd.full_path() : '', 'with_icu': icu.found() ? 
'yes' : 'no', }, 'tests': [ diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c index afa42337b11..197c1295d93 100644 --- a/src/bin/pg_dump/pg_backup_archiver.c +++ b/src/bin/pg_dump/pg_backup_archiver.c @@ -152,7 +152,7 @@ InitDumpOptions(DumpOptions *opts) opts->dumpSections = DUMP_UNSECTIONED; opts->dumpSchema = true; opts->dumpData = true; - opts->dumpStatistics = true; + opts->dumpStatistics = false; } /* @@ -2655,7 +2655,7 @@ WriteToc(ArchiveHandle *AH) pg_fatal("unexpected TOC entry in WriteToc(): %d %s %s", te->dumpId, te->desc, te->tag); - if (fseeko(AH->FH, te->defnLen, SEEK_CUR != 0)) + if (fseeko(AH->FH, te->defnLen, SEEK_CUR) != 0) pg_fatal("error during file seek: %m"); } else if (te->defnDumper) diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c index 21b00792a8a..bc2a2fb4797 100644 --- a/src/bin/pg_dump/pg_backup_directory.c +++ b/src/bin/pg_dump/pg_backup_directory.c @@ -412,10 +412,15 @@ _LoadLOs(ArchiveHandle *AH, TocEntry *te) /* * Note: before archive v16, there was always only one BLOBS TOC entry, - * now there can be multiple. We don't need to worry what version we are - * reading though, because tctx->filename should be correct either way. + * now there can be multiple. Furthermore, although the actual filename + * was always "blobs.toc" before v16, the value of tctx->filename did not + * match that before commit 548e50976 fixed it. For simplicity we assume + * it must be "blobs.toc" in all archives before v16. 
*/ - setFilePath(AH, tocfname, tctx->filename); + if (AH->version < K_VERS_1_16) + setFilePath(AH, tocfname, "blobs.toc"); + else + setFilePath(AH, tocfname, tctx->filename); CFH = ctx->LOsTocFH = InitDiscoverCompressFileHandle(tocfname, PG_BINARY_R); diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 37432e66efd..1937997ea67 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -350,7 +350,9 @@ static void buildMatViewRefreshDependencies(Archive *fout); static void getTableDataFKConstraints(void); static void determineNotNullFlags(Archive *fout, PGresult *res, int r, TableInfo *tbinfo, int j, - int i_notnull_name, int i_notnull_invalidoid, + int i_notnull_name, + int i_notnull_comment, + int i_notnull_invalidoid, int i_notnull_noinherit, int i_notnull_islocal, PQExpBuffer *invalidnotnulloids); @@ -1235,7 +1237,7 @@ main(int argc, char **argv) static void help(const char *progname) { - printf(_("%s dumps a database as a text file or to other formats.\n\n"), progname); + printf(_("%s exports a PostgreSQL database as an SQL script or to other formats.\n\n"), progname); printf(_("Usage:\n")); printf(_(" %s [OPTION]... 
[DBNAME]\n"), progname); @@ -6890,7 +6892,8 @@ getRelationStatistics(Archive *fout, DumpableObject *rel, int32 relpages, (relkind == RELKIND_PARTITIONED_TABLE) || (relkind == RELKIND_INDEX) || (relkind == RELKIND_PARTITIONED_INDEX) || - (relkind == RELKIND_MATVIEW)) + (relkind == RELKIND_MATVIEW || + relkind == RELKIND_FOREIGN_TABLE)) { RelStatsInfo *info = pg_malloc0(sizeof(RelStatsInfo)); DumpableObject *dobj = &info->dobj; @@ -6929,6 +6932,7 @@ getRelationStatistics(Archive *fout, DumpableObject *rel, int32 relpages, case RELKIND_RELATION: case RELKIND_PARTITIONED_TABLE: case RELKIND_MATVIEW: + case RELKIND_FOREIGN_TABLE: info->section = SECTION_DATA; break; case RELKIND_INDEX: @@ -6936,7 +6940,7 @@ getRelationStatistics(Archive *fout, DumpableObject *rel, int32 relpages, info->section = SECTION_POST_DATA; break; default: - pg_fatal("cannot dump statistics for relation kind '%c'", + pg_fatal("cannot dump statistics for relation kind \"%c\"", info->relkind); } @@ -9004,6 +9008,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) int i_attalign; int i_attislocal; int i_notnull_name; + int i_notnull_comment; int i_notnull_noinherit; int i_notnull_islocal; int i_notnull_invalidoid; @@ -9087,7 +9092,8 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) /* * Find out any NOT NULL markings for each column. In 18 and up we read - * pg_constraint to obtain the constraint name. notnull_noinherit is set + * pg_constraint to obtain the constraint name, and for valid constraints + * also pg_description to obtain its comment. notnull_noinherit is set * according to the NO INHERIT property. 
For versions prior to 18, we * store an empty string as the name when a constraint is marked as * attnotnull (this cues dumpTableSchema to print the NOT NULL clause @@ -9095,7 +9101,8 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) * * For invalid constraints, we need to store their OIDs for processing * elsewhere, so we bring the pg_constraint.oid value when the constraint - * is invalid, and NULL otherwise. + * is invalid, and NULL otherwise. Their comments are handled not here + * but by collectComments, because they're their own dumpable object. * * We track in notnull_islocal whether the constraint was defined directly * in this table or via an ancestor, for binary upgrade. flagInhAttrs @@ -9105,6 +9112,8 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) if (fout->remoteVersion >= 180000) appendPQExpBufferStr(q, "co.conname AS notnull_name,\n" + "CASE WHEN co.convalidated THEN pt.description" + " ELSE NULL END AS notnull_comment,\n" "CASE WHEN NOT co.convalidated THEN co.oid " "ELSE NULL END AS notnull_invalidoid,\n" "co.connoinherit AS notnull_noinherit,\n" @@ -9112,6 +9121,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) else appendPQExpBufferStr(q, "CASE WHEN a.attnotnull THEN '' ELSE NULL END AS notnull_name,\n" + "NULL AS notnull_comment,\n" "NULL AS notnull_invalidoid,\n" "false AS notnull_noinherit,\n" "a.attislocal AS notnull_islocal,\n"); @@ -9155,15 +9165,16 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) /* * In versions 18 and up, we need pg_constraint for explicit NOT NULL - * entries. Also, we need to know if the NOT NULL for each column is - * backing a primary key. + * entries and pg_description to get their comments. 
*/ if (fout->remoteVersion >= 180000) appendPQExpBufferStr(q, " LEFT JOIN pg_catalog.pg_constraint co ON " "(a.attrelid = co.conrelid\n" " AND co.contype = 'n' AND " - "co.conkey = array[a.attnum])\n"); + "co.conkey = array[a.attnum])\n" + " LEFT JOIN pg_catalog.pg_description pt ON " + "(pt.classoid = co.tableoid AND pt.objoid = co.oid)\n"); appendPQExpBufferStr(q, "WHERE a.attnum > 0::pg_catalog.int2\n" @@ -9187,6 +9198,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) i_attalign = PQfnumber(res, "attalign"); i_attislocal = PQfnumber(res, "attislocal"); i_notnull_name = PQfnumber(res, "notnull_name"); + i_notnull_comment = PQfnumber(res, "notnull_comment"); i_notnull_invalidoid = PQfnumber(res, "notnull_invalidoid"); i_notnull_noinherit = PQfnumber(res, "notnull_noinherit"); i_notnull_islocal = PQfnumber(res, "notnull_islocal"); @@ -9255,6 +9267,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) tbinfo->attfdwoptions = (char **) pg_malloc(numatts * sizeof(char *)); tbinfo->attmissingval = (char **) pg_malloc(numatts * sizeof(char *)); tbinfo->notnull_constrs = (char **) pg_malloc(numatts * sizeof(char *)); + tbinfo->notnull_comment = (char **) pg_malloc(numatts * sizeof(char *)); tbinfo->notnull_invalid = (bool *) pg_malloc(numatts * sizeof(bool)); tbinfo->notnull_noinh = (bool *) pg_malloc(numatts * sizeof(bool)); tbinfo->notnull_islocal = (bool *) pg_malloc(numatts * sizeof(bool)); @@ -9286,11 +9299,14 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) determineNotNullFlags(fout, res, r, tbinfo, j, i_notnull_name, + i_notnull_comment, i_notnull_invalidoid, i_notnull_noinherit, i_notnull_islocal, &invalidnotnulloids); + tbinfo->notnull_comment[j] = PQgetisnull(res, r, i_notnull_comment) ? 
+ NULL : pg_strdup(PQgetvalue(res, r, i_notnull_comment)); tbinfo->attoptions[j] = pg_strdup(PQgetvalue(res, r, i_attoptions)); tbinfo->attcollation[j] = atooid(PQgetvalue(res, r, i_attcollation)); tbinfo->attcompression[j] = *(PQgetvalue(res, r, i_attcompression)); @@ -9461,7 +9477,7 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) int i_consrc; int i_conislocal; - pg_log_info("finding invalid not null constraints"); + pg_log_info("finding invalid not-null constraints"); resetPQExpBuffer(q); appendPQExpBuffer(q, @@ -9702,8 +9718,9 @@ getTableAttrs(Archive *fout, TableInfo *tblinfo, int numTables) * 4) The column has a constraint with a known name; in that case * notnull_constrs carries that name and dumpTableSchema will print * "CONSTRAINT the_name NOT NULL". However, if the name is the default - * (table_column_not_null), there's no need to print that name in the dump, - * so notnull_constrs is set to the empty string and it behaves as case 2. + * (table_column_not_null) and there's no comment on the constraint, + * there's no need to print that name in the dump, so notnull_constrs + * is set to the empty string and it behaves as case 2. * * In a child table that inherits from a parent already containing NOT NULL * constraints and the columns in the child don't have their own NOT NULL @@ -9730,6 +9747,7 @@ static void determineNotNullFlags(Archive *fout, PGresult *res, int r, TableInfo *tbinfo, int j, int i_notnull_name, + int i_notnull_comment, int i_notnull_invalidoid, int i_notnull_noinherit, int i_notnull_islocal, @@ -9803,11 +9821,13 @@ determineNotNullFlags(Archive *fout, PGresult *res, int r, { /* * In binary upgrade of inheritance child tables, must have a - * constraint name that we can UPDATE later. + * constraint name that we can UPDATE later; same if there's a + * comment on the constraint. 
*/ - if (dopt->binary_upgrade && - !tbinfo->ispartition && - !tbinfo->notnull_islocal) + if ((dopt->binary_upgrade && + !tbinfo->ispartition && + !tbinfo->notnull_islocal) || + !PQgetisnull(res, r, i_notnull_comment)) { tbinfo->notnull_constrs[j] = pstrdup(PQgetvalue(res, r, i_notnull_name)); @@ -10855,7 +10875,7 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg, const TocEntry *te) expected_te = expected_te->next; if (te != expected_te) - pg_fatal("stats dumped out of order (current: %d %s %s) (expected: %d %s %s)", + pg_fatal("statistics dumped out of order (current: %d %s %s, expected: %d %s %s)", te->dumpId, te->desc, te->tag, expected_te->dumpId, expected_te->desc, expected_te->tag); @@ -10996,7 +11016,7 @@ dumpRelationStats_dumper(Archive *fout, const void *userArg, const TocEntry *te) appendStringLiteralAH(out, rsinfo->dobj.name, fout); if (PQgetisnull(res, rownum, i_attname)) - pg_fatal("attname cannot be NULL"); + pg_fatal("unexpected null attname"); attname = PQgetvalue(res, rownum, i_attname); /* @@ -17684,6 +17704,56 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) if (tbinfo->dobj.dump & DUMP_COMPONENT_SECLABEL) dumpTableSecLabel(fout, tbinfo, reltypename); + /* + * Dump comments for not-null constraints that aren't to be dumped + * separately (those are processed by collectComments/dumpComment). 
+ */ + if (!fout->dopt->no_comments && dopt->dumpSchema && + fout->remoteVersion >= 180000) + { + PQExpBuffer comment = NULL; + PQExpBuffer tag = NULL; + + for (j = 0; j < tbinfo->numatts; j++) + { + if (tbinfo->notnull_constrs[j] != NULL && + tbinfo->notnull_comment[j] != NULL) + { + if (comment == NULL) + { + comment = createPQExpBuffer(); + tag = createPQExpBuffer(); + } + else + { + resetPQExpBuffer(comment); + resetPQExpBuffer(tag); + } + + appendPQExpBuffer(comment, "COMMENT ON CONSTRAINT %s ON %s IS ", + fmtId(tbinfo->notnull_constrs[j]), qualrelname); + appendStringLiteralAH(comment, tbinfo->notnull_comment[j], fout); + appendPQExpBufferStr(comment, ";\n"); + + appendPQExpBuffer(tag, "CONSTRAINT %s ON %s", + fmtId(tbinfo->notnull_constrs[j]), qrelname); + + ArchiveEntry(fout, nilCatalogId, createDumpId(), + ARCHIVE_OPTS(.tag = tag->data, + .namespace = tbinfo->dobj.namespace->dobj.name, + .owner = tbinfo->rolname, + .description = "COMMENT", + .section = SECTION_NONE, + .createStmt = comment->data, + .deps = &(tbinfo->dobj.dumpId), + .nDeps = 1)); + } + } + + destroyPQExpBuffer(comment); + destroyPQExpBuffer(tag); + } + /* Dump comments on inlined table constraints */ for (j = 0; j < tbinfo->ncheck; j++) { diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h index 7417eab6aef..39eef1d6617 100644 --- a/src/bin/pg_dump/pg_dump.h +++ b/src/bin/pg_dump/pg_dump.h @@ -365,6 +365,7 @@ typedef struct _tableInfo * there isn't one on this column. 
If * empty string, unnamed constraint * (pre-v17) */ + char **notnull_comment; /* comment thereof */ bool *notnull_invalid; /* true for NOT NULL NOT VALID */ bool *notnull_noinh; /* NOT NULL is NO INHERIT */ bool *notnull_islocal; /* true if NOT NULL has local definition */ diff --git a/src/bin/pg_dump/pg_dumpall.c b/src/bin/pg_dump/pg_dumpall.c index 7f9c302b719..3cbcad65c5f 100644 --- a/src/bin/pg_dump/pg_dumpall.c +++ b/src/bin/pg_dump/pg_dumpall.c @@ -525,7 +525,7 @@ main(int argc, char *argv[]) OPF = fopen(global_path, PG_BINARY_W); if (!OPF) - pg_fatal("could not open \"%s\": %m", global_path); + pg_fatal("could not open file \"%s\": %m", global_path); } else if (filename) { @@ -699,7 +699,7 @@ main(int argc, char *argv[]) static void help(void) { - printf(_("%s extracts a PostgreSQL database cluster based on specified dump format.\n\n"), progname); + printf(_("%s exports a PostgreSQL database cluster as an SQL script or to other formats.\n\n"), progname); printf(_("Usage:\n")); printf(_(" %s [OPTION]...\n"), progname); @@ -1659,14 +1659,14 @@ dumpDatabases(PGconn *conn, ArchiveFormat archDumpFormat) /* Create a subdirectory with 'databases' name under main directory. 
*/ if (mkdir(db_subdir, pg_dir_create_mode) != 0) - pg_fatal("could not create subdirectory \"%s\": %m", db_subdir); + pg_fatal("could not create directory \"%s\": %m", db_subdir); snprintf(map_file_path, MAXPGPATH, "%s/map.dat", filename); /* Create a map file (to store dboid and dbname) */ map_file = fopen(map_file_path, PG_BINARY_W); if (!map_file) - pg_fatal("could not open map file: %s", strerror(errno)); + pg_fatal("could not open file \"%s\": %m", map_file_path); } for (i = 0; i < PQntuples(res); i++) @@ -1976,7 +1976,7 @@ parseDumpFormat(const char *format) else if (pg_strcasecmp(format, "tar") == 0) archDumpFormat = archTar; else - pg_fatal("unrecognized archive format \"%s\"; please specify \"c\", \"d\", \"p\", or \"t\"", + pg_fatal("unrecognized output format \"%s\"; please specify \"c\", \"d\", \"p\", or \"t\"", format); return archDumpFormat; diff --git a/src/bin/pg_dump/pg_restore.c b/src/bin/pg_dump/pg_restore.c index f2182e91825..6ef789cb06d 100644 --- a/src/bin/pg_dump/pg_restore.c +++ b/src/bin/pg_dump/pg_restore.c @@ -523,7 +523,7 @@ main(int argc, char **argv) */ if (!globals_only && opts->createDB != 1) { - pg_log_error("-C/--create option should be specified when restoring an archive created by pg_dumpall"); + pg_log_error("option -C/--create must be specified when restoring an archive created by pg_dumpall"); pg_log_error_hint("Try \"%s --help\" for more information.", progname); pg_log_error_hint("Individual databases can be restored using their specific archives."); exit_nicely(1); @@ -557,7 +557,7 @@ main(int argc, char **argv) if (conn) PQfinish(conn); - pg_log_info("database restoring skipped as -g/--globals-only option was specified"); + pg_log_info("database restoring skipped because option -g/--globals-only was specified"); } else { @@ -712,9 +712,9 @@ usage(const char *progname) printf(_(" --use-set-session-authorization\n" " use SET SESSION AUTHORIZATION commands instead of\n" " ALTER OWNER commands to set ownership\n")); - 
printf(_(" --with-data dump the data\n")); - printf(_(" --with-schema dump the schema\n")); - printf(_(" --with-statistics dump the statistics\n")); + printf(_(" --with-data restore the data\n")); + printf(_(" --with-schema restore the schema\n")); + printf(_(" --with-statistics restore the statistics\n")); printf(_("\nConnection options:\n")); printf(_(" -h, --host=HOSTNAME database server host or socket directory\n")); @@ -725,8 +725,8 @@ usage(const char *progname) printf(_(" --role=ROLENAME do SET ROLE before restore\n")); printf(_("\n" - "The options -I, -n, -N, -P, -t, -T, --section, and --exclude-database can be combined\n" - "and specified multiple times to select multiple objects.\n")); + "The options -I, -n, -N, -P, -t, -T, --section, and --exclude-database can be\n" + "combined and specified multiple times to select multiple objects.\n")); printf(_("\nIf no input file name is supplied, then standard input is used.\n\n")); printf(_("Report bugs to <%s>.\n"), PACKAGE_BUGREPORT); printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); @@ -946,7 +946,7 @@ get_dbnames_list_to_restore(PGconn *conn, query = createPQExpBuffer(); if (!conn) - pg_log_info("considering PATTERN as NAME for --exclude-database option as no db connection while doing pg_restore."); + pg_log_info("considering PATTERN as NAME for --exclude-database option as no database connection while doing pg_restore"); /* * Process one by one all dbnames and if specified to skip restoring, then @@ -992,7 +992,7 @@ get_dbnames_list_to_restore(PGconn *conn, if ((PQresultStatus(res) == PGRES_TUPLES_OK) && PQntuples(res)) { skip_db_restore = true; - pg_log_info("database \"%s\" matches exclude pattern: \"%s\"", dbidname->str, pat_cell->val); + pg_log_info("database name \"%s\" matches exclude pattern \"%s\"", dbidname->str, pat_cell->val); } PQclear(res); @@ -1048,7 +1048,7 @@ get_dbname_oid_list_from_mfile(const char *dumpdirpath, SimplePtrList *dbname_oi */ if 
(!file_exists_in_directory(dumpdirpath, "map.dat")) { - pg_log_info("database restoring is skipped as \"map.dat\" is not present in \"%s\"", dumpdirpath); + pg_log_info("database restoring is skipped because file \"%s\" does not exist in directory \"%s\"", "map.dat", dumpdirpath); return 0; } @@ -1058,7 +1058,7 @@ get_dbname_oid_list_from_mfile(const char *dumpdirpath, SimplePtrList *dbname_oi pfile = fopen(map_file_path, PG_BINARY_R); if (pfile == NULL) - pg_fatal("could not open \"%s\": %m", map_file_path); + pg_fatal("could not open file \"%s\": %m", map_file_path); initStringInfo(&linebuf); @@ -1086,10 +1086,10 @@ get_dbname_oid_list_from_mfile(const char *dumpdirpath, SimplePtrList *dbname_oi /* Report error and exit if the file has any corrupted data. */ if (!OidIsValid(db_oid) || namelen <= 1) - pg_fatal("invalid entry in \"%s\" at line: %d", map_file_path, + pg_fatal("invalid entry in file \"%s\" on line %d", map_file_path, count + 1); - pg_log_info("found database \"%s\" (OID: %u) in \"%s\"", + pg_log_info("found database \"%s\" (OID: %u) in file \"%s\"", dbname, db_oid, map_file_path); dbidname = pg_malloc(offsetof(DbOidName, str) + namelen + 1); @@ -1142,11 +1142,14 @@ restore_all_databases(PGconn *conn, const char *dumpdirpath, if (dbname_oid_list.head == NULL) return process_global_sql_commands(conn, dumpdirpath, opts->filename); - pg_log_info("found %d database names in \"map.dat\"", num_total_db); + pg_log_info(ngettext("found %d database name in \"%s\"", + "found %d database names in \"%s\"", + num_total_db), + num_total_db, "map.dat"); if (!conn) { - pg_log_info("trying to connect database \"postgres\""); + pg_log_info("trying to connect to database \"%s\"", "postgres"); conn = ConnectDatabase("postgres", NULL, opts->cparams.pghost, opts->cparams.pgport, opts->cparams.username, TRI_DEFAULT, @@ -1155,7 +1158,7 @@ restore_all_databases(PGconn *conn, const char *dumpdirpath, /* Try with template1. 
*/ if (!conn) { - pg_log_info("trying to connect database \"template1\""); + pg_log_info("trying to connect to database \"%s\"", "template1"); conn = ConnectDatabase("template1", NULL, opts->cparams.pghost, opts->cparams.pgport, opts->cparams.username, TRI_DEFAULT, @@ -1179,7 +1182,9 @@ restore_all_databases(PGconn *conn, const char *dumpdirpath, /* Exit if no db needs to be restored. */ if (dbname_oid_list.head == NULL || num_db_restore == 0) { - pg_log_info("no database needs to restore out of %d databases", num_total_db); + pg_log_info(ngettext("no database needs restoring out of %d database", + "no database needs restoring out of %d databases", num_total_db), + num_total_db); return n_errors_total; } @@ -1314,7 +1319,7 @@ process_global_sql_commands(PGconn *conn, const char *dumpdirpath, const char *o pfile = fopen(global_file_path, PG_BINARY_R); if (pfile == NULL) - pg_fatal("could not open \"%s\": %m", global_file_path); + pg_fatal("could not open file \"%s\": %m", global_file_path); /* * If outfile is given, then just copy all global.dat file data into @@ -1354,15 +1359,17 @@ process_global_sql_commands(PGconn *conn, const char *dumpdirpath, const char *o break; default: n_errors++; - pg_log_error("could not execute query: \"%s\" \nCommand was: \"%s\"", PQerrorMessage(conn), sqlstatement.data); + pg_log_error("could not execute query: %s", PQerrorMessage(conn)); + pg_log_error_detail("Command was: %s", sqlstatement.data); } PQclear(result); } /* Print a summary of ignored errors during global.dat. 
*/ if (n_errors) - pg_log_warning("ignored %d errors in \"%s\"", n_errors, global_file_path); - + pg_log_warning(ngettext("ignored %d error in file \"%s\"", + "ignored %d errors in file \"%s\"", n_errors), + n_errors, global_file_path); fclose(pfile); return n_errors; diff --git a/src/bin/pg_dump/t/001_basic.pl b/src/bin/pg_dump/t/001_basic.pl index 84ca25e17d6..c3c5fae11ea 100644 --- a/src/bin/pg_dump/t/001_basic.pl +++ b/src/bin/pg_dump/t/001_basic.pl @@ -240,17 +240,20 @@ command_fails_like( command_fails_like( [ 'pg_restore', '--exclude-database=foo', '--globals-only', '-d', 'xxx' ], qr/\Qpg_restore: error: option --exclude-database cannot be used together with -g\/--globals-only\E/, - 'pg_restore: option --exclude-database cannot be used together with -g/--globals-only'); + 'pg_restore: option --exclude-database cannot be used together with -g/--globals-only' +); command_fails_like( [ 'pg_restore', '--exclude-database=foo', '-d', 'xxx', 'dumpdir' ], qr/\Qpg_restore: error: option --exclude-database can be used only when restoring an archive created by pg_dumpall\E/, - 'When option --exclude-database is used in pg_restore with dump of pg_dump'); + 'When option --exclude-database is used in pg_restore with dump of pg_dump' +); command_fails_like( [ 'pg_restore', '--globals-only', '-d', 'xxx', 'dumpdir' ], qr/\Qpg_restore: error: option -g\/--globals-only can be used only when restoring an archive created by pg_dumpall\E/, - 'When option --globals-only is not used in pg_restore with dump of pg_dump'); + 'When option --globals-only is not used in pg_restore with dump of pg_dump' +); # also fails for -r and -t, but it seems pointless to add more tests for those. 
command_fails_like( @@ -261,6 +264,6 @@ command_fails_like( command_fails_like( [ 'pg_dumpall', '--format', 'x' ], - qr/\Qpg_dumpall: error: unrecognized archive format "x";\E/, - 'pg_dumpall: unrecognized archive format'); + qr/\Qpg_dumpall: error: unrecognized output format "x";\E/, + 'pg_dumpall: unrecognized output format'); done_testing(); diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl index cf34f71ea11..2485d8f360e 100644 --- a/src/bin/pg_dump/t/002_pg_dump.pl +++ b/src/bin/pg_dump/t/002_pg_dump.pl @@ -68,6 +68,7 @@ my %pgdump_runs = ( '--no-data', '--sequence-data', '--binary-upgrade', + '--with-statistics', '--dbname' => 'postgres', # alternative way to specify database ], restore_cmd => [ @@ -75,6 +76,7 @@ my %pgdump_runs = ( '--format' => 'custom', '--verbose', '--file' => "$tempdir/binary_upgrade.sql", + '--with-statistics', "$tempdir/binary_upgrade.dump", ], }, @@ -88,11 +90,13 @@ my %pgdump_runs = ( '--format' => 'custom', '--compress' => '1', '--file' => "$tempdir/compression_gzip_custom.dump", + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/compression_gzip_custom.sql", + '--with-statistics', "$tempdir/compression_gzip_custom.dump", ], command_like => { @@ -115,6 +119,7 @@ my %pgdump_runs = ( '--format' => 'directory', '--compress' => 'gzip:1', '--file' => "$tempdir/compression_gzip_dir", + '--with-statistics', 'postgres', ], # Give coverage for manually compressed blobs.toc files during @@ -132,6 +137,7 @@ my %pgdump_runs = ( 'pg_restore', '--jobs' => '2', '--file' => "$tempdir/compression_gzip_dir.sql", + '--with-statistics', "$tempdir/compression_gzip_dir", ], }, @@ -144,6 +150,7 @@ my %pgdump_runs = ( '--format' => 'plain', '--compress' => '1', '--file' => "$tempdir/compression_gzip_plain.sql.gz", + '--with-statistics', 'postgres', ], # Decompress the generated file to run through the tests. 
@@ -162,11 +169,13 @@ my %pgdump_runs = ( '--format' => 'custom', '--compress' => 'lz4', '--file' => "$tempdir/compression_lz4_custom.dump", + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/compression_lz4_custom.sql", + '--with-statistics', "$tempdir/compression_lz4_custom.dump", ], command_like => { @@ -189,6 +198,7 @@ my %pgdump_runs = ( '--format' => 'directory', '--compress' => 'lz4:1', '--file' => "$tempdir/compression_lz4_dir", + '--with-statistics', 'postgres', ], # Verify that data files were compressed @@ -200,6 +210,7 @@ my %pgdump_runs = ( 'pg_restore', '--jobs' => '2', '--file' => "$tempdir/compression_lz4_dir.sql", + '--with-statistics', "$tempdir/compression_lz4_dir", ], }, @@ -212,6 +223,7 @@ my %pgdump_runs = ( '--format' => 'plain', '--compress' => 'lz4', '--file' => "$tempdir/compression_lz4_plain.sql.lz4", + '--with-statistics', 'postgres', ], # Decompress the generated file to run through the tests. @@ -233,11 +245,13 @@ my %pgdump_runs = ( '--format' => 'custom', '--compress' => 'zstd', '--file' => "$tempdir/compression_zstd_custom.dump", + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/compression_zstd_custom.sql", + '--with-statistics', "$tempdir/compression_zstd_custom.dump", ], command_like => { @@ -259,6 +273,7 @@ my %pgdump_runs = ( '--format' => 'directory', '--compress' => 'zstd:1', '--file' => "$tempdir/compression_zstd_dir", + '--with-statistics', 'postgres', ], # Give coverage for manually compressed blobs.toc files during @@ -279,6 +294,7 @@ my %pgdump_runs = ( 'pg_restore', '--jobs' => '2', '--file' => "$tempdir/compression_zstd_dir.sql", + '--with-statistics', "$tempdir/compression_zstd_dir", ], }, @@ -292,6 +308,7 @@ my %pgdump_runs = ( '--format' => 'plain', '--compress' => 'zstd:long', '--file' => "$tempdir/compression_zstd_plain.sql.zst", + '--with-statistics', 'postgres', ], # Decompress the generated file to run through the tests. 
@@ -310,6 +327,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--file' => "$tempdir/clean.sql", '--clean', + '--with-statistics', '--dbname' => 'postgres', # alternative way to specify database ], }, @@ -320,6 +338,7 @@ my %pgdump_runs = ( '--clean', '--if-exists', '--encoding' => 'UTF8', # no-op, just for testing + '--with-statistics', 'postgres', ], }, @@ -338,6 +357,7 @@ my %pgdump_runs = ( '--create', '--no-reconnect', # no-op, just for testing '--verbose', + '--with-statistics', 'postgres', ], }, @@ -348,7 +368,7 @@ my %pgdump_runs = ( '--data-only', '--superuser' => 'test_superuser', '--disable-triggers', - '--verbose', # no-op, just make sure it works + '--verbose', # no-op, just make sure it works 'postgres', ], }, @@ -356,6 +376,7 @@ my %pgdump_runs = ( dump_cmd => [ 'pg_dump', '--no-sync', '--file' => "$tempdir/defaults.sql", + '--with-statistics', 'postgres', ], }, @@ -364,6 +385,7 @@ my %pgdump_runs = ( dump_cmd => [ 'pg_dump', '--no-sync', '--file' => "$tempdir/defaults_no_public.sql", + '--with-statistics', 'regress_pg_dump_test', ], }, @@ -373,6 +395,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--clean', '--file' => "$tempdir/defaults_no_public_clean.sql", + '--with-statistics', 'regress_pg_dump_test', ], }, @@ -381,6 +404,7 @@ my %pgdump_runs = ( dump_cmd => [ 'pg_dump', '--no-sync', '--file' => "$tempdir/defaults_public_owner.sql", + '--with-statistics', 'regress_public_owner', ], }, @@ -395,12 +419,14 @@ my %pgdump_runs = ( 'pg_dump', '--format' => 'custom', '--file' => "$tempdir/defaults_custom_format.dump", + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--format' => 'custom', '--file' => "$tempdir/defaults_custom_format.sql", + '--with-statistics', "$tempdir/defaults_custom_format.dump", ], command_like => { @@ -425,12 +451,14 @@ my %pgdump_runs = ( 'pg_dump', '--format' => 'directory', '--file' => "$tempdir/defaults_dir_format", + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--format' => 
'directory', '--file' => "$tempdir/defaults_dir_format.sql", + '--with-statistics', "$tempdir/defaults_dir_format", ], command_like => { @@ -456,11 +484,13 @@ my %pgdump_runs = ( '--format' => 'directory', '--jobs' => 2, '--file' => "$tempdir/defaults_parallel", + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/defaults_parallel.sql", + '--with-statistics', "$tempdir/defaults_parallel", ], }, @@ -472,12 +502,14 @@ my %pgdump_runs = ( 'pg_dump', '--format' => 'tar', '--file' => "$tempdir/defaults_tar_format.tar", + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--format' => 'tar', '--file' => "$tempdir/defaults_tar_format.sql", + '--with-statistics', "$tempdir/defaults_tar_format.tar", ], }, @@ -486,6 +518,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--file' => "$tempdir/exclude_dump_test_schema.sql", '--exclude-schema' => 'dump_test', + '--with-statistics', 'postgres', ], }, @@ -494,6 +527,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--file' => "$tempdir/exclude_test_table.sql", '--exclude-table' => 'dump_test.test_table', + '--with-statistics', 'postgres', ], }, @@ -502,6 +536,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--file' => "$tempdir/exclude_measurement.sql", '--exclude-table-and-children' => 'dump_test.measurement', + '--with-statistics', 'postgres', ], }, @@ -511,6 +546,7 @@ my %pgdump_runs = ( '--file' => "$tempdir/exclude_measurement_data.sql", '--exclude-table-data-and-children' => 'dump_test.measurement', '--no-unlogged-table-data', + '--with-statistics', 'postgres', ], }, @@ -520,6 +556,7 @@ my %pgdump_runs = ( '--file' => "$tempdir/exclude_test_table_data.sql", '--exclude-table-data' => 'dump_test.test_table', '--no-unlogged-table-data', + '--with-statistics', 'postgres', ], }, @@ -538,6 +575,7 @@ my %pgdump_runs = ( '--file' => "$tempdir/pg_dumpall_globals.sql", '--globals-only', '--no-sync', + '--with-statistics', ], }, pg_dumpall_globals_clean => { @@ -547,12 +585,14 @@ 
my %pgdump_runs = ( '--globals-only', '--clean', '--no-sync', + '--with-statistics', ], }, pg_dumpall_dbprivs => { dump_cmd => [ 'pg_dumpall', '--no-sync', '--file' => "$tempdir/pg_dumpall_dbprivs.sql", + '--with-statistics', ], }, pg_dumpall_exclude => { @@ -562,6 +602,7 @@ my %pgdump_runs = ( '--file' => "$tempdir/pg_dumpall_exclude.sql", '--exclude-database' => '*dump_test*', '--no-sync', + '--with-statistics', ], }, no_toast_compression => { @@ -569,6 +610,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--file' => "$tempdir/no_toast_compression.sql", '--no-toast-compression', + '--with-statistics', 'postgres', ], }, @@ -577,6 +619,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--file' => "$tempdir/no_large_objects.sql", '--no-large-objects', + '--with-statistics', 'postgres', ], }, @@ -585,6 +628,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--file' => "$tempdir/no_policies.sql", '--no-policies', + '--with-statistics', 'postgres', ], }, @@ -593,6 +637,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--file' => "$tempdir/no_privs.sql", '--no-privileges', + '--with-statistics', 'postgres', ], }, @@ -601,6 +646,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--file' => "$tempdir/no_owner.sql", '--no-owner', + '--with-statistics', 'postgres', ], }, @@ -609,6 +655,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--file' => "$tempdir/no_table_access_method.sql", '--no-table-access-method', + '--with-statistics', 'postgres', ], }, @@ -617,6 +664,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--file' => "$tempdir/only_dump_test_schema.sql", '--schema' => 'dump_test', + '--with-statistics', 'postgres', ], }, @@ -627,6 +675,7 @@ my %pgdump_runs = ( '--table' => 'dump_test.test_table', '--lock-wait-timeout' => (1000 * $PostgreSQL::Test::Utils::timeout_default), + '--with-statistics', 'postgres', ], }, @@ -637,6 +686,7 @@ my %pgdump_runs = ( '--table-and-children' => 'dump_test.measurement', '--lock-wait-timeout' => (1000 * 
$PostgreSQL::Test::Utils::timeout_default), + '--with-statistics', 'postgres', ], }, @@ -646,6 +696,7 @@ my %pgdump_runs = ( '--file' => "$tempdir/role.sql", '--role' => 'regress_dump_test_role', '--schema' => 'dump_test_second_schema', + '--with-statistics', 'postgres', ], }, @@ -658,11 +709,13 @@ my %pgdump_runs = ( '--file' => "$tempdir/role_parallel", '--role' => 'regress_dump_test_role', '--schema' => 'dump_test_second_schema', + '--with-statistics', 'postgres', ], restore_cmd => [ 'pg_restore', '--file' => "$tempdir/role_parallel.sql", + '--with-statistics', "$tempdir/role_parallel", ], }, @@ -691,6 +744,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--file' => "$tempdir/section_pre_data.sql", '--section' => 'pre-data', + '--with-statistics', 'postgres', ], }, @@ -699,6 +753,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--file' => "$tempdir/section_data.sql", '--section' => 'data', + '--with-statistics', 'postgres', ], }, @@ -707,6 +762,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', '--file' => "$tempdir/section_post_data.sql", '--section' => 'post-data', + '--with-statistics', 'postgres', ], }, @@ -717,6 +773,7 @@ my %pgdump_runs = ( '--schema' => 'dump_test', '--large-objects', '--no-large-objects', + '--with-statistics', 'postgres', ], }, @@ -732,6 +789,7 @@ my %pgdump_runs = ( 'pg_dump', '--no-sync', "--file=$tempdir/no_data_no_schema.sql", '--no-data', '--no-schema', 'postgres', + '--with-statistics', ], }, statistics_only => { @@ -752,7 +810,7 @@ my %pgdump_runs = ( dump_cmd => [ 'pg_dump', '--no-sync', "--file=$tempdir/no_schema.sql", '--no-schema', - 'postgres', + '--with-statistics', 'postgres', ], },); @@ -1132,7 +1190,9 @@ my %tests = ( ) INHERITS (dump_test.test_table_nn, dump_test.test_table_nn_2); ALTER TABLE dump_test.test_table_nn ADD CONSTRAINT nn NOT NULL col1 NOT VALID; ALTER TABLE dump_test.test_table_nn_chld1 VALIDATE CONSTRAINT nn; - ALTER TABLE dump_test.test_table_nn_chld2 VALIDATE CONSTRAINT nn;', + ALTER TABLE 
dump_test.test_table_nn_chld2 VALIDATE CONSTRAINT nn; + COMMENT ON CONSTRAINT nn ON dump_test.test_table_nn IS \'nn comment is valid\'; + COMMENT ON CONSTRAINT nn ON dump_test.test_table_nn_chld2 IS \'nn_chld2 comment is valid\';', regexp => qr/^ \QALTER TABLE dump_test.test_table_nn\E \n^\s+ \QADD CONSTRAINT nn NOT NULL col1 NOT VALID;\E @@ -1146,6 +1206,34 @@ my %tests = ( }, }, + # This constraint is invalid therefore it goes in SECTION_POST_DATA + 'COMMENT ON CONSTRAINT ON test_table_nn' => { + regexp => qr/^ + \QCOMMENT ON CONSTRAINT nn ON dump_test.test_table_nn IS\E + /xm, + like => { + %full_runs, %dump_test_schema_runs, section_post_data => 1, + }, + unlike => { + exclude_dump_test_schema => 1, + only_dump_measurement => 1, + }, + }, + + # This constraint is valid therefore it goes in SECTION_PRE_DATA + 'COMMENT ON CONSTRAINT ON test_table_chld2' => { + regexp => qr/^ + \QCOMMENT ON CONSTRAINT nn ON dump_test.test_table_nn_chld2 IS\E + /xm, + like => { + %full_runs, %dump_test_schema_runs, section_pre_data => 1, + }, + unlike => { + exclude_dump_test_schema => 1, + only_dump_measurement => 1, + }, + }, + 'CONSTRAINT NOT NULL / NOT VALID (child1)' => { regexp => qr/^ \QCREATE TABLE dump_test.test_table_nn_chld1 (\E\n diff --git a/src/bin/pg_dump/t/006_pg_dumpall.pl b/src/bin/pg_dump/t/006_pg_dumpall.pl index 5acd49f1559..c274b777586 100644 --- a/src/bin/pg_dump/t/006_pg_dumpall.pl +++ b/src/bin/pg_dump/t/006_pg_dumpall.pl @@ -294,17 +294,17 @@ my %pgdumpall_runs = ( '--format' => 'directory', '--globals-only', '--file' => "$tempdir/dump_globals_only", - ], - restore_cmd => [ - 'pg_restore', '-C', '--globals-only', - '--format' => 'directory', - '--file' => "$tempdir/dump_globals_only.sql", - "$tempdir/dump_globals_only", - ], - like => qr/ + ], + restore_cmd => [ + 'pg_restore', '-C', '--globals-only', + '--format' => 'directory', + '--file' => "$tempdir/dump_globals_only.sql", + "$tempdir/dump_globals_only", + ], + like => qr/ ^\s*\QCREATE ROLE 
dumpall;\E\s*\n /xm - }, ); + },); # First execute the setup_sql foreach my $run (sort keys %pgdumpall_runs) @@ -339,7 +339,8 @@ foreach my $run (sort keys %pgdumpall_runs) # pg_restore --file output file. my $output_file = slurp_file("$tempdir/${run}.sql"); - if (!($pgdumpall_runs{$run}->{like}) && !($pgdumpall_runs{$run}->{unlike})) + if ( !($pgdumpall_runs{$run}->{like}) + && !($pgdumpall_runs{$run}->{unlike})) { die "missing \"like\" or \"unlike\" in test \"$run\""; } @@ -361,30 +362,38 @@ foreach my $run (sort keys %pgdumpall_runs) # Some negative test case with dump of pg_dumpall and restore using pg_restore # test case 1: when -C is not used in pg_restore with dump of pg_dumpall $node->command_fails_like( - [ 'pg_restore', - "$tempdir/format_custom", - '--format' => 'custom', - '--file' => "$tempdir/error_test.sql", ], - qr/\Qpg_restore: error: -C\/--create option should be specified when restoring an archive created by pg_dumpall\E/, - 'When -C is not used in pg_restore with dump of pg_dumpall'); + [ + 'pg_restore', + "$tempdir/format_custom", + '--format' => 'custom', + '--file' => "$tempdir/error_test.sql", + ], + qr/\Qpg_restore: error: option -C\/--create must be specified when restoring an archive created by pg_dumpall\E/, + 'When -C is not used in pg_restore with dump of pg_dumpall'); # test case 2: When --list option is used with dump of pg_dumpall $node->command_fails_like( - [ 'pg_restore', + [ + 'pg_restore', "$tempdir/format_custom", '-C', - '--format' => 'custom', '--list', - '--file' => "$tempdir/error_test.sql", ], + '--format' => 'custom', + '--list', + '--file' => "$tempdir/error_test.sql", + ], qr/\Qpg_restore: error: option -l\/--list cannot be used when restoring an archive created by pg_dumpall\E/, 'When --list is used in pg_restore with dump of pg_dumpall'); # test case 3: When non-exist database is given with -d option $node->command_fails_like( - [ 'pg_restore', + [ + 'pg_restore', "$tempdir/format_custom", '-C', '--format' => 
'custom', - '-d' => 'dbpq', ], + '-d' => 'dbpq', + ], qr/\Qpg_restore: error: could not connect to database "dbpq"\E/, - 'When non-existent database is given with -d option in pg_restore with dump of pg_dumpall'); + 'When non-existent database is given with -d option in pg_restore with dump of pg_dumpall' +); $node->stop('fast'); diff --git a/src/bin/pg_rewind/libpq_source.c b/src/bin/pg_rewind/libpq_source.c index 56c2ad55d4a..e80edb7077e 100644 --- a/src/bin/pg_rewind/libpq_source.c +++ b/src/bin/pg_rewind/libpq_source.c @@ -215,7 +215,7 @@ libpq_get_current_wal_insert_lsn(rewind_source *source) val = run_simple_query(conn, "SELECT pg_current_wal_insert_lsn()"); - if (sscanf(val, "%X/%X", &hi, &lo) != 2) + if (sscanf(val, "%X/%08X", &hi, &lo) != 2) pg_fatal("unrecognized result \"%s\" for current WAL insert location", val); result = ((uint64) hi) << 32 | lo; diff --git a/src/bin/pg_rewind/parsexlog.c b/src/bin/pg_rewind/parsexlog.c index 2cd44625ca3..8f4b282c6b1 100644 --- a/src/bin/pg_rewind/parsexlog.c +++ b/src/bin/pg_rewind/parsexlog.c @@ -89,11 +89,11 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, XLogRecPtr errptr = xlogreader->EndRecPtr; if (errormsg) - pg_fatal("could not read WAL record at %X/%X: %s", + pg_fatal("could not read WAL record at %X/%08X: %s", LSN_FORMAT_ARGS(errptr), errormsg); else - pg_fatal("could not read WAL record at %X/%X", + pg_fatal("could not read WAL record at %X/%08X", LSN_FORMAT_ARGS(errptr)); } @@ -105,7 +105,7 @@ extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex, * messed up. 
*/ if (xlogreader->EndRecPtr != endpoint) - pg_fatal("end pointer %X/%X is not a valid end point; expected %X/%X", + pg_fatal("end pointer %X/%08X is not a valid end point; expected %X/%08X", LSN_FORMAT_ARGS(endpoint), LSN_FORMAT_ARGS(xlogreader->EndRecPtr)); XLogReaderFree(xlogreader); @@ -143,10 +143,10 @@ readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex, if (record == NULL) { if (errormsg) - pg_fatal("could not read WAL record at %X/%X: %s", + pg_fatal("could not read WAL record at %X/%08X: %s", LSN_FORMAT_ARGS(ptr), errormsg); else - pg_fatal("could not read WAL record at %X/%X", + pg_fatal("could not read WAL record at %X/%08X", LSN_FORMAT_ARGS(ptr)); } endptr = xlogreader->EndRecPtr; @@ -211,11 +211,11 @@ findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex, if (record == NULL) { if (errormsg) - pg_fatal("could not find previous WAL record at %X/%X: %s", + pg_fatal("could not find previous WAL record at %X/%08X: %s", LSN_FORMAT_ARGS(searchptr), errormsg); else - pg_fatal("could not find previous WAL record at %X/%X", + pg_fatal("could not find previous WAL record at %X/%08X", LSN_FORMAT_ARGS(searchptr)); } @@ -458,8 +458,8 @@ extractPageInfo(XLogReaderState *record) * we don't recognize the type. That's bad - we don't know how to * track that change. 
*/ - pg_fatal("WAL record modifies a relation, but record type is not recognized: " - "lsn: %X/%X, rmid: %d, rmgr: %s, info: %02X", + pg_fatal("WAL record modifies a relation, but record type is not recognized:\n" + "lsn: %X/%08X, rmid: %d, rmgr: %s, info: %02X", LSN_FORMAT_ARGS(record->ReadRecPtr), rmid, RmgrName(rmid), info); } diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c index 9d16c1e6b47..0c68dd4235e 100644 --- a/src/bin/pg_rewind/pg_rewind.c +++ b/src/bin/pg_rewind/pg_rewind.c @@ -393,7 +393,7 @@ main(int argc, char **argv) targetHistory, targetNentries, &divergerec, &lastcommontliIndex); - pg_log_info("servers diverged at WAL location %X/%X on timeline %u", + pg_log_info("servers diverged at WAL location %X/%08X on timeline %u", LSN_FORMAT_ARGS(divergerec), targetHistory[lastcommontliIndex].tli); @@ -461,7 +461,7 @@ main(int argc, char **argv) findLastCheckpoint(datadir_target, divergerec, lastcommontliIndex, &chkptrec, &chkpttli, &chkptredo, restore_command); - pg_log_info("rewinding from last common checkpoint at %X/%X on timeline %u", + pg_log_info("rewinding from last common checkpoint at %X/%08X on timeline %u", LSN_FORMAT_ARGS(chkptrec), chkpttli); /* Initialize the hash table to track the status of each file */ @@ -902,7 +902,7 @@ getTimelineHistory(TimeLineID tli, bool is_source, int *nentries) TimeLineHistoryEntry *entry; entry = &history[i]; - pg_log_debug("%u: %X/%X - %X/%X", entry->tli, + pg_log_debug("%u: %X/%08X - %X/%08X", entry->tli, LSN_FORMAT_ARGS(entry->begin), LSN_FORMAT_ARGS(entry->end)); } @@ -981,8 +981,8 @@ createBackupLabel(XLogRecPtr startpoint, TimeLineID starttli, XLogRecPtr checkpo strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", tmp); len = snprintf(buf, sizeof(buf), - "START WAL LOCATION: %X/%X (file %s)\n" - "CHECKPOINT LOCATION: %X/%X\n" + "START WAL LOCATION: %X/%08X (file %s)\n" + "CHECKPOINT LOCATION: %X/%08X\n" "BACKUP METHOD: pg_rewind\n" "BACKUP FROM: standby\n" "START TIME: %s\n", 
diff --git a/src/bin/pg_rewind/t/RewindTest.pm b/src/bin/pg_rewind/t/RewindTest.pm index 3efab831797..b0234ebfaf2 100644 --- a/src/bin/pg_rewind/t/RewindTest.pm +++ b/src/bin/pg_rewind/t/RewindTest.pm @@ -285,7 +285,7 @@ sub run_pg_rewind # Check that pg_rewind with dbname and --write-recovery-conf # wrote the dbname in the generated primary_conninfo value. like(slurp_file("$primary_pgdata/postgresql.auto.conf"), - qr/dbname=postgres/m, 'recovery conf file sets dbname'); + qr/dbname=postgres/m, 'recovery conf file sets dbname'); # Check that standby.signal is here as recovery configuration # was requested. diff --git a/src/bin/pg_rewind/timeline.c b/src/bin/pg_rewind/timeline.c index 4d9f0d8301b..6784969951f 100644 --- a/src/bin/pg_rewind/timeline.c +++ b/src/bin/pg_rewind/timeline.c @@ -66,7 +66,7 @@ rewind_parseTimeLineHistory(char *buffer, TimeLineID targetTLI, int *nentries) if (*ptr == '\0' || *ptr == '#') continue; - nfields = sscanf(fline, "%u\t%X/%X", &tli, &switchpoint_hi, &switchpoint_lo); + nfields = sscanf(fline, "%u\t%X/%08X", &tli, &switchpoint_hi, &switchpoint_lo); if (nfields < 1) { diff --git a/src/bin/pg_upgrade/check.c b/src/bin/pg_upgrade/check.c index 940fc77fc2e..30579ef2051 100644 --- a/src/bin/pg_upgrade/check.c +++ b/src/bin/pg_upgrade/check.c @@ -23,6 +23,7 @@ static void check_for_isn_and_int8_passing_mismatch(ClusterInfo *cluster); static void check_for_user_defined_postfix_ops(ClusterInfo *cluster); static void check_for_incompatible_polymorphics(ClusterInfo *cluster); static void check_for_tables_with_oids(ClusterInfo *cluster); +static void check_for_not_null_inheritance(ClusterInfo *cluster); static void check_for_pg_role_prefix(ClusterInfo *cluster); static void check_for_new_tablespace_dir(void); static void check_for_user_defined_encoding_conversions(ClusterInfo *cluster); @@ -168,6 +169,7 @@ static DataTypesUsageChecks data_types_usage_checks[] = /* pg_class.oid is preserved, so 'regclass' is OK */ " 'regcollation', " " 
'regconfig', " + /* pg_database.oid is preserved, so 'regdatabase' is OK */ " 'regdictionary', " " 'regnamespace', " " 'regoper', " @@ -419,7 +421,7 @@ process_data_type_check(DbInfo *dbinfo, PGresult *res, void *arg) if (!state->result) { pg_log(PG_REPORT, "failed check: %s", _(state->check->status)); - appendPQExpBuffer(*state->report, "\n%s\n%s %s\n", + appendPQExpBuffer(*state->report, "\n%s\n%s\n %s\n", _(state->check->report_text), _("A list of the problem columns is in the file:"), output_path); @@ -672,6 +674,14 @@ check_and_dump_old_cluster(void) check_for_tables_with_oids(&old_cluster); /* + * Pre-PG 18 allowed child tables to omit not-null constraints that their + * parents columns have, but schema restore fails for them. Verify there + * are none, iff applicable. + */ + if (GET_MAJOR_VERSION(old_cluster.major_version) <= 1800) + check_for_not_null_inheritance(&old_cluster); + + /* * Pre-PG 10 allowed tables with 'unknown' type columns and non WAL logged * hash indexes */ @@ -885,7 +895,7 @@ check_cluster_versions(void) */ if (GET_MAJOR_VERSION(old_cluster.major_version) >= 1800 && user_opts.char_signedness != -1) - pg_fatal("%s option cannot be used to upgrade from PostgreSQL %s and later.", + pg_fatal("The option %s cannot be used for upgrades from PostgreSQL %s and later.", "--set-char-signedness", "18"); check_ok(); @@ -1623,6 +1633,93 @@ check_for_tables_with_oids(ClusterInfo *cluster) check_ok(); } +/* + * Callback function for processing results of query for + * check_for_not_null_inheritance. 
+ */ +static void +process_inconsistent_notnull(DbInfo *dbinfo, PGresult *res, void *arg) +{ + UpgradeTaskReport *report = (UpgradeTaskReport *) arg; + int ntups = PQntuples(res); + int i_nspname = PQfnumber(res, "nspname"); + int i_relname = PQfnumber(res, "relname"); + int i_attname = PQfnumber(res, "attname"); + + AssertVariableIsOfType(&process_inconsistent_notnull, + UpgradeTaskProcessCB); + + if (ntups == 0) + return; + + if (report->file == NULL && + (report->file = fopen_priv(report->path, "w")) == NULL) + pg_fatal("could not open file \"%s\": %m", report->path); + + fprintf(report->file, "In database: %s\n", dbinfo->db_name); + + for (int rowno = 0; rowno < ntups; rowno++) + { + fprintf(report->file, " %s.%s.%s\n", + PQgetvalue(res, rowno, i_nspname), + PQgetvalue(res, rowno, i_relname), + PQgetvalue(res, rowno, i_attname)); + } +} + +/* + * check_for_not_null_inheritance() + * + * An attempt to create child tables lacking not-null constraints that are + * present in their parents errors out. This can no longer occur since 18, + * but previously there were various ways for that to happen. Check that + * the cluster to be upgraded doesn't have any of those problems. 
+ */ +static void +check_for_not_null_inheritance(ClusterInfo *cluster) +{ + UpgradeTaskReport report; + UpgradeTask *task; + const char *query; + + prep_status("Checking for not-null constraint inconsistencies"); + + report.file = NULL; + snprintf(report.path, sizeof(report.path), "%s/%s", + log_opts.basedir, + "not_null_inconsistent_columns.txt"); + + query = "SELECT nspname, cc.relname, ac.attname " + "FROM pg_catalog.pg_inherits i, pg_catalog.pg_attribute ac, " + " pg_catalog.pg_attribute ap, pg_catalog.pg_class cc, " + " pg_catalog.pg_namespace nc " + "WHERE cc.oid = ac.attrelid AND i.inhrelid = ac.attrelid " + " AND i.inhparent = ap.attrelid AND ac.attname = ap.attname " + " AND cc.relnamespace = nc.oid " + " AND ap.attnum > 0 and ap.attnotnull AND NOT ac.attnotnull"; + + task = upgrade_task_create(); + upgrade_task_add_step(task, query, + process_inconsistent_notnull, + true, &report); + upgrade_task_run(task, cluster); + upgrade_task_free(task); + + if (report.file) + { + fclose(report.file); + pg_log(PG_REPORT, "fatal"); + pg_fatal("Your installation contains inconsistent NOT NULL constraints.\n" + "If the parent column(s) are NOT NULL, then the child column must\n" + "also be marked NOT NULL, or the upgrade will fail.\n" + "You can fix this by running\n" + " ALTER TABLE tablename ALTER column SET NOT NULL;\n" + "on each column listed in the file:\n" + " %s", report.path); + } + else + check_ok(); +} /* * check_for_pg_role_prefix() @@ -1934,7 +2031,7 @@ check_for_unicode_update(ClusterInfo *cluster) { fclose(report.file); report_status(PG_WARNING, "warning"); - pg_log(PG_WARNING, "Your installation contains relations that may be affected by a new version of Unicode.\n" + pg_log(PG_WARNING, "Your installation contains relations that might be affected by a new version of Unicode.\n" "A list of potentially-affected relations is in the file:\n" " %s", report.path); } diff --git a/src/bin/pg_upgrade/dump.c b/src/bin/pg_upgrade/dump.c index 
23cb08e8347..183f08ce1e8 100644 --- a/src/bin/pg_upgrade/dump.c +++ b/src/bin/pg_upgrade/dump.c @@ -58,7 +58,7 @@ generate_old_dump(void) (user_opts.transfer_mode == TRANSFER_MODE_SWAP) ? "" : "--sequence-data", log_opts.verbose ? "--verbose" : "", - user_opts.do_statistics ? "" : "--no-statistics", + user_opts.do_statistics ? "--with-statistics" : "--no-statistics", log_opts.dumpdir, sql_file_name, escaped_connstr.data); diff --git a/src/bin/pg_upgrade/relfilenumber.c b/src/bin/pg_upgrade/relfilenumber.c index 2959c07f0b8..8d8e816a01f 100644 --- a/src/bin/pg_upgrade/relfilenumber.c +++ b/src/bin/pg_upgrade/relfilenumber.c @@ -290,19 +290,19 @@ prepare_for_swap(const char *old_tablespace, Oid db_oid, /* Create directory for stuff that is moved aside. */ if (pg_mkdir_p(moved_tblspc, pg_dir_create_mode) != 0 && errno != EEXIST) - pg_fatal("could not create directory \"%s\"", moved_tblspc); + pg_fatal("could not create directory \"%s\": %m", moved_tblspc); /* Create directory for old catalog files. */ if (pg_mkdir_p(old_catalog_dir, pg_dir_create_mode) != 0) - pg_fatal("could not create directory \"%s\"", old_catalog_dir); + pg_fatal("could not create directory \"%s\": %m", old_catalog_dir); /* Move the new cluster's database directory aside. */ if (rename(new_db_dir, moved_db_dir) != 0) - pg_fatal("could not rename \"%s\" to \"%s\"", new_db_dir, moved_db_dir); + pg_fatal("could not rename directory \"%s\" to \"%s\": %m", new_db_dir, moved_db_dir); /* Move the old cluster's database directory into place. 
*/ if (rename(old_db_dir, new_db_dir) != 0) - pg_fatal("could not rename \"%s\" to \"%s\"", old_db_dir, new_db_dir); + pg_fatal("could not rename directory \"%s\" to \"%s\": %m", old_db_dir, new_db_dir); return true; } @@ -390,7 +390,7 @@ swap_catalog_files(FileNameMap *maps, int size, const char *old_catalog_dir, snprintf(dest, sizeof(dest), "%s/%s", old_catalog_dir, de->d_name); if (rename(path, dest) != 0) - pg_fatal("could not rename \"%s\" to \"%s\": %m", path, dest); + pg_fatal("could not rename file \"%s\" to \"%s\": %m", path, dest); } if (errno) pg_fatal("could not read directory \"%s\": %m", new_db_dir); @@ -417,7 +417,7 @@ swap_catalog_files(FileNameMap *maps, int size, const char *old_catalog_dir, snprintf(dest, sizeof(dest), "%s/%s", new_db_dir, de->d_name); if (rename(path, dest) != 0) - pg_fatal("could not rename \"%s\" to \"%s\": %m", path, dest); + pg_fatal("could not rename file \"%s\" to \"%s\": %m", path, dest); /* * We don't fsync() the database files in the file synchronization diff --git a/src/bin/pg_upgrade/t/004_subscription.pl b/src/bin/pg_upgrade/t/004_subscription.pl index c545abf6581..e46f02c6cc6 100644 --- a/src/bin/pg_upgrade/t/004_subscription.pl +++ b/src/bin/pg_upgrade/t/004_subscription.pl @@ -53,7 +53,8 @@ $old_sub->safe_psql('postgres', $old_sub->stop; -$new_sub->append_conf('postgresql.conf', "max_active_replication_origins = 0"); +$new_sub->append_conf('postgresql.conf', + "max_active_replication_origins = 0"); # pg_upgrade will fail because the new cluster has insufficient # max_active_replication_origins. 
@@ -80,7 +81,8 @@ command_checks_all( ); # Reset max_active_replication_origins -$new_sub->append_conf('postgresql.conf', "max_active_replication_origins = 10"); +$new_sub->append_conf('postgresql.conf', + "max_active_replication_origins = 10"); # Cleanup $publisher->safe_psql('postgres', "DROP PUBLICATION regress_pub1"); diff --git a/src/bin/pg_upgrade/t/005_char_signedness.pl b/src/bin/pg_upgrade/t/005_char_signedness.pl index 17fa0d48b15..cd8cff6f513 100644 --- a/src/bin/pg_upgrade/t/005_char_signedness.pl +++ b/src/bin/pg_upgrade/t/005_char_signedness.pl @@ -65,7 +65,7 @@ command_checks_all( $mode ], 1, - [qr/--set-char-signedness option cannot be used/], + [qr/option --set-char-signedness cannot be used/], [], '--set-char-signedness option cannot be used for upgrading from v18 or later' ); diff --git a/src/bin/pg_upgrade/t/006_transfer_modes.pl b/src/bin/pg_upgrade/t/006_transfer_modes.pl index 550a63fdf7d..58fe8a8c7dc 100644 --- a/src/bin/pg_upgrade/t/006_transfer_modes.pl +++ b/src/bin/pg_upgrade/t/006_transfer_modes.pl @@ -13,7 +13,8 @@ sub test_mode { my ($mode) = @_; - my $old = PostgreSQL::Test::Cluster->new('old', install_path => $ENV{oldinstall}); + my $old = + PostgreSQL::Test::Cluster->new('old', install_path => $ENV{oldinstall}); my $new = PostgreSQL::Test::Cluster->new('new'); # --swap can't be used to upgrade from versions older than 10, so just skip @@ -40,9 +41,11 @@ sub test_mode # Create a small variety of simple test objects on the old cluster. We'll # check that these reach the new version after upgrading. 
$old->start; - $old->safe_psql('postgres', "CREATE TABLE test1 AS SELECT generate_series(1, 100)"); + $old->safe_psql('postgres', + "CREATE TABLE test1 AS SELECT generate_series(1, 100)"); $old->safe_psql('postgres', "CREATE DATABASE testdb1"); - $old->safe_psql('testdb1', "CREATE TABLE test2 AS SELECT generate_series(200, 300)"); + $old->safe_psql('testdb1', + "CREATE TABLE test2 AS SELECT generate_series(200, 300)"); $old->safe_psql('testdb1', "VACUUM FULL test2"); $old->safe_psql('testdb1', "CREATE SEQUENCE testseq START 5432"); @@ -51,10 +54,15 @@ sub test_mode if (defined($ENV{oldinstall})) { my $tblspc = PostgreSQL::Test::Utils::tempdir_short(); - $old->safe_psql('postgres', "CREATE TABLESPACE test_tblspc LOCATION '$tblspc'"); - $old->safe_psql('postgres', "CREATE DATABASE testdb2 TABLESPACE test_tblspc"); - $old->safe_psql('postgres', "CREATE TABLE test3 TABLESPACE test_tblspc AS SELECT generate_series(300, 401)"); - $old->safe_psql('testdb2', "CREATE TABLE test4 AS SELECT generate_series(400, 502)"); + $old->safe_psql('postgres', + "CREATE TABLESPACE test_tblspc LOCATION '$tblspc'"); + $old->safe_psql('postgres', + "CREATE DATABASE testdb2 TABLESPACE test_tblspc"); + $old->safe_psql('postgres', + "CREATE TABLE test3 TABLESPACE test_tblspc AS SELECT generate_series(300, 401)" + ); + $old->safe_psql('testdb2', + "CREATE TABLE test4 AS SELECT generate_series(400, 502)"); } $old->stop; @@ -90,9 +98,11 @@ sub test_mode # tablespace. 
if (defined($ENV{oldinstall})) { - $result = $new->safe_psql('postgres', "SELECT COUNT(*) FROM test3"); + $result = + $new->safe_psql('postgres', "SELECT COUNT(*) FROM test3"); is($result, '102', "test3 data after pg_upgrade $mode"); - $result = $new->safe_psql('testdb2', "SELECT COUNT(*) FROM test4"); + $result = + $new->safe_psql('testdb2', "SELECT COUNT(*) FROM test4"); is($result, '103', "test4 data after pg_upgrade $mode"); } $new->stop; diff --git a/src/bin/pg_upgrade/task.c b/src/bin/pg_upgrade/task.c index a48d5691390..ee0e2457152 100644 --- a/src/bin/pg_upgrade/task.c +++ b/src/bin/pg_upgrade/task.c @@ -192,8 +192,7 @@ start_conn(const ClusterInfo *cluster, UpgradeTaskSlot *slot) slot->conn = PQconnectStart(conn_opts.data); if (!slot->conn) - pg_fatal("failed to create connection with connection string: \"%s\"", - conn_opts.data); + pg_fatal("out of memory"); termPQExpBuffer(&conn_opts); } @@ -402,7 +401,7 @@ wait_on_slots(UpgradeTaskSlot *slots, int numslots) * If we found socket(s) to wait on, wait. */ if (select_loop(maxFd, &input, &output) == -1) - pg_fatal("select() failed: %m"); + pg_fatal("%s() failed: %m", "select"); /* * Mark which sockets appear to be ready. diff --git a/src/bin/pg_verifybackup/meson.build b/src/bin/pg_verifybackup/meson.build index 9567d55500d..f45ea790d8e 100644 --- a/src/bin/pg_verifybackup/meson.build +++ b/src/bin/pg_verifybackup/meson.build @@ -23,10 +23,10 @@ tests += { 'sd': meson.current_source_dir(), 'bd': meson.current_build_dir(), 'tap': { - 'env': {'GZIP_PROGRAM': gzip.found() ? gzip.path() : '', - 'TAR': tar.found() ? tar.path() : '', - 'LZ4': program_lz4.found() ? program_lz4.path() : '', - 'ZSTD': program_zstd.found() ? program_zstd.path() : ''}, + 'env': {'GZIP_PROGRAM': gzip.found() ? gzip.full_path() : '', + 'TAR': tar.found() ? tar.full_path() : '', + 'LZ4': program_lz4.found() ? program_lz4.full_path() : '', + 'ZSTD': program_zstd.found() ? 
program_zstd.full_path() : ''}, 'tests': [ 't/001_basic.pl', 't/002_algorithm.pl', diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c index 48994ef9bc6..5e6c13bb921 100644 --- a/src/bin/pg_verifybackup/pg_verifybackup.c +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -1207,7 +1207,7 @@ parse_required_wal(verifier_context *context, char *pg_waldump_path, { char *pg_waldump_cmd; - pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%X --end=%X/%X\n", + pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%08X --end=%X/%08X\n", pg_waldump_path, wal_directory, this_wal_range->tli, LSN_FORMAT_ARGS(this_wal_range->start_lsn), LSN_FORMAT_ARGS(this_wal_range->end_lsn)); diff --git a/src/bin/pg_verifybackup/t/008_untar.pl b/src/bin/pg_verifybackup/t/008_untar.pl index deed3ec247d..bc3d6b352ad 100644 --- a/src/bin/pg_verifybackup/t/008_untar.pl +++ b/src/bin/pg_verifybackup/t/008_untar.pl @@ -16,6 +16,22 @@ my $primary = PostgreSQL::Test::Cluster->new('primary'); $primary->init(allows_streaming => 1); $primary->start; +# Create file with some random data and an arbitrary size, useful to check +# the solidity of the compression and decompression logic. The size of the +# file is chosen to be around 640kB. This has proven to be large enough to +# detect some issues related to LZ4, and low enough to not impact the runtime +# of the test significantly. +my $junk_data = $primary->safe_psql( + 'postgres', qq( + SELECT string_agg(encode(sha256(i::bytea), 'hex'), '') + FROM generate_series(1, 10240) s(i);)); +my $data_dir = $primary->data_dir; +my $junk_file = "$data_dir/junk"; +open my $jf, '>', $junk_file + or die "Could not create junk file: $!"; +print $jf $junk_data; +close $jf; + # Create a tablespace directory. 
my $source_ts_path = PostgreSQL::Test::Utils::tempdir_short(); @@ -53,6 +69,12 @@ my @test_configuration = ( 'enabled' => check_pg_config("#define USE_LZ4 1") }, { + 'compression_method' => 'lz4', + 'backup_flags' => [ '--compress', 'server-lz4:5' ], + 'backup_archive' => [ 'base.tar.lz4', "$tsoid.tar.lz4" ], + 'enabled' => check_pg_config("#define USE_LZ4 1") + }, + { 'compression_method' => 'zstd', 'backup_flags' => [ '--compress', 'server-zstd' ], 'backup_archive' => [ 'base.tar.zst', "$tsoid.tar.zst" ], diff --git a/src/bin/pg_verifybackup/t/010_client_untar.pl b/src/bin/pg_verifybackup/t/010_client_untar.pl index d8d2b06c7ee..b62faeb5acf 100644 --- a/src/bin/pg_verifybackup/t/010_client_untar.pl +++ b/src/bin/pg_verifybackup/t/010_client_untar.pl @@ -15,6 +15,22 @@ my $primary = PostgreSQL::Test::Cluster->new('primary'); $primary->init(allows_streaming => 1); $primary->start; +# Create file with some random data and an arbitrary size, useful to check +# the solidity of the compression and decompression logic. The size of the +# file is chosen to be around 640kB. This has proven to be large enough to +# detect some issues related to LZ4, and low enough to not impact the runtime +# of the test significantly. +my $junk_data = $primary->safe_psql( + 'postgres', qq( + SELECT string_agg(encode(sha256(i::bytea), 'hex'), '') + FROM generate_series(1, 10240) s(i);)); +my $data_dir = $primary->data_dir; +my $junk_file = "$data_dir/junk"; +open my $jf, '>', $junk_file + or die "Could not create junk file: $!"; +print $jf $junk_data; +close $jf; + my $backup_path = $primary->backup_dir . '/client-backup'; my $extract_path = $primary->backup_dir . 
'/extracted-backup'; @@ -38,6 +54,12 @@ my @test_configuration = ( 'enabled' => check_pg_config("#define USE_LZ4 1") }, { + 'compression_method' => 'lz4', + 'backup_flags' => [ '--compress', 'client-lz4:1' ], + 'backup_archive' => 'base.tar.lz4', + 'enabled' => check_pg_config("#define USE_LZ4 1") + }, + { 'compression_method' => 'zstd', 'backup_flags' => [ '--compress', 'client-zstd:5' ], 'backup_archive' => 'base.tar.zst', diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index 51fb76efc48..13d3ec2f5be 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -656,7 +656,7 @@ XLogDumpDisplayStats(XLogDumpConfig *config, XLogStats *stats) } total_len = total_rec_len + total_fpi_len; - printf("WAL statistics between %X/%X and %X/%X:\n", + printf("WAL statistics between %X/%08X and %X/%08X:\n", LSN_FORMAT_ARGS(stats->startptr), LSN_FORMAT_ARGS(stats->endptr)); /* @@ -904,7 +904,7 @@ main(int argc, char **argv) config.filter_by_extended = true; break; case 'e': - if (sscanf(optarg, "%X/%X", &xlogid, &xrecoff) != 2) + if (sscanf(optarg, "%X/%08X", &xlogid, &xrecoff) != 2) { pg_log_error("invalid WAL location: \"%s\"", optarg); @@ -1002,7 +1002,7 @@ main(int argc, char **argv) config.filter_by_extended = true; break; case 's': - if (sscanf(optarg, "%X/%X", &xlogid, &xrecoff) != 2) + if (sscanf(optarg, "%X/%08X", &xlogid, &xrecoff) != 2) { pg_log_error("invalid WAL location: \"%s\"", optarg); @@ -1140,7 +1140,7 @@ main(int argc, char **argv) XLogSegNoOffsetToRecPtr(segno, 0, WalSegSz, private.startptr); else if (!XLByteInSeg(private.startptr, segno, WalSegSz)) { - pg_log_error("start WAL location %X/%X is not inside file \"%s\"", + pg_log_error("start WAL location %X/%08X is not inside file \"%s\"", LSN_FORMAT_ARGS(private.startptr), fname); goto bad_argument; @@ -1182,7 +1182,7 @@ main(int argc, char **argv) if (!XLByteInSeg(private.endptr, segno, WalSegSz) && private.endptr != (segno + 1) * WalSegSz) { - 
pg_log_error("end WAL location %X/%X is not inside file \"%s\"", + pg_log_error("end WAL location %X/%08X is not inside file \"%s\"", LSN_FORMAT_ARGS(private.endptr), argv[argc - 1]); goto bad_argument; @@ -1214,7 +1214,7 @@ main(int argc, char **argv) first_record = XLogFindNextRecord(xlogreader_state, private.startptr); if (first_record == InvalidXLogRecPtr) - pg_fatal("could not find a valid record after %X/%X", + pg_fatal("could not find a valid record after %X/%08X", LSN_FORMAT_ARGS(private.startptr)); /* @@ -1224,8 +1224,8 @@ main(int argc, char **argv) */ if (first_record != private.startptr && XLogSegmentOffset(private.startptr, WalSegSz) != 0) - pg_log_info(ngettext("first record is after %X/%X, at %X/%X, skipping over %u byte", - "first record is after %X/%X, at %X/%X, skipping over %u bytes", + pg_log_info(ngettext("first record is after %X/%08X, at %X/%08X, skipping over %u byte", + "first record is after %X/%08X, at %X/%08X, skipping over %u bytes", (first_record - private.startptr)), LSN_FORMAT_ARGS(private.startptr), LSN_FORMAT_ARGS(first_record), @@ -1309,7 +1309,7 @@ main(int argc, char **argv) exit(0); if (errormsg) - pg_fatal("error in WAL record at %X/%X: %s", + pg_fatal("error in WAL record at %X/%08X: %s", LSN_FORMAT_ARGS(xlogreader_state->ReadRecPtr), errormsg); diff --git a/src/bin/pgbench/t/002_pgbench_no_server.pl b/src/bin/pgbench/t/002_pgbench_no_server.pl index f975c73dd75..2cc59cc8140 100644 --- a/src/bin/pgbench/t/002_pgbench_no_server.pl +++ b/src/bin/pgbench/t/002_pgbench_no_server.pl @@ -233,21 +233,9 @@ for my $o (@options) 'pgbench option error: ' . $name); } -# Help -pgbench( - '--help', 0, - [ - qr{benchmarking tool for PostgreSQL}, - qr{Usage}, - qr{Initialization options:}, - qr{Common options:}, - qr{Report bugs to} - ], - [qr{^$}], - 'pgbench help'); - -# Version -pgbench('-V', 0, [qr{^pgbench .PostgreSQL. 
}], [qr{^$}], 'pgbench version'); +program_help_ok('pgbench'); +program_version_ok('pgbench'); +program_options_handling_ok('pgbench'); # list of builtins pgbench( diff --git a/src/bin/psql/command.c b/src/bin/psql/command.c index 81a5ba844ba..9fcd2db8326 100644 --- a/src/bin/psql/command.c +++ b/src/bin/psql/command.c @@ -67,8 +67,8 @@ static backslashResult exec_command_C(PsqlScanState scan_state, bool active_bran static backslashResult exec_command_connect(PsqlScanState scan_state, bool active_branch); static backslashResult exec_command_cd(PsqlScanState scan_state, bool active_branch, const char *cmd); -static backslashResult exec_command_close(PsqlScanState scan_state, bool active_branch, - const char *cmd); +static backslashResult exec_command_close_prepared(PsqlScanState scan_state, + bool active_branch, const char *cmd); static backslashResult exec_command_conninfo(PsqlScanState scan_state, bool active_branch); static backslashResult exec_command_copy(PsqlScanState scan_state, bool active_branch); static backslashResult exec_command_copyright(PsqlScanState scan_state, bool active_branch); @@ -330,8 +330,8 @@ exec_command(const char *cmd, status = exec_command_connect(scan_state, active_branch); else if (strcmp(cmd, "cd") == 0) status = exec_command_cd(scan_state, active_branch, cmd); - else if (strcmp(cmd, "close") == 0) - status = exec_command_close(scan_state, active_branch, cmd); + else if (strcmp(cmd, "close_prepared") == 0) + status = exec_command_close_prepared(scan_state, active_branch, cmd); else if (strcmp(cmd, "conninfo") == 0) status = exec_command_conninfo(scan_state, active_branch); else if (pg_strcasecmp(cmd, "copy") == 0) @@ -728,10 +728,10 @@ exec_command_cd(PsqlScanState scan_state, bool active_branch, const char *cmd) } /* - * \close -- close a previously prepared statement + * \close_prepared -- close a previously prepared statement */ static backslashResult -exec_command_close(PsqlScanState scan_state, bool active_branch, const char 
*cmd) +exec_command_close_prepared(PsqlScanState scan_state, bool active_branch, const char *cmd) { backslashResult status = PSQL_CMD_SKIP_LINE; @@ -778,6 +778,7 @@ exec_command_conninfo(PsqlScanState scan_state, bool active_branch) int ssl_in_use, password_used, gssapi_used; + int version_num; char *paramval; if (!active_branch) @@ -793,7 +794,9 @@ exec_command_conninfo(PsqlScanState scan_state, bool active_branch) /* Get values for the parameters */ host = PQhost(pset.db); hostaddr = PQhostaddr(pset.db); - protocol_version = psprintf("%d", PQprotocolVersion(pset.db)); + version_num = PQfullProtocolVersion(pset.db); + protocol_version = psprintf("%d.%d", version_num / 10000, + version_num % 10000); ssl_in_use = PQsslInUse(pset.db); password_used = PQconnectionUsedPassword(pset.db); gssapi_used = PQconnectionUsedGSSAPI(pset.db); @@ -874,11 +877,11 @@ exec_command_conninfo(PsqlScanState scan_state, bool active_branch) printTableAddCell(&cont, _("Backend PID"), false, false); printTableAddCell(&cont, backend_pid, false, false); - /* TLS Connection */ - printTableAddCell(&cont, _("TLS Connection"), false, false); + /* SSL Connection */ + printTableAddCell(&cont, _("SSL Connection"), false, false); printTableAddCell(&cont, ssl_in_use ? _("true") : _("false"), false, false); - /* TLS Information */ + /* SSL Information */ if (ssl_in_use) { char *library, @@ -895,19 +898,19 @@ exec_command_conninfo(PsqlScanState scan_state, bool active_branch) compression = (char *) PQsslAttribute(pset.db, "compression"); alpn = (char *) PQsslAttribute(pset.db, "alpn"); - printTableAddCell(&cont, _("TLS Library"), false, false); + printTableAddCell(&cont, _("SSL Library"), false, false); printTableAddCell(&cont, library ? library : _("unknown"), false, false); - printTableAddCell(&cont, _("TLS Protocol"), false, false); + printTableAddCell(&cont, _("SSL Protocol"), false, false); printTableAddCell(&cont, protocol ? 
protocol : _("unknown"), false, false); - printTableAddCell(&cont, _("TLS Key Bits"), false, false); + printTableAddCell(&cont, _("SSL Key Bits"), false, false); printTableAddCell(&cont, key_bits ? key_bits : _("unknown"), false, false); - printTableAddCell(&cont, _("TLS Cipher"), false, false); + printTableAddCell(&cont, _("SSL Cipher"), false, false); printTableAddCell(&cont, cipher ? cipher : _("unknown"), false, false); - printTableAddCell(&cont, _("TLS Compression"), false, false); + printTableAddCell(&cont, _("SSL Compression"), false, false); printTableAddCell(&cont, (compression && strcmp(compression, "off") != 0) ? _("true") : _("false"), false, false); @@ -1946,7 +1949,7 @@ exec_command_gexec(PsqlScanState scan_state, bool active_branch) { if (PQpipelineStatus(pset.db) != PQ_PIPELINE_OFF) { - pg_log_error("\\gexec not allowed in pipeline mode"); + pg_log_error("\\%s not allowed in pipeline mode", "gexec"); clean_extended_state(); return PSQL_CMD_ERROR; } @@ -1972,7 +1975,7 @@ exec_command_gset(PsqlScanState scan_state, bool active_branch) if (PQpipelineStatus(pset.db) != PQ_PIPELINE_OFF) { - pg_log_error("\\gset not allowed in pipeline mode"); + pg_log_error("\\%s not allowed in pipeline mode", "gset"); clean_extended_state(); return PSQL_CMD_ERROR; } @@ -3284,7 +3287,7 @@ exec_command_watch(PsqlScanState scan_state, bool active_branch, if (PQpipelineStatus(pset.db) != PQ_PIPELINE_OFF) { - pg_log_error("\\watch not allowed in pipeline mode"); + pg_log_error("\\%s not allowed in pipeline mode", "watch"); clean_extended_state(); success = false; } diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c index 3e4e444f3fd..d2c0a49c46c 100644 --- a/src/bin/psql/common.c +++ b/src/bin/psql/common.c @@ -1867,6 +1867,33 @@ ExecQueryAndProcessResults(const char *query, { FILE *copy_stream = NULL; + if (PQpipelineStatus(pset.db) != PQ_PIPELINE_OFF) + { + /* + * Running COPY within a pipeline can break the protocol + * synchronisation in multiple ways, and psql 
shows its limits + * when it comes to tracking this information. + * + * While in COPY mode, the backend process ignores additional + * Sync messages and will not send the matching ReadyForQuery + * expected by the frontend. + * + * Additionally, libpq automatically sends a Sync with the + * Copy message, creating an unexpected synchronisation point. + * A failure during COPY would leave the pipeline in an + * aborted state while the backend would be in a clean state, + * ready to process commands. + * + * Improving those issues would require modifications in how + * libpq handles pipelines and COPY. Hence, for the time + * being, we forbid the use of COPY within a pipeline, + * aborting the connection to avoid an inconsistent state on + * psql side if trying to use a COPY command. + */ + pg_log_info("COPY in a pipeline is not supported, aborting connection"); + exit(EXIT_BADCONN); + } + /* * For COPY OUT, direct the output to the default place (probably * a pager pipe) for \watch, or to pset.copyStream for \copy, @@ -2601,7 +2628,7 @@ clean_extended_state(void) switch (pset.send_mode) { - case PSQL_SEND_EXTENDED_CLOSE: /* \close */ + case PSQL_SEND_EXTENDED_CLOSE: /* \close_prepared */ free(pset.stmtName); break; case PSQL_SEND_EXTENDED_PARSE: /* \parse */ diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index 1d08268393e..dd25d2fe7b8 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -296,6 +296,7 @@ describeFunctions(const char *functypes, const char *func_pattern, char **arg_patterns, int num_arg_patterns, bool verbose, bool showSystem) { + const char *df_options = "anptwSx+"; bool showAggregate = strchr(functypes, 'a') != NULL; bool showNormal = strchr(functypes, 'n') != NULL; bool showProcedure = strchr(functypes, 'p') != NULL; @@ -310,9 +311,9 @@ describeFunctions(const char *functypes, const char *func_pattern, /* No "Parallel" column before 9.6 */ static const bool translate_columns_pre_96[] = {false, false, false, false, 
true, true, false, true, true, false, false, false, false}; - if (strlen(functypes) != strspn(functypes, "anptwSx+")) + if (strlen(functypes) != strspn(functypes, df_options)) { - pg_log_error("\\df only takes [anptwSx+] as options"); + pg_log_error("\\df only takes [%s] as options", df_options); return true; } @@ -6188,8 +6189,8 @@ listExtensions(const char *pattern) "FROM pg_catalog.pg_extension e " "LEFT JOIN pg_catalog.pg_namespace n ON n.oid = e.extnamespace " "LEFT JOIN pg_catalog.pg_description d ON d.objoid = e.oid " - "LEFT JOIN pg_catalog.pg_available_extensions() ae(name, default_version, comment) ON ae.name = e.extname " - "AND d.classoid = 'pg_catalog.pg_extension'::pg_catalog.regclass\n", + "AND d.classoid = 'pg_catalog.pg_extension'::pg_catalog.regclass " + "LEFT JOIN pg_catalog.pg_available_extensions() ae(name, default_version, comment) ON ae.name = e.extname\n", gettext_noop("Name"), gettext_noop("Version"), gettext_noop("Default version"), diff --git a/src/bin/psql/help.c b/src/bin/psql/help.c index 403b51325a7..a2e009ab9be 100644 --- a/src/bin/psql/help.c +++ b/src/bin/psql/help.c @@ -252,7 +252,8 @@ slashUsage(unsigned short int pager) HELP0(" \\dO[Sx+] [PATTERN] list collations\n"); HELP0(" \\dp[Sx] [PATTERN] list table, view, and sequence access privileges\n"); HELP0(" \\dP[itnx+] [PATTERN] list [only index/table] partitioned relations [n=nested]\n"); - HELP0(" \\drds[x] [ROLEPTRN [DBPTRN]] list per-database role settings\n"); + HELP0(" \\drds[x] [ROLEPTRN [DBPTRN]]\n" + " list per-database role settings\n"); HELP0(" \\drg[Sx] [PATTERN] list role grants\n"); HELP0(" \\dRp[x+] [PATTERN] list replication publications\n"); HELP0(" \\dRs[x+] [PATTERN] list replication subscriptions\n"); @@ -330,12 +331,12 @@ slashUsage(unsigned short int pager) HELP0(" \\bind [PARAM]... 
set query parameters\n"); HELP0(" \\bind_named STMT_NAME [PARAM]...\n" " set query parameters for an existing prepared statement\n"); - HELP0(" \\close STMT_NAME close an existing prepared statement\n"); + HELP0(" \\close_prepared STMT_NAME\n" + " close an existing prepared statement\n"); HELP0(" \\endpipeline exit pipeline mode\n"); HELP0(" \\flush flush output data to the server\n"); HELP0(" \\flushrequest send request to the server to flush its output buffer\n"); - HELP0(" \\getresults [NUM_RES] read NUM_RES pending results. All pending results are\n" - " read if no argument is provided\n"); + HELP0(" \\getresults [NUM_RES] read NUM_RES pending results, or all if no argument\n"); HELP0(" \\parse STMT_NAME create a prepared statement\n"); HELP0(" \\sendpipeline send an extended query to an ongoing pipeline\n"); HELP0(" \\startpipeline enter pipeline mode\n"); @@ -463,8 +464,9 @@ helpVariables(unsigned short int pager) " VERSION_NAME\n" " VERSION_NUM\n" " psql's version (in verbose string, short string, or numeric format)\n"); - HELP0(" WATCH_INTERVAL\n" - " if set to a number, overrides the default two second \\watch interval\n"); + HELPN(" WATCH_INTERVAL\n" + " number of seconds \\watch waits between executions (default %s)\n", + DEFAULT_WATCH_INTERVAL); HELP0("\nDisplay settings:\n"); HELP0("Usage:\n"); diff --git a/src/bin/psql/t/001_basic.pl b/src/bin/psql/t/001_basic.pl index 4050f9a5e3e..f42c3961e09 100644 --- a/src/bin/psql/t/001_basic.pl +++ b/src/bin/psql/t/001_basic.pl @@ -483,8 +483,8 @@ psql_like($node, "copy (values ('foo'),('bar')) to stdout \\g | $pipe_cmd", my $c4 = slurp_file($g_file); like($c4, qr/foo.*bar/s); -# Tests with pipelines. These trigger FATAL failures in the backend, -# so they cannot be tested via SQL. +# Test COPY within pipelines. These abort the connection from +# the frontend so they cannot be tested via SQL. 
$node->safe_psql('postgres', 'CREATE TABLE psql_pipeline()'); my $log_location = -s $node->logfile; psql_fails_like( @@ -493,35 +493,41 @@ psql_fails_like( COPY psql_pipeline FROM STDIN; SELECT 'val1'; \\syncpipeline -\\getresults \\endpipeline}, - qr/server closed the connection unexpectedly/, - 'protocol sync loss in pipeline: direct COPY, SELECT, sync and getresult' -); + qr/COPY in a pipeline is not supported, aborting connection/, + 'COPY FROM in pipeline: fails'); $node->wait_for_log( qr/FATAL: .*terminating connection because protocol synchronization was lost/, $log_location); +# Remove \syncpipeline here. psql_fails_like( $node, qq{\\startpipeline -COPY psql_pipeline FROM STDIN \\bind \\sendpipeline -SELECT 'val1' \\bind \\sendpipeline -\\syncpipeline -\\getresults +COPY psql_pipeline TO STDOUT; +SELECT 'val1'; \\endpipeline}, - qr/server closed the connection unexpectedly/, - 'protocol sync loss in pipeline: bind COPY, SELECT, sync and getresult'); + qr/COPY in a pipeline is not supported, aborting connection/, + 'COPY TO in pipeline: fails'); -# This time, test without the \getresults. psql_fails_like( $node, qq{\\startpipeline -COPY psql_pipeline FROM STDIN; +\\copy psql_pipeline from stdin; SELECT 'val1'; \\syncpipeline \\endpipeline}, - qr/server closed the connection unexpectedly/, - 'protocol sync loss in pipeline: COPY, SELECT and sync'); + qr/COPY in a pipeline is not supported, aborting connection/, + '\copy from in pipeline: fails'); + +# Sync attempt after a COPY TO/FROM. 
+psql_fails_like( + $node, + qq{\\startpipeline +\\copy psql_pipeline to stdout; +\\syncpipeline +\\endpipeline}, + qr/COPY in a pipeline is not supported, aborting connection/, + '\copy to in pipeline: fails'); done_testing(); diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index ec65ab79fec..53e7d35fe98 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -889,6 +889,14 @@ static const SchemaQuery Query_for_list_of_analyzables = { .result = "c.relname", }; +/* + * Relations supporting COPY TO/FROM are currently almost the same as + * those supporting ANALYZE. Although views with INSTEAD OF INSERT triggers + * can be used with COPY FROM, they are rarely used for this purpose, + * so plain views are intentionally excluded from this tab completion. + */ +#define Query_for_list_of_tables_for_copy Query_for_list_of_analyzables + /* Relations supporting index creation */ static const SchemaQuery Query_for_list_of_indexables = { .catname = "pg_catalog.pg_class c", @@ -1875,7 +1883,7 @@ psql_completion(const char *text, int start, int end) static const char *const backslash_commands[] = { "\\a", "\\bind", "\\bind_named", - "\\connect", "\\conninfo", "\\C", "\\cd", "\\close", "\\copy", + "\\connect", "\\conninfo", "\\C", "\\cd", "\\close_prepared", "\\copy", "\\copyright", "\\crosstabview", "\\d", "\\da", "\\dA", "\\dAc", "\\dAf", "\\dAo", "\\dAp", "\\db", "\\dc", "\\dconfig", "\\dC", "\\dd", "\\ddp", "\\dD", @@ -2725,17 +2733,24 @@ match_previous_words(int pattern_id, /* ALTER TABLE xxx ADD */ else if (Matches("ALTER", "TABLE", MatchAny, "ADD")) { - /* make sure to keep this list and the !Matches() below in sync */ - COMPLETE_WITH("COLUMN", "CONSTRAINT", "CHECK", "UNIQUE", "PRIMARY KEY", - "EXCLUDE", "FOREIGN KEY"); + /* + * make sure to keep this list and the MatchAnyExcept() below in sync + */ + COMPLETE_WITH("COLUMN", "CONSTRAINT", "CHECK (", "NOT NULL", "UNIQUE", + "PRIMARY KEY", "EXCLUDE", "FOREIGN KEY"); } 
/* ALTER TABLE xxx ADD [COLUMN] yyy */ else if (Matches("ALTER", "TABLE", MatchAny, "ADD", "COLUMN", MatchAny) || - Matches("ALTER", "TABLE", MatchAny, "ADD", MatchAnyExcept("COLUMN|CONSTRAINT|CHECK|UNIQUE|PRIMARY|EXCLUDE|FOREIGN"))) + Matches("ALTER", "TABLE", MatchAny, "ADD", MatchAnyExcept("COLUMN|CONSTRAINT|CHECK|UNIQUE|PRIMARY|NOT|EXCLUDE|FOREIGN"))) COMPLETE_WITH_SCHEMA_QUERY(Query_for_list_of_datatypes); /* ALTER TABLE xxx ADD CONSTRAINT yyy */ else if (Matches("ALTER", "TABLE", MatchAny, "ADD", "CONSTRAINT", MatchAny)) - COMPLETE_WITH("CHECK", "UNIQUE", "PRIMARY KEY", "EXCLUDE", "FOREIGN KEY"); + COMPLETE_WITH("CHECK (", "NOT NULL", "UNIQUE", "PRIMARY KEY", "EXCLUDE", "FOREIGN KEY"); + /* ALTER TABLE xxx ADD NOT NULL */ + else if (Matches("ALTER", "TABLE", MatchAny, "ADD", "NOT", "NULL")) + COMPLETE_WITH_ATTR(prev4_wd); + else if (Matches("ALTER", "TABLE", MatchAny, "ADD", "CONSTRAINT", MatchAny, "NOT", "NULL")) + COMPLETE_WITH_ATTR(prev6_wd); /* ALTER TABLE xxx ADD [CONSTRAINT yyy] (PRIMARY KEY|UNIQUE) */ else if (Matches("ALTER", "TABLE", MatchAny, "ADD", "PRIMARY", "KEY") || Matches("ALTER", "TABLE", MatchAny, "ADD", "UNIQUE") || @@ -3255,7 +3270,7 @@ match_previous_words(int pattern_id, * backslash command). 
*/ else if (Matches("COPY|\\copy")) - COMPLETE_WITH_SCHEMA_QUERY_PLUS(Query_for_list_of_tables, "("); + COMPLETE_WITH_SCHEMA_QUERY_PLUS(Query_for_list_of_tables_for_copy, "("); /* Complete COPY ( with legal query commands */ else if (Matches("COPY|\\copy", "(")) COMPLETE_WITH("SELECT", "TABLE", "VALUES", "INSERT INTO", "UPDATE", "DELETE FROM", "MERGE INTO", "WITH"); @@ -3289,7 +3304,7 @@ match_previous_words(int pattern_id, COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL", "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE", "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT", - "ON_ERROR", "LOG_VERBOSITY"); + "ON_ERROR", "LOG_VERBOSITY", "REJECT_LIMIT"); /* Complete COPY <sth> FROM|TO filename WITH (FORMAT */ else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT")) @@ -3664,9 +3679,10 @@ match_previous_words(int pattern_id, TailMatches("CREATE", "TEMP|TEMPORARY|UNLOGGED", "TABLE", MatchAny, "(*)", "AS")) COMPLETE_WITH("EXECUTE", "SELECT", "TABLE", "VALUES", "WITH"); /* Complete CREATE TABLE name (...) 
with supported options */ - else if (TailMatches("CREATE", "TABLE", MatchAny, "(*)") || - TailMatches("CREATE", "UNLOGGED", "TABLE", MatchAny, "(*)")) + else if (TailMatches("CREATE", "TABLE", MatchAny, "(*)")) COMPLETE_WITH("AS", "INHERITS (", "PARTITION BY", "USING", "TABLESPACE", "WITH ("); + else if (TailMatches("CREATE", "UNLOGGED", "TABLE", MatchAny, "(*)")) + COMPLETE_WITH("AS", "INHERITS (", "USING", "TABLESPACE", "WITH ("); else if (TailMatches("CREATE", "TEMP|TEMPORARY", "TABLE", MatchAny, "(*)")) COMPLETE_WITH("AS", "INHERITS (", "ON COMMIT", "PARTITION BY", "USING", "TABLESPACE", "WITH ("); diff --git a/src/bin/psql/variables.c b/src/bin/psql/variables.c index ae2d0e5ed3f..6b64302ebca 100644 --- a/src/bin/psql/variables.c +++ b/src/bin/psql/variables.c @@ -204,7 +204,7 @@ ParseVariableDouble(const char *value, const char *name, double *result, double if ((value == NULL) || (*value == '\0')) { if (name) - pg_log_error("invalid input syntax for \"%s\"", name); + pg_log_error("invalid input syntax for variable \"%s\"", name); return false; } @@ -215,14 +215,14 @@ ParseVariableDouble(const char *value, const char *name, double *result, double if (dblval < min) { if (name) - pg_log_error("invalid value \"%s\" for \"%s\": must be greater than %.2f", + pg_log_error("invalid value \"%s\" for variable \"%s\": must be greater than %.2f", value, name, min); return false; } else if (dblval > max) { if (name) - pg_log_error("invalid value \"%s\" for \"%s\": must be less than %.2f", + pg_log_error("invalid value \"%s\" for variable \"%s\": must be less than %.2f", value, name, max); } *result = dblval; @@ -238,13 +238,13 @@ ParseVariableDouble(const char *value, const char *name, double *result, double (dblval == 0.0 || dblval >= HUGE_VAL || dblval <= -HUGE_VAL)) { if (name) - pg_log_error("\"%s\" is out of range for \"%s\"", value, name); + pg_log_error("value \"%s\" is out of range for variable \"%s\"", value, name); return false; } else { if (name) - 
pg_log_error("invalid value \"%s\" for \"%s\"", value, name); + pg_log_error("invalid value \"%s\" for variable \"%s\"", value, name); return false; } } diff --git a/src/bin/scripts/t/100_vacuumdb.pl b/src/bin/scripts/t/100_vacuumdb.pl index 75ac24a7a55..ff56a13b46b 100644 --- a/src/bin/scripts/t/100_vacuumdb.pl +++ b/src/bin/scripts/t/100_vacuumdb.pl @@ -238,62 +238,105 @@ $node->command_fails_like( 'cannot use option --all and a dbname as argument at the same time'); $node->safe_psql('postgres', - 'CREATE TABLE regression_vacuumdb_test AS select generate_series(1, 10) a, generate_series(2, 11) b;'); + 'CREATE TABLE regression_vacuumdb_test AS select generate_series(1, 10) a, generate_series(2, 11) b;' +); $node->issues_sql_like( - [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-only', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with missing stats'); $node->issues_sql_unlike( - [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-only', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with no missing stats'); $node->safe_psql('postgres', - 'CREATE INDEX regression_vacuumdb_test_idx ON regression_vacuumdb_test (mod(a, 2));'); + 'CREATE INDEX regression_vacuumdb_test_idx ON regression_vacuumdb_test (mod(a, 2));' +); $node->issues_sql_like( - [ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-in-stages', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with missing index expression stats'); $node->issues_sql_unlike( - [ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 
'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-in-stages', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with no missing index expression stats'); $node->safe_psql('postgres', - 'CREATE STATISTICS regression_vacuumdb_test_stat ON a, b FROM regression_vacuumdb_test;'); + 'CREATE STATISTICS regression_vacuumdb_test_stat ON a, b FROM regression_vacuumdb_test;' +); $node->issues_sql_like( - [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-only', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with missing extended stats'); $node->issues_sql_unlike( - [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-only', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with no missing extended stats'); $node->safe_psql('postgres', "CREATE TABLE regression_vacuumdb_child (a INT) INHERITS (regression_vacuumdb_test);\n" - . "INSERT INTO regression_vacuumdb_child VALUES (1, 2);\n" - . "ANALYZE regression_vacuumdb_child;\n"); + . "INSERT INTO regression_vacuumdb_child VALUES (1, 2);\n" + . 
"ANALYZE regression_vacuumdb_child;\n"); $node->issues_sql_like( - [ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-in-stages', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with missing inherited stats'); $node->issues_sql_unlike( - [ 'vacuumdb', '--analyze-in-stages', '--missing-stats-only', '-t', 'regression_vacuumdb_test', 'postgres' ], + [ + 'vacuumdb', '--analyze-in-stages', + '--missing-stats-only', '-t', + 'regression_vacuumdb_test', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with no missing inherited stats'); $node->safe_psql('postgres', "CREATE TABLE regression_vacuumdb_parted (a INT) PARTITION BY LIST (a);\n" - . "CREATE TABLE regression_vacuumdb_part1 PARTITION OF regression_vacuumdb_parted FOR VALUES IN (1);\n" - . "INSERT INTO regression_vacuumdb_parted VALUES (1);\n" - . "ANALYZE regression_vacuumdb_part1;\n"); + . "CREATE TABLE regression_vacuumdb_part1 PARTITION OF regression_vacuumdb_parted FOR VALUES IN (1);\n" + . "INSERT INTO regression_vacuumdb_parted VALUES (1);\n" + . 
"ANALYZE regression_vacuumdb_part1;\n"); $node->issues_sql_like( - [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_parted', 'postgres' ], + [ + 'vacuumdb', '--analyze-only', + '--missing-stats-only', '-t', + 'regression_vacuumdb_parted', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with missing partition stats'); $node->issues_sql_unlike( - [ 'vacuumdb', '--analyze-only', '--missing-stats-only', '-t', 'regression_vacuumdb_parted', 'postgres' ], + [ + 'vacuumdb', '--analyze-only', + '--missing-stats-only', '-t', + 'regression_vacuumdb_parted', 'postgres' + ], qr/statement:\ ANALYZE/sx, '--missing-stats-only with no missing partition stats'); diff --git a/src/common/parse_manifest.c b/src/common/parse_manifest.c index 71973af199b..58e0948100f 100644 --- a/src/common/parse_manifest.c +++ b/src/common/parse_manifest.c @@ -942,7 +942,7 @@ parse_xlogrecptr(XLogRecPtr *result, char *input) uint32 hi; uint32 lo; - if (sscanf(input, "%X/%X", &hi, &lo) != 2) + if (sscanf(input, "%X/%08X", &hi, &lo) != 2) return false; *result = ((uint64) hi) << 32 | lo; return true; diff --git a/src/fe_utils/astreamer_lz4.c b/src/fe_utils/astreamer_lz4.c index 781aaf99f38..5f581d1de37 100644 --- a/src/fe_utils/astreamer_lz4.c +++ b/src/fe_utils/astreamer_lz4.c @@ -322,9 +322,9 @@ astreamer_lz4_decompressor_content(astreamer *streamer, mystreamer = (astreamer_lz4_frame *) streamer; next_in = (uint8 *) data; - next_out = (uint8 *) mystreamer->base.bbs_buffer.data; + next_out = (uint8 *) mystreamer->base.bbs_buffer.data + mystreamer->bytes_written; avail_in = len; - avail_out = mystreamer->base.bbs_buffer.maxlen; + avail_out = mystreamer->base.bbs_buffer.maxlen - mystreamer->bytes_written; while (avail_in > 0) { diff --git a/src/include/access/commit_ts.h b/src/include/access/commit_ts.h index b8294e41b97..dc39e7dd32c 100644 --- a/src/include/access/commit_ts.h +++ b/src/include/access/commit_ts.h @@ -46,17 +46,6 @@ extern int 
committssyncfiletag(const FileTag *ftag, char *path); #define COMMIT_TS_ZEROPAGE 0x00 #define COMMIT_TS_TRUNCATE 0x10 -typedef struct xl_commit_ts_set -{ - TimestampTz timestamp; - RepOriginId nodeid; - TransactionId mainxid; - /* subxact Xids follow */ -} xl_commit_ts_set; - -#define SizeOfCommitTsSet (offsetof(xl_commit_ts_set, mainxid) + \ - sizeof(TransactionId)) - typedef struct xl_commit_ts_truncate { int64 pageno; diff --git a/src/include/access/gist.h b/src/include/access/gist.h index db78e60eeab..b3f4e02cbfd 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -40,7 +40,7 @@ #define GIST_FETCH_PROC 9 #define GIST_OPTIONS_PROC 10 #define GIST_SORTSUPPORT_PROC 11 -#define GIST_STRATNUM_PROC 12 +#define GIST_TRANSLATE_CMPTYPE_PROC 12 #define GISTNProcs 12 /* diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index e48fe434cd3..a2bd5a897f8 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -21,6 +21,7 @@ #include "access/skey.h" #include "access/table.h" /* for backward compatibility */ #include "access/tableam.h" +#include "commands/vacuum.h" #include "nodes/lockoptions.h" #include "nodes/primnodes.h" #include "storage/bufpage.h" @@ -96,7 +97,7 @@ typedef struct HeapScanDescData uint32 rs_cindex; /* current tuple's index in vistuples */ uint32 rs_ntuples; /* number of visible tuples on page */ OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */ -} HeapScanDescData; +} HeapScanDescData; typedef struct HeapScanDescData *HeapScanDesc; typedef struct BitmapHeapScanDescData @@ -396,9 +397,8 @@ extern void log_heap_prune_and_freeze(Relation relation, Buffer buffer, OffsetNumber *unused, int nunused); /* in heap/vacuumlazy.c */ -struct VacuumParams; extern void heap_vacuum_rel(Relation rel, - struct VacuumParams *params, BufferAccessStrategy bstrategy); + const VacuumParams params, BufferAccessStrategy bstrategy); /* in heap/heapam_visibility.c */ extern bool 
HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot snapshot, diff --git a/src/include/access/multixact.h b/src/include/access/multixact.h index 4e6b0eec2ff..b876e98f46e 100644 --- a/src/include/access/multixact.h +++ b/src/include/access/multixact.h @@ -11,6 +11,7 @@ #ifndef MULTIXACT_H #define MULTIXACT_H +#include "access/transam.h" #include "access/xlogreader.h" #include "lib/stringinfo.h" #include "storage/sync.h" @@ -119,7 +120,7 @@ extern int multixactmemberssyncfiletag(const FileTag *ftag, char *path); extern void AtEOXact_MultiXact(void); extern void AtPrepare_MultiXact(void); -extern void PostPrepare_MultiXact(TransactionId xid); +extern void PostPrepare_MultiXact(FullTransactionId fxid); extern Size MultiXactShmemSize(void); extern void MultiXactShmemInit(void); @@ -145,11 +146,11 @@ extern void MultiXactAdvanceNextMXact(MultiXactId minMulti, extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB); extern int MultiXactMemberFreezeThreshold(void); -extern void multixact_twophase_recover(TransactionId xid, uint16 info, +extern void multixact_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void multixact_twophase_postcommit(TransactionId xid, uint16 info, +extern void multixact_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void multixact_twophase_postabort(TransactionId xid, uint16 info, +extern void multixact_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); extern void multixact_redo(XLogReaderState *record); diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index ebca02588d3..e709d2e0afe 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -939,7 +939,7 @@ typedef BTVacuumPostingData *BTVacuumPosting; * processing. This approach minimizes lock/unlock traffic. We must always * drop the lock to make it okay for caller to process the returned items. 
* Whether or not we can also release the pin during this window will vary. - * We drop the pin eagerly (when safe) to avoid blocking progress by VACUUM + * We drop the pin (when so->dropPin) to avoid blocking progress by VACUUM * (see nbtree/README section about making concurrent TID recycling safe). * We'll always release both the lock and the pin on the current page before * moving on to its sibling page. @@ -967,7 +967,7 @@ typedef struct BTScanPosData BlockNumber currPage; /* page referenced by items array */ BlockNumber prevPage; /* currPage's left link */ BlockNumber nextPage; /* currPage's right link */ - XLogRecPtr lsn; /* currPage's LSN */ + XLogRecPtr lsn; /* currPage's LSN (when so->dropPin) */ /* scan direction for the saved position's call to _bt_readpage */ ScanDirection dir; @@ -1070,6 +1070,7 @@ typedef struct BTScanOpaqueData /* info about killed items if any (killedItems is NULL if never used) */ int *killedItems; /* currPos.items indexes of killed items */ int numKilled; /* number of currently stored items */ + bool dropPin; /* drop leaf pin before btgettuple returns? 
*/ /* * If we are doing an index-only scan, these are the tuple storage diff --git a/src/include/access/slru.h b/src/include/access/slru.h index e142800aab2..20dbd1e0070 100644 --- a/src/include/access/slru.h +++ b/src/include/access/slru.h @@ -187,6 +187,7 @@ extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns, int bank_tranche_id, SyncRequestHandler sync_handler, bool long_segment_names); extern int SimpleLruZeroPage(SlruCtl ctl, int64 pageno); +extern void SimpleLruZeroAndWritePage(SlruCtl ctl, int64 pageno); extern int SimpleLruReadPage(SlruCtl ctl, int64 pageno, bool write_ok, TransactionId xid); extern int SimpleLruReadPage_ReadOnly(SlruCtl ctl, int64 pageno, diff --git a/src/include/access/tableam.h b/src/include/access/tableam.h index 8713e12cbfb..1c9e802a6b1 100644 --- a/src/include/access/tableam.h +++ b/src/include/access/tableam.h @@ -20,6 +20,7 @@ #include "access/relscan.h" #include "access/sdir.h" #include "access/xact.h" +#include "commands/vacuum.h" #include "executor/tuptable.h" #include "storage/read_stream.h" #include "utils/rel.h" @@ -36,7 +37,6 @@ extern PGDLLIMPORT bool synchronize_seqscans; struct BulkInsertStateData; struct IndexInfo; struct SampleScanState; -struct VacuumParams; struct ValidateIndexState; /* @@ -645,7 +645,7 @@ typedef struct TableAmRoutine * integrate with autovacuum's scheduling. */ void (*relation_vacuum) (Relation rel, - struct VacuumParams *params, + const VacuumParams params, BufferAccessStrategy bstrategy); /* @@ -1664,7 +1664,7 @@ table_relation_copy_for_cluster(Relation OldTable, Relation NewTable, * routine, even if (for ANALYZE) it is part of the same VACUUM command. 
*/ static inline void -table_relation_vacuum(Relation rel, struct VacuumParams *params, +table_relation_vacuum(Relation rel, const VacuumParams params, BufferAccessStrategy bstrategy) { rel->rd_tableam->relation_vacuum(rel, params, bstrategy); diff --git a/src/include/access/twophase.h b/src/include/access/twophase.h index 9fa82355033..509bdad9a5d 100644 --- a/src/include/access/twophase.h +++ b/src/include/access/twophase.h @@ -36,10 +36,10 @@ extern void PostPrepare_Twophase(void); extern TransactionId TwoPhaseGetXidByVirtualXID(VirtualTransactionId vxid, bool *have_more); -extern PGPROC *TwoPhaseGetDummyProc(TransactionId xid, bool lock_held); -extern int TwoPhaseGetDummyProcNumber(TransactionId xid, bool lock_held); +extern PGPROC *TwoPhaseGetDummyProc(FullTransactionId fxid, bool lock_held); +extern int TwoPhaseGetDummyProcNumber(FullTransactionId fxid, bool lock_held); -extern GlobalTransaction MarkAsPreparing(TransactionId xid, const char *gid, +extern GlobalTransaction MarkAsPreparing(FullTransactionId fxid, const char *gid, TimestampTz prepared_at, Oid owner, Oid databaseid); @@ -56,8 +56,9 @@ extern void CheckPointTwoPhase(XLogRecPtr redo_horizon); extern void FinishPreparedTransaction(const char *gid, bool isCommit); -extern void PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, - XLogRecPtr end_lsn, RepOriginId origin_id); +extern void PrepareRedoAdd(FullTransactionId fxid, char *buf, + XLogRecPtr start_lsn, XLogRecPtr end_lsn, + RepOriginId origin_id); extern void PrepareRedoRemove(TransactionId xid, bool giveWarning); extern void restoreTwoPhaseData(void); extern bool LookupGXact(const char *gid, XLogRecPtr prepare_end_lsn, diff --git a/src/include/access/twophase_rmgr.h b/src/include/access/twophase_rmgr.h index 3ed154bb231..8f576402e36 100644 --- a/src/include/access/twophase_rmgr.h +++ b/src/include/access/twophase_rmgr.h @@ -14,7 +14,9 @@ #ifndef TWOPHASE_RMGR_H #define TWOPHASE_RMGR_H -typedef void (*TwoPhaseCallback) (TransactionId xid, uint16 
info, +#include "access/transam.h" + +typedef void (*TwoPhaseCallback) (FullTransactionId fxid, uint16 info, void *recdata, uint32 len); typedef uint8 TwoPhaseRmgrId; diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h index 9e41c9f6e84..514f03df0b6 100644 --- a/src/include/access/xlogdefs.h +++ b/src/include/access/xlogdefs.h @@ -38,7 +38,10 @@ typedef uint64 XLogRecPtr; /* * Handy macro for printing XLogRecPtr in conventional format, e.g., * - * printf("%X/%X", LSN_FORMAT_ARGS(lsn)); + * printf("%X/%08X", LSN_FORMAT_ARGS(lsn)); + * + * To avoid breaking translatable messages, we're directly applying the + * LSN format instead of using a macro. */ #define LSN_FORMAT_ARGS(lsn) (AssertVariableIsOfTypeMacro((lsn), XLogRecPtr), (uint32) ((lsn) >> 32)), ((uint32) (lsn)) diff --git a/src/include/access/xloginsert.h b/src/include/access/xloginsert.h index cf057f033a2..d6a71415d4f 100644 --- a/src/include/access/xloginsert.h +++ b/src/include/access/xloginsert.h @@ -44,6 +44,7 @@ extern void XLogBeginInsert(void); extern void XLogSetRecordFlags(uint8 flags); extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info); +extern XLogRecPtr XLogSimpleInsertInt64(RmgrId rmid, uint8 info, int64 value); extern void XLogEnsureRecordSpace(int max_block_id, int ndatas); extern void XLogRegisterData(const void *data, uint32 len); extern void XLogRegisterBuffer(uint8 block_id, Buffer buffer, uint8 flags); diff --git a/src/include/c.h b/src/include/c.h index 8cdc16a0f4a..04fd23577de 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -376,25 +376,7 @@ * pretty trivial: VA_ARGS_NARGS_() returns its 64th argument, and we set up * the call so that that is the appropriate one of the list of constants. * This idea is due to Laurent Deniau. - * - * MSVC has an implementation of __VA_ARGS__ that doesn't conform to the - * standard unless you use the /Zc:preprocessor compiler flag, but that - * isn't available before Visual Studio 2019.
For now, use a different - * definition that also works on older compilers. */ -#ifdef _MSC_VER -#define EXPAND(args) args -#define VA_ARGS_NARGS(...) \ - VA_ARGS_NARGS_ EXPAND((__VA_ARGS__, \ - 63,62,61,60, \ - 59,58,57,56,55,54,53,52,51,50, \ - 49,48,47,46,45,44,43,42,41,40, \ - 39,38,37,36,35,34,33,32,31,30, \ - 29,28,27,26,25,24,23,22,21,20, \ - 19,18,17,16,15,14,13,12,11,10, \ - 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)) -#else - #define VA_ARGS_NARGS(...) \ VA_ARGS_NARGS_(__VA_ARGS__, \ 63,62,61,60, \ @@ -404,7 +386,6 @@ 29,28,27,26,25,24,23,22,21,20, \ 19,18,17,16,15,14,13,12,11,10, \ 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) -#endif #define VA_ARGS_NARGS_( \ _01,_02,_03,_04,_05,_06,_07,_08,_09,_10, \ diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 82988d24433..ff9ffd9d474 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202505071 +#define CATALOG_VERSION_NO 202506301 #endif diff --git a/src/include/catalog/pg_amproc.dat b/src/include/catalog/pg_amproc.dat index 92505148998..e3477500baa 100644 --- a/src/include/catalog/pg_amproc.dat +++ b/src/include/catalog/pg_amproc.dat @@ -533,7 +533,7 @@ amprocrighttype => 'box', amprocnum => '8', amproc => 'gist_box_distance' }, { amprocfamily => 'gist/box_ops', amproclefttype => 'any', amprocrighttype => 'any', amprocnum => '12', - amproc => 'gist_stratnum_common' }, + amproc => 'gist_translate_cmptype_common' }, { amprocfamily => 'gist/poly_ops', amproclefttype => 'polygon', amprocrighttype => 'polygon', amprocnum => '1', amproc => 'gist_poly_consistent' }, @@ -555,7 +555,7 @@ amproc => 'gist_poly_distance' }, { amprocfamily => 'gist/poly_ops', amproclefttype => 'any', amprocrighttype => 'any', amprocnum => '12', - amproc => 'gist_stratnum_common' }, + amproc => 'gist_translate_cmptype_common' }, { amprocfamily => 'gist/circle_ops', amproclefttype => 'circle', amprocrighttype => 'circle', amprocnum => 
'1', amproc => 'gist_circle_consistent' }, @@ -576,7 +576,7 @@ amproc => 'gist_circle_distance' }, { amprocfamily => 'gist/circle_ops', amproclefttype => 'any', amprocrighttype => 'any', amprocnum => '12', - amproc => 'gist_stratnum_common' }, + amproc => 'gist_translate_cmptype_common' }, { amprocfamily => 'gist/tsvector_ops', amproclefttype => 'tsvector', amprocrighttype => 'tsvector', amprocnum => '1', amproc => 'gtsvector_consistent(internal,tsvector,int2,oid,internal)' }, @@ -636,7 +636,7 @@ amproc => 'range_sortsupport' }, { amprocfamily => 'gist/range_ops', amproclefttype => 'any', amprocrighttype => 'any', amprocnum => '12', - amproc => 'gist_stratnum_common' }, + amproc => 'gist_translate_cmptype_common' }, { amprocfamily => 'gist/network_ops', amproclefttype => 'inet', amprocrighttype => 'inet', amprocnum => '1', amproc => 'inet_gist_consistent' }, @@ -655,7 +655,7 @@ amprocrighttype => 'inet', amprocnum => '9', amproc => 'inet_gist_fetch' }, { amprocfamily => 'gist/network_ops', amproclefttype => 'any', amprocrighttype => 'any', amprocnum => '12', - amproc => 'gist_stratnum_common' }, + amproc => 'gist_translate_cmptype_common' }, { amprocfamily => 'gist/multirange_ops', amproclefttype => 'anymultirange', amprocrighttype => 'anymultirange', amprocnum => '1', amproc => 'multirange_gist_consistent' }, @@ -676,7 +676,7 @@ amproc => 'range_gist_same' }, { amprocfamily => 'gist/multirange_ops', amproclefttype => 'any', amprocrighttype => 'any', amprocnum => '12', - amproc => 'gist_stratnum_common' }, + amproc => 'gist_translate_cmptype_common' }, # gin { amprocfamily => 'gin/array_ops', amproclefttype => 'anyarray', diff --git a/src/include/catalog/pg_authid.dat b/src/include/catalog/pg_authid.dat index eb4dab5c6aa..c881c13adf1 100644 --- a/src/include/catalog/pg_authid.dat +++ b/src/include/catalog/pg_authid.dat @@ -99,7 +99,7 @@ rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', 
rolpassword => '_null_', rolvaliduntil => '_null_' }, -{ oid => '8916', oid_symbol => 'ROLE_PG_SIGNAL_AUTOVACUUM_WORKER', +{ oid => '6392', oid_symbol => 'ROLE_PG_SIGNAL_AUTOVACUUM_WORKER', rolname => 'pg_signal_autovacuum_worker', rolsuper => 'f', rolinherit => 't', rolcreaterole => 'f', rolcreatedb => 'f', rolcanlogin => 'f', rolreplication => 'f', rolbypassrls => 'f', rolconnlimit => '-1', diff --git a/src/include/catalog/pg_cast.dat b/src/include/catalog/pg_cast.dat index ab46be606f0..fbfd669587f 100644 --- a/src/include/catalog/pg_cast.dat +++ b/src/include/catalog/pg_cast.dat @@ -281,6 +281,20 @@ castcontext => 'a', castmethod => 'f' }, { castsource => 'regnamespace', casttarget => 'int4', castfunc => '0', castcontext => 'a', castmethod => 'b' }, +{ castsource => 'oid', casttarget => 'regdatabase', castfunc => '0', + castcontext => 'i', castmethod => 'b' }, +{ castsource => 'regdatabase', casttarget => 'oid', castfunc => '0', + castcontext => 'i', castmethod => 'b' }, +{ castsource => 'int8', casttarget => 'regdatabase', castfunc => 'oid', + castcontext => 'i', castmethod => 'f' }, +{ castsource => 'int2', casttarget => 'regdatabase', castfunc => 'int4(int2)', + castcontext => 'i', castmethod => 'f' }, +{ castsource => 'int4', casttarget => 'regdatabase', castfunc => '0', + castcontext => 'i', castmethod => 'b' }, +{ castsource => 'regdatabase', casttarget => 'int8', castfunc => 'int8(oid)', + castcontext => 'a', castmethod => 'f' }, +{ castsource => 'regdatabase', casttarget => 'int4', castfunc => '0', + castcontext => 'a', castmethod => 'b' }, # String category { castsource => 'text', casttarget => 'bpchar', castfunc => '0', diff --git a/src/include/catalog/pg_collation.dat b/src/include/catalog/pg_collation.dat index fb76c421931..8cfd09f0314 100644 --- a/src/include/catalog/pg_collation.dat +++ b/src/include/catalog/pg_collation.dat @@ -33,7 +33,8 @@ descr => 'sorts by Unicode code point; Unicode and POSIX character semantics', collname => 'pg_c_utf8', 
collprovider => 'b', collencoding => '6', colllocale => 'C.UTF-8', collversion => '1' }, -{ oid => '9535', descr => 'sorts by Unicode code point; Unicode character semantics', +{ oid => '6411', + descr => 'sorts by Unicode code point; Unicode character semantics', collname => 'pg_unicode_fast', collprovider => 'b', collencoding => '6', colllocale => 'PG_UNICODE_FAST', collversion => '1' }, diff --git a/src/include/catalog/pg_index.h b/src/include/catalog/pg_index.h index 4392b9d221d..731d3938169 100644 --- a/src/include/catalog/pg_index.h +++ b/src/include/catalog/pg_index.h @@ -69,7 +69,7 @@ CATALOG(pg_index,2610,IndexRelationId) BKI_SCHEMA_MACRO */ typedef FormData_pg_index *Form_pg_index; -DECLARE_TOAST_WITH_MACRO(pg_index, 8149, 8150, PgIndexToastTable, PgIndexToastIndex); +DECLARE_TOAST_WITH_MACRO(pg_index, 6351, 6352, PgIndexToastTable, PgIndexToastIndex); DECLARE_INDEX(pg_index_indrelid_index, 2678, IndexIndrelidIndexId, pg_index, btree(indrelid oid_ops)); DECLARE_UNIQUE_INDEX_PKEY(pg_index_indexrelid_index, 2679, IndexRelidIndexId, pg_index, btree(indexrelid oid_ops)); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 62beb71da28..d4650947c63 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1004,7 +1004,7 @@ { oid => '3129', descr => 'sort support', proname => 'btint2sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint2sortsupport' }, -{ oid => '9290', descr => 'skip support', +{ oid => '6402', descr => 'skip support', proname => 'btint2skipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint2skipsupport' }, { oid => '351', descr => 'less-equal-greater', @@ -1013,7 +1013,7 @@ { oid => '3130', descr => 'sort support', proname => 'btint4sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint4sortsupport' }, -{ oid => '9291', descr => 'skip support', +{ oid => '6403', descr => 'skip support', proname => 
'btint4skipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint4skipsupport' }, { oid => '842', descr => 'less-equal-greater', @@ -1022,7 +1022,7 @@ { oid => '3131', descr => 'sort support', proname => 'btint8sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint8sortsupport' }, -{ oid => '9292', descr => 'skip support', +{ oid => '6404', descr => 'skip support', proname => 'btint8skipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btint8skipsupport' }, { oid => '354', descr => 'less-equal-greater', @@ -1043,7 +1043,7 @@ { oid => '3134', descr => 'sort support', proname => 'btoidsortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btoidsortsupport' }, -{ oid => '9293', descr => 'skip support', +{ oid => '6405', descr => 'skip support', proname => 'btoidskipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btoidskipsupport' }, { oid => '404', descr => 'less-equal-greater', @@ -1052,7 +1052,7 @@ { oid => '358', descr => 'less-equal-greater', proname => 'btcharcmp', proleakproof => 't', prorettype => 'int4', proargtypes => 'char char', prosrc => 'btcharcmp' }, -{ oid => '9294', descr => 'skip support', +{ oid => '6406', descr => 'skip support', proname => 'btcharskipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btcharskipsupport' }, { oid => '359', descr => 'less-equal-greater', @@ -1180,24 +1180,24 @@ proname => 'name', proleakproof => 't', prorettype => 'name', proargtypes => 'bpchar', prosrc => 'bpchar_name' }, -{ oid => '8577', descr => 'convert int2 to bytea', +{ oid => '6367', descr => 'convert int2 to bytea', proname => 'bytea', proleakproof => 't', prorettype => 'bytea', proargtypes => 'int2', prosrc => 'int2_bytea' }, -{ oid => '8578', descr => 'convert int4 to bytea', +{ oid => '6368', descr => 'convert int4 to bytea', proname => 'bytea', proleakproof => 't', prorettype => 'bytea', proargtypes => 'int4', prosrc => 
'int4_bytea' }, -{ oid => '8579', descr => 'convert int8 to bytea', +{ oid => '6369', descr => 'convert int8 to bytea', proname => 'bytea', proleakproof => 't', prorettype => 'bytea', proargtypes => 'int8', prosrc => 'int8_bytea' }, -{ oid => '8580', descr => 'convert bytea to int2', - proname => 'int2', prorettype => 'int2', - proargtypes => 'bytea', prosrc => 'bytea_int2' }, -{ oid => '8581', descr => 'convert bytea to int4', - proname => 'int4', prorettype => 'int4', - proargtypes => 'bytea', prosrc => 'bytea_int4' }, -{ oid => '8582', descr => 'convert bytea to int8', - proname => 'int8', prorettype => 'int8', - proargtypes => 'bytea', prosrc => 'bytea_int8' }, +{ oid => '6370', descr => 'convert bytea to int2', + proname => 'int2', prorettype => 'int2', proargtypes => 'bytea', + prosrc => 'bytea_int2' }, +{ oid => '6371', descr => 'convert bytea to int4', + proname => 'int4', prorettype => 'int4', proargtypes => 'bytea', + prosrc => 'bytea_int4' }, +{ oid => '6372', descr => 'convert bytea to int8', + proname => 'int8', prorettype => 'int8', proargtypes => 'bytea', + prosrc => 'bytea_int8' }, { oid => '449', descr => 'hash', proname => 'hashint2', prorettype => 'int4', proargtypes => 'int2', @@ -1259,10 +1259,10 @@ { oid => '772', descr => 'hash', proname => 'hashvarlenaextended', prorettype => 'int8', proargtypes => 'internal int8', prosrc => 'hashvarlenaextended' }, -{ oid => '9708', descr => 'hash', +{ oid => '6413', descr => 'hash', proname => 'hashbytea', prorettype => 'int4', proargtypes => 'bytea', prosrc => 'hashbytea' }, -{ oid => '9709', descr => 'hash', +{ oid => '6414', descr => 'hash', proname => 'hashbyteaextended', prorettype => 'int8', proargtypes => 'bytea int8', prosrc => 'hashbyteaextended' }, { oid => '457', descr => 'hash', @@ -1301,34 +1301,34 @@ { oid => '781', descr => 'hash', proname => 'hashmacaddr8extended', prorettype => 'int8', proargtypes => 'macaddr8 int8', prosrc => 'hashmacaddr8extended' }, -{ oid => '9710', descr => 'hash', +{ 
oid => '6415', descr => 'hash', proname => 'hashdate', prorettype => 'int4', proargtypes => 'date', prosrc => 'hashdate' }, -{ oid => '9711', descr => 'hash', +{ oid => '6416', descr => 'hash', proname => 'hashdateextended', prorettype => 'int8', proargtypes => 'date int8', prosrc => 'hashdateextended' }, -{ oid => '9712', descr => 'hash', +{ oid => '6417', descr => 'hash', proname => 'hashbool', prorettype => 'int4', proargtypes => 'bool', prosrc => 'hashbool' }, -{ oid => '9713', descr => 'hash', +{ oid => '6418', descr => 'hash', proname => 'hashboolextended', prorettype => 'int8', proargtypes => 'bool int8', prosrc => 'hashboolextended' }, -{ oid => '9714', descr => 'hash', +{ oid => '6419', descr => 'hash', proname => 'hashxid', prorettype => 'int4', proargtypes => 'xid', prosrc => 'hashxid' }, -{ oid => '9715', descr => 'hash', +{ oid => '6420', descr => 'hash', proname => 'hashxidextended', prorettype => 'int8', proargtypes => 'xid int8', prosrc => 'hashxidextended' }, -{ oid => '9716', descr => 'hash', +{ oid => '6421', descr => 'hash', proname => 'hashxid8', prorettype => 'int4', proargtypes => 'xid8', prosrc => 'hashxid8' }, -{ oid => '9717', descr => 'hash', +{ oid => '6422', descr => 'hash', proname => 'hashxid8extended', prorettype => 'int8', proargtypes => 'xid8 int8', prosrc => 'hashxid8extended' }, -{ oid => '9718', descr => 'hash', +{ oid => '6423', descr => 'hash', proname => 'hashcid', prorettype => 'int4', proargtypes => 'cid', prosrc => 'hashcid' }, -{ oid => '9719', descr => 'hash', +{ oid => '6424', descr => 'hash', proname => 'hashcidextended', prorettype => 'int8', proargtypes => 'cid int8', prosrc => 'hashcidextended' }, @@ -1348,10 +1348,10 @@ proname => 'text_smaller', proleakproof => 't', prorettype => 'text', proargtypes => 'text text', prosrc => 'text_smaller' }, -{ oid => '8920', descr => 'larger of two', +{ oid => '6393', descr => 'larger of two', proname => 'bytea_larger', proleakproof => 't', prorettype => 'bytea', proargtypes => 
'bytea bytea', prosrc => 'bytea_larger' }, -{ oid => '8921', descr => 'smaller of two', +{ oid => '6394', descr => 'smaller of two', proname => 'bytea_smaller', proleakproof => 't', prorettype => 'bytea', proargtypes => 'bytea bytea', prosrc => 'bytea_smaller' }, @@ -1533,7 +1533,7 @@ { oid => '6163', descr => 'number of set bits', proname => 'bit_count', prorettype => 'int8', proargtypes => 'bytea', prosrc => 'bytea_bit_count' }, -{ oid => '8694', descr => 'reverse bytea', +{ oid => '6382', descr => 'reverse bytea', proname => 'reverse', prorettype => 'bytea', proargtypes => 'bytea', prosrc => 'bytea_reverse' }, @@ -1638,7 +1638,7 @@ proname => 'array_append', prosupport => 'array_append_support', proisstrict => 'f', prorettype => 'anycompatiblearray', proargtypes => 'anycompatiblearray anycompatible', prosrc => 'array_append' }, -{ oid => '8680', descr => 'planner support for array_append', +{ oid => '6378', descr => 'planner support for array_append', proname => 'array_append_support', prorettype => 'internal', proargtypes => 'internal', prosrc => 'array_append_support' }, { oid => '379', descr => 'prepend element onto front of array', @@ -1646,7 +1646,7 @@ proisstrict => 'f', prorettype => 'anycompatiblearray', proargtypes => 'anycompatible anycompatiblearray', prosrc => 'array_prepend' }, -{ oid => '8681', descr => 'planner support for array_prepend', +{ oid => '6379', descr => 'planner support for array_prepend', proname => 'array_prepend_support', prorettype => 'internal', proargtypes => 'internal', prosrc => 'array_prepend_support' }, { oid => '383', @@ -1784,17 +1784,17 @@ { oid => '6216', descr => 'take samples from array', proname => 'array_sample', provolatile => 'v', prorettype => 'anyarray', proargtypes => 'anyarray int4', prosrc => 'array_sample' }, -{ oid => '8686', descr => 'reverse array', +{ oid => '6381', descr => 'reverse array', proname => 'array_reverse', prorettype => 'anyarray', proargtypes => 'anyarray', prosrc => 'array_reverse' }, -{ oid 
=> '8810', descr => 'sort array', +{ oid => '6388', descr => 'sort array', proname => 'array_sort', prorettype => 'anyarray', proargtypes => 'anyarray', prosrc => 'array_sort' }, -{ oid => '8811', descr => 'sort array', +{ oid => '6389', descr => 'sort array', proname => 'array_sort', prorettype => 'anyarray', proargtypes => 'anyarray bool', proargnames => '{array,descending}', prosrc => 'array_sort_order' }, -{ oid => '8812', descr => 'sort array', +{ oid => '6390', descr => 'sort array', proname => 'array_sort', prorettype => 'anyarray', proargtypes => 'anyarray bool bool', proargnames => '{array,descending,nulls_first}', @@ -2315,7 +2315,7 @@ { oid => '3136', descr => 'sort support', proname => 'date_sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'date_sortsupport' }, -{ oid => '9295', descr => 'skip support', +{ oid => '6407', descr => 'skip support', proname => 'date_skipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'date_skipsupport' }, { oid => '4133', descr => 'window RANGE support', @@ -3433,7 +3433,7 @@ proname => 'pg_sequence_last_value', provolatile => 'v', proparallel => 'u', prorettype => 'int8', proargtypes => 'regclass', prosrc => 'pg_sequence_last_value' }, -{ oid => '9876', descr => 'return sequence tuple, for use by pg_dump', +{ oid => '6427', descr => 'return sequence tuple, for use by pg_dump', proname => 'pg_get_sequence_data', provolatile => 'v', proparallel => 'u', prorettype => 'record', proargtypes => 'regclass', proallargtypes => '{regclass,int8,bool}', proargmodes => '{i,o,o}', @@ -3594,10 +3594,11 @@ proname => 'erfc', prorettype => 'float8', proargtypes => 'float8', prosrc => 'derfc' }, -{ oid => '8702', descr => 'gamma function', +{ oid => '6383', descr => 'gamma function', proname => 'gamma', prorettype => 'float8', proargtypes => 'float8', prosrc => 'dgamma' }, -{ oid => '8703', descr => 'natural logarithm of absolute value of gamma function', +{ oid => '6384', + descr => 'natural 
logarithm of absolute value of gamma function', proname => 'lgamma', prorettype => 'float8', proargtypes => 'float8', prosrc => 'dlgamma' }, @@ -3688,7 +3689,7 @@ { oid => '872', descr => 'capitalize each word', proname => 'initcap', prorettype => 'text', proargtypes => 'text', prosrc => 'initcap' }, -{ oid => '9569', descr => 'fold case', +{ oid => '6412', descr => 'fold case', proname => 'casefold', prorettype => 'text', proargtypes => 'text', prosrc => 'casefold' }, { oid => '873', descr => 'left-pad string to length', @@ -4515,7 +4516,7 @@ { oid => '1693', descr => 'less-equal-greater', proname => 'btboolcmp', proleakproof => 't', prorettype => 'int4', proargtypes => 'bool bool', prosrc => 'btboolcmp' }, -{ oid => '9296', descr => 'skip support', +{ oid => '6408', descr => 'skip support', proname => 'btboolskipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'btboolskipsupport' }, @@ -5450,17 +5451,17 @@ prorettype => 'bool', proargtypes => 'oid text', prosrc => 'has_any_column_privilege_id' }, -{ oid => '8048', +{ oid => '6348', descr => 'user privilege on large object by username, large object oid', proname => 'has_largeobject_privilege', procost => '10', provolatile => 's', prorettype => 'bool', proargtypes => 'name oid text', prosrc => 'has_largeobject_privilege_name_id' }, -{ oid => '8049', +{ oid => '6349', descr => 'current user privilege on large object by large object oid', proname => 'has_largeobject_privilege', procost => '10', provolatile => 's', prorettype => 'bool', proargtypes => 'oid text', prosrc => 'has_largeobject_privilege_id' }, -{ oid => '8050', +{ oid => '6350', descr => 'user privilege on large object by user oid, large object oid', proname => 'has_largeobject_privilege', procost => '10', provolatile => 's', prorettype => 'bool', proargtypes => 'oid oid text', @@ -5611,19 +5612,19 @@ proname => 'pg_stat_get_autoanalyze_count', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', prosrc 
=> 'pg_stat_get_autoanalyze_count' }, -{ oid => '8406', descr => 'total vacuum time, in milliseconds', +{ oid => '6358', descr => 'total vacuum time, in milliseconds', proname => 'pg_stat_get_total_vacuum_time', provolatile => 's', proparallel => 'r', prorettype => 'float8', proargtypes => 'oid', prosrc => 'pg_stat_get_total_vacuum_time' }, -{ oid => '8407', descr => 'total autovacuum time, in milliseconds', +{ oid => '6359', descr => 'total autovacuum time, in milliseconds', proname => 'pg_stat_get_total_autovacuum_time', provolatile => 's', proparallel => 'r', prorettype => 'float8', proargtypes => 'oid', prosrc => 'pg_stat_get_total_autovacuum_time' }, -{ oid => '8408', descr => 'total analyze time, in milliseconds', +{ oid => '6360', descr => 'total analyze time, in milliseconds', proname => 'pg_stat_get_total_analyze_time', provolatile => 's', proparallel => 'r', prorettype => 'float8', proargtypes => 'oid', prosrc => 'pg_stat_get_total_analyze_time' }, -{ oid => '8409', descr => 'total autoanalyze time, in milliseconds', +{ oid => '6361', descr => 'total autoanalyze time, in milliseconds', proname => 'pg_stat_get_total_autoanalyze_time', provolatile => 's', proparallel => 'r', prorettype => 'float8', proargtypes => 'oid', prosrc => 'pg_stat_get_total_autoanalyze_time' }, @@ -5900,12 +5901,12 @@ proname => 'pg_stat_get_db_sessions_killed', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', prosrc => 'pg_stat_get_db_sessions_killed' }, -{ oid => '8403', +{ oid => '6355', descr => 'statistics: number of parallel workers planned to be launched by queries', proname => 'pg_stat_get_db_parallel_workers_to_launch', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', prosrc => 'pg_stat_get_db_parallel_workers_to_launch' }, -{ oid => '8404', +{ oid => '6356', descr => 'statistics: number of parallel workers effectively launched by queries', proname => 'pg_stat_get_db_parallel_workers_launched', 
provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => 'oid', @@ -5927,7 +5928,7 @@ proname => 'pg_stat_get_checkpointer_num_requested', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_checkpointer_num_requested' }, -{ oid => '8599', +{ oid => '6377', descr => 'statistics: number of checkpoints performed by the checkpointer', proname => 'pg_stat_get_checkpointer_num_performed', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', @@ -5954,7 +5955,7 @@ proname => 'pg_stat_get_checkpointer_buffers_written', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_checkpointer_buffers_written' }, -{ oid => '8573', +{ oid => '6366', descr => 'statistics: number of SLRU buffers written during checkpoints and restartpoints', proname => 'pg_stat_get_checkpointer_slru_written', provolatile => 's', proparallel => 'r', prorettype => 'int8', proargtypes => '', @@ -6000,7 +6001,7 @@ proargnames => '{backend_type,object,context,reads,read_bytes,read_time,writes,write_bytes,write_time,writebacks,writeback_time,extends,extend_bytes,extend_time,hits,evictions,reuses,fsyncs,fsync_time,stats_reset}', prosrc => 'pg_stat_get_io' }, -{ oid => '8806', descr => 'statistics: backend IO statistics', +{ oid => '6386', descr => 'statistics: backend IO statistics', proname => 'pg_stat_get_backend_io', prorows => '5', proretset => 't', provolatile => 'v', proparallel => 'r', prorettype => 'record', proargtypes => 'int4', @@ -6016,7 +6017,7 @@ proargmodes => '{o,o,o,o,o}', proargnames => '{wal_records,wal_fpi,wal_bytes,wal_buffers_full,stats_reset}', prosrc => 'pg_stat_get_wal' }, -{ oid => '8037', descr => 'statistics: backend WAL activity', +{ oid => '6313', descr => 'statistics: backend WAL activity', proname => 'pg_stat_get_backend_wal', provolatile => 'v', proparallel => 'r', prorettype => 'record', proargtypes => 'int4', proallargtypes => 
'{int4,int8,int8,numeric,int8,timestamptz}', @@ -6155,7 +6156,7 @@ proname => 'pg_stat_reset_single_function_counters', provolatile => 'v', prorettype => 'void', proargtypes => 'oid', prosrc => 'pg_stat_reset_single_function_counters' }, -{ oid => '8807', descr => 'statistics: reset statistics for a single backend', +{ oid => '6387', descr => 'statistics: reset statistics for a single backend', proname => 'pg_stat_reset_backend_stats', provolatile => 'v', prorettype => 'void', proargtypes => 'int4', prosrc => 'pg_stat_reset_backend_stats' }, @@ -6369,10 +6370,10 @@ { oid => '3411', descr => 'hash', proname => 'timestamp_hash_extended', prorettype => 'int8', proargtypes => 'timestamp int8', prosrc => 'timestamp_hash_extended' }, -{ oid => '9720', descr => 'hash', +{ oid => '6425', descr => 'hash', proname => 'timestamptz_hash', prorettype => 'int4', proargtypes => 'timestamptz', prosrc => 'timestamptz_hash' }, -{ oid => '9721', descr => 'hash', +{ oid => '6426', descr => 'hash', proname => 'timestamptz_hash_extended', prorettype => 'int8', proargtypes => 'timestamptz int8', prosrc => 'timestamptz_hash_extended' }, { oid => '2041', descr => 'intervals overlap?', @@ -6397,7 +6398,7 @@ { oid => '3137', descr => 'sort support', proname => 'timestamp_sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'timestamp_sortsupport' }, -{ oid => '9297', descr => 'skip support', +{ oid => '6409', descr => 'skip support', proname => 'timestamp_skipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'timestamp_skipsupport' }, @@ -6593,7 +6594,7 @@ proname => 'pg_describe_object', provolatile => 's', prorettype => 'text', proargtypes => 'oid oid int4', prosrc => 'pg_describe_object' }, -{ oid => '8730', descr => 'get ACL for SQL object', +{ oid => '6385', descr => 'get ACL for SQL object', proname => 'pg_get_acl', provolatile => 's', prorettype => '_aclitem', proargtypes => 'oid oid int4', proargnames => '{classid,objid,objsubid}', prosrc => 
'pg_get_acl' }, @@ -6792,7 +6793,7 @@ proargnames => '{rm_id, rm_name, rm_builtin}', prosrc => 'pg_get_wal_resource_managers' }, -{ oid => '8303', descr => 'get info about loaded modules', +{ oid => '6353', descr => 'get info about loaded modules', proname => 'pg_get_loaded_modules', prorows => '10', proretset => 't', provolatile => 'v', proparallel => 'r', prorettype => 'record', proargtypes => '', proallargtypes => '{text,text,text}', @@ -6992,7 +6993,7 @@ proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'anyarray', proargtypes => 'anyarray', prosrc => 'aggregate_dummy' }, -{ oid => '8595', descr => 'maximum value of all record input values', +{ oid => '6373', descr => 'maximum value of all record input values', proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'record', proargtypes => 'record', prosrc => 'aggregate_dummy' }, { oid => '2244', descr => 'maximum value of all bpchar input values', @@ -7010,7 +7011,7 @@ { oid => '5099', descr => 'maximum value of all xid8 input values', proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'xid8', proargtypes => 'xid8', prosrc => 'aggregate_dummy' }, -{ oid => '8922', descr => 'maximum value of all bytea input values', +{ oid => '6395', descr => 'maximum value of all bytea input values', proname => 'max', prokind => 'a', proisstrict => 'f', prorettype => 'bytea', proargtypes => 'bytea', prosrc => 'aggregate_dummy' }, @@ -7068,7 +7069,7 @@ proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'anyarray', proargtypes => 'anyarray', prosrc => 'aggregate_dummy' }, -{ oid => '8596', descr => 'minimum value of all record input values', +{ oid => '6374', descr => 'minimum value of all record input values', proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'record', proargtypes => 'record', prosrc => 'aggregate_dummy' }, { oid => '2245', descr => 'minimum value of all bpchar input values', @@ -7086,7 +7087,7 @@ { oid => '5100', descr => 
'minimum value of all xid8 input values', proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'xid8', proargtypes => 'xid8', prosrc => 'aggregate_dummy' }, -{ oid => '8923', descr => 'minimum value of all bytea input values', +{ oid => '6396', descr => 'minimum value of all bytea input values', proname => 'min', prokind => 'a', proisstrict => 'f', prorettype => 'bytea', proargtypes => 'bytea', prosrc => 'aggregate_dummy' }, @@ -7454,6 +7455,17 @@ prorettype => 'regnamespace', proargtypes => 'text', prosrc => 'to_regnamespace' }, +{ oid => '8321', descr => 'I/O', + proname => 'regdatabasein', provolatile => 's', prorettype => 'regdatabase', + proargtypes => 'cstring', prosrc => 'regdatabasein' }, +{ oid => '8322', descr => 'I/O', + proname => 'regdatabaseout', provolatile => 's', prorettype => 'cstring', + proargtypes => 'regdatabase', prosrc => 'regdatabaseout' }, +{ oid => '8323', descr => 'convert database name to regdatabase', + proname => 'to_regdatabase', provolatile => 's', + prorettype => 'regdatabase', proargtypes => 'text', + prosrc => 'to_regdatabase' }, + { oid => '6210', descr => 'test whether string is valid input for data type', proname => 'pg_input_is_valid', provolatile => 's', prorettype => 'bool', proargtypes => 'text text', prosrc => 'pg_input_is_valid' }, @@ -7949,10 +7961,10 @@ proargtypes => 'internal', prosrc => 'tsm_system_handler' }, # CRC variants -{ oid => '8571', descr => 'CRC-32 value', +{ oid => '6364', descr => 'CRC-32 value', proname => 'crc32', proleakproof => 't', prorettype => 'int8', proargtypes => 'bytea', prosrc => 'crc32_bytea' }, -{ oid => '8572', descr => 'CRC-32C value', +{ oid => '6365', descr => 'CRC-32C value', proname => 'crc32c', proleakproof => 't', prorettype => 'int8', proargtypes => 'bytea', prosrc => 'crc32c_bytea' }, @@ -8312,6 +8324,12 @@ { oid => '4088', descr => 'I/O', proname => 'regnamespacesend', prorettype => 'bytea', proargtypes => 'regnamespace', prosrc => 'regnamespacesend' }, +{ oid => 
'8324', descr => 'I/O', + proname => 'regdatabaserecv', prorettype => 'regdatabase', + proargtypes => 'internal', prosrc => 'regdatabaserecv' }, +{ oid => '8325', descr => 'I/O', + proname => 'regdatabasesend', prorettype => 'bytea', + proargtypes => 'regdatabase', prosrc => 'regdatabasesend' }, { oid => '2456', descr => 'I/O', proname => 'bit_recv', prorettype => 'bit', proargtypes => 'internal oid int4', prosrc => 'bit_recv' }, @@ -8496,7 +8514,7 @@ proargmodes => '{o,o,o,o,o,o}', proargnames => '{name,statement,is_holdable,is_binary,is_scrollable,creation_time}', prosrc => 'pg_cursor' }, -{ oid => '9221', descr => 'get abbreviations from current timezone', +{ oid => '6401', descr => 'get abbreviations from current timezone', proname => 'pg_timezone_abbrevs_zone', prorows => '10', proretset => 't', provolatile => 's', prorettype => 'record', proargtypes => '', proallargtypes => '{text,interval,bool}', proargmodes => '{o,o,o}', @@ -8571,16 +8589,6 @@ prorettype => 'bool', proargtypes => 'int4', prosrc => 'pg_log_backend_memory_contexts' }, -# publishing memory contexts of the specified postgres process -{ oid => '2173', descr => 'publish memory contexts of the specified backend', - proname => 'pg_get_process_memory_contexts', provolatile => 'v', - prorows => '100', proretset => 't', proparallel => 'r', - prorettype => 'record', proargtypes => 'int4 bool float8', - proallargtypes => '{int4,bool,float8,text,text,text,_int4,int4,int8,int8,int8,int8,int8,int4,timestamptz}', - proargmodes => '{i,i,i,o,o,o,o,o,o,o,o,o,o,o,o}', - proargnames => '{pid, summary, timeout, name, ident, type, path, level, total_bytes, total_nblocks, free_bytes, free_chunks, used_bytes, num_agg_contexts, stats_timestamp}', - prosrc => 'pg_get_process_memory_contexts' }, - # non-persistent series generator { oid => '1066', descr => 'non-persistent series generator', proname => 'generate_series', prorows => '1000', @@ -8618,7 +8626,7 @@ prosupport => 'generate_series_numeric_support', proretset 
=> 't', prorettype => 'numeric', proargtypes => 'numeric numeric', prosrc => 'generate_series_numeric' }, -{ oid => '8405', descr => 'planner support for generate_series', +{ oid => '6357', descr => 'planner support for generate_series', proname => 'generate_series_numeric_support', prorettype => 'internal', proargtypes => 'internal', prosrc => 'generate_series_numeric_support' }, { oid => '938', descr => 'non-persistent series generator', @@ -8638,7 +8646,7 @@ prorettype => 'timestamptz', proargtypes => 'timestamptz timestamptz interval text', prosrc => 'generate_series_timestamptz_at_zone' }, -{ oid => '8402', descr => 'planner support for generate_series', +{ oid => '6354', descr => 'planner support for generate_series', proname => 'generate_series_timestamp_support', prorettype => 'internal', proargtypes => 'internal', prosrc => 'generate_series_timestamp_support' }, @@ -9370,8 +9378,8 @@ proname => 'to_json', provolatile => 's', prorettype => 'json', proargtypes => 'anyelement', prosrc => 'to_json' }, { oid => '3261', descr => 'remove object fields with null values from json', - proname => 'json_strip_nulls', prorettype => 'json', proargtypes => 'json bool', - prosrc => 'json_strip_nulls' }, + proname => 'json_strip_nulls', prorettype => 'json', + proargtypes => 'json bool', prosrc => 'json_strip_nulls' }, { oid => '3947', proname => 'json_object_field', prorettype => 'json', @@ -9477,7 +9485,7 @@ { oid => '3300', descr => 'sort support', proname => 'uuid_sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'uuid_sortsupport' }, -{ oid => '9298', descr => 'skip support', +{ oid => '6410', descr => 'skip support', proname => 'uuid_skipsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'uuid_skipsupport' }, { oid => '2961', descr => 'I/O', @@ -9493,17 +9501,19 @@ proname => 'uuid_hash_extended', prorettype => 'int8', proargtypes => 'uuid int8', prosrc => 'uuid_hash_extended' }, { oid => '3432', descr => 'generate random 
UUID', - proname => 'gen_random_uuid', provolatile => 'v', - prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' }, -{ oid => '9895', descr => 'generate UUID version 4', - proname => 'uuidv4', provolatile => 'v', - prorettype => 'uuid', proargtypes => '', prosrc => 'gen_random_uuid' }, -{ oid => '9896', descr => 'generate UUID version 7', - proname => 'uuidv7', provolatile => 'v', - prorettype => 'uuid', proargtypes => '', prosrc => 'uuidv7' }, -{ oid => '9897', descr => 'generate UUID version 7 with a timestamp shifted by specified interval', - proname => 'uuidv7', provolatile => 'v', proargnames => '{shift}', - prorettype => 'uuid', proargtypes => 'interval', prosrc => 'uuidv7_interval' }, + proname => 'gen_random_uuid', provolatile => 'v', prorettype => 'uuid', + proargtypes => '', prosrc => 'gen_random_uuid' }, +{ oid => '6428', descr => 'generate UUID version 4', + proname => 'uuidv4', provolatile => 'v', prorettype => 'uuid', + proargtypes => '', prosrc => 'gen_random_uuid' }, +{ oid => '6429', descr => 'generate UUID version 7', + proname => 'uuidv7', provolatile => 'v', prorettype => 'uuid', + proargtypes => '', prosrc => 'uuidv7' }, +{ oid => '6430', + descr => 'generate UUID version 7 with a timestamp shifted by specified interval', + proname => 'uuidv7', provolatile => 'v', prorettype => 'uuid', + proargtypes => 'interval', proargnames => '{shift}', + prosrc => 'uuidv7_interval' }, { oid => '6342', descr => 'extract timestamp from UUID', proname => 'uuid_extract_timestamp', proleakproof => 't', prorettype => 'timestamptz', proargtypes => 'uuid', @@ -10309,8 +10319,8 @@ prorettype => 'jsonb', proargtypes => '', prosrc => 'jsonb_build_object_noargs' }, { oid => '3262', descr => 'remove object fields with null values from jsonb', - proname => 'jsonb_strip_nulls', prorettype => 'jsonb', proargtypes => 'jsonb bool', - prosrc => 'jsonb_strip_nulls' }, + proname => 'jsonb_strip_nulls', prorettype => 'jsonb', + proargtypes => 'jsonb bool', 
prosrc => 'jsonb_strip_nulls' }, { oid => '3478', proname => 'jsonb_object_field', prorettype => 'jsonb', @@ -10661,10 +10671,10 @@ { oid => '2987', descr => 'less-equal-greater', proname => 'btrecordcmp', prorettype => 'int4', proargtypes => 'record record', prosrc => 'btrecordcmp' }, -{ oid => '8597', descr => 'larger of two', +{ oid => '6375', descr => 'larger of two', proname => 'record_larger', prorettype => 'record', proargtypes => 'record record', prosrc => 'record_larger' }, -{ oid => '8598', descr => 'smaller of two', +{ oid => '6376', descr => 'smaller of two', proname => 'record_smaller', prorettype => 'record', proargtypes => 'record record', prosrc => 'record_smaller' }, @@ -10904,7 +10914,7 @@ { oid => '3870', descr => 'less-equal-greater', proname => 'range_cmp', prorettype => 'int4', proargtypes => 'anyrange anyrange', prosrc => 'range_cmp' }, -{ oid => '8849', descr => 'sort support', +{ oid => '6391', descr => 'sort support', proname => 'range_sortsupport', prorettype => 'void', proargtypes => 'internal', prosrc => 'range_sortsupport' }, { oid => '3871', @@ -12323,7 +12333,7 @@ proname => 'array_subscript_handler', prosupport => 'array_subscript_handler_support', prorettype => 'internal', proargtypes => 'internal', prosrc => 'array_subscript_handler' }, -{ oid => '8682', descr => 'planner support for array_subscript_handler', +{ oid => '6380', descr => 'planner support for array_subscript_handler', proname => 'array_subscript_handler_support', prorettype => 'internal', proargtypes => 'internal', prosrc => 'array_subscript_handler_support' }, { oid => '6180', descr => 'raw array subscripting support', @@ -12362,7 +12372,7 @@ provolatile => 'v', prorettype => 'record', proargtypes => '', proallargtypes => '{text,int8,timestamptz}', proargmodes => '{o,o,o}', proargnames => '{name,size,modification}', prosrc => 'pg_ls_waldir' }, -{ oid => '9220', descr => 'list of files in the pg_wal/summaries directory', +{ oid => '6400', descr => 'list of files in 
the pg_wal/summaries directory', proname => 'pg_ls_summariesdir', procost => '10', prorows => '20', proretset => 't', provolatile => 'v', prorettype => 'record', proargtypes => '', proallargtypes => '{text,int8,timestamptz}', @@ -12518,49 +12528,37 @@ proargnames => '{summarized_tli,summarized_lsn,pending_lsn,summarizer_pid}', prosrc => 'pg_get_wal_summarizer_state' }, # Statistics Import -{ oid => '8459', - descr => 'restore statistics on relation', - proname => 'pg_restore_relation_stats', provolatile => 'v', proisstrict => 'f', - provariadic => 'any', - proparallel => 'u', prorettype => 'bool', - proargtypes => 'any', - proargnames => '{kwargs}', - proargmodes => '{v}', - prosrc => 'pg_restore_relation_stats' }, -{ oid => '9160', - descr => 'clear statistics on relation', - proname => 'pg_clear_relation_stats', provolatile => 'v', proisstrict => 'f', - proparallel => 'u', prorettype => 'void', - proargtypes => 'text text', - proargnames => '{schemaname,relname}', - prosrc => 'pg_clear_relation_stats' }, -{ oid => '8461', - descr => 'restore statistics on attribute', - proname => 'pg_restore_attribute_stats', provolatile => 'v', proisstrict => 'f', - provariadic => 'any', - proparallel => 'u', prorettype => 'bool', - proargtypes => 'any', - proargnames => '{kwargs}', - proargmodes => '{v}', - prosrc => 'pg_restore_attribute_stats' }, -{ oid => '9162', - descr => 'clear statistics on attribute', - proname => 'pg_clear_attribute_stats', provolatile => 'v', proisstrict => 'f', +{ oid => '6362', descr => 'restore statistics on relation', + proname => 'pg_restore_relation_stats', provariadic => 'any', + proisstrict => 'f', provolatile => 'v', proparallel => 'u', + prorettype => 'bool', proargtypes => 'any', proargmodes => '{v}', + proargnames => '{kwargs}', prosrc => 'pg_restore_relation_stats' }, +{ oid => '6397', descr => 'clear statistics on relation', + proname => 'pg_clear_relation_stats', proisstrict => 'f', provolatile => 'v', + proparallel => 'u', prorettype 
=> 'void', proargtypes => 'text text', + proargnames => '{schemaname,relname}', prosrc => 'pg_clear_relation_stats' }, +{ oid => '6363', descr => 'restore statistics on attribute', + proname => 'pg_restore_attribute_stats', provariadic => 'any', + proisstrict => 'f', provolatile => 'v', proparallel => 'u', + prorettype => 'bool', proargtypes => 'any', proargmodes => '{v}', + proargnames => '{kwargs}', prosrc => 'pg_restore_attribute_stats' }, +{ oid => '6398', descr => 'clear statistics on attribute', + proname => 'pg_clear_attribute_stats', proisstrict => 'f', provolatile => 'v', proparallel => 'u', prorettype => 'void', proargtypes => 'text text text bool', proargnames => '{schemaname,relname,attname,inherited}', prosrc => 'pg_clear_attribute_stats' }, # GiST stratnum implementations -{ oid => '8047', descr => 'GiST support', - proname => 'gist_stratnum_common', prorettype => 'int2', - proargtypes => 'int4', - prosrc => 'gist_stratnum_common' }, +{ oid => '6347', descr => 'GiST support', + proname => 'gist_translate_cmptype_common', prorettype => 'int2', + proargtypes => 'int4', prosrc => 'gist_translate_cmptype_common' }, # AIO related functions -{ oid => '9200', descr => 'information about in-progress asynchronous IOs', +{ oid => '6399', descr => 'information about in-progress asynchronous IOs', proname => 'pg_get_aios', prorows => '100', proretset => 't', - provolatile => 'v', proparallel => 'r', prorettype => 'record', proargtypes => '', + provolatile => 'v', proparallel => 'r', prorettype => 'record', + proargtypes => '', proallargtypes => '{int4,int4,int8,text,text,int8,int8,text,int2,int4,text,text,bool,bool,bool}', proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}', proargnames => '{pid,io_id,io_generation,state,operation,off,length,target,handle_data_len,raw_result,result,target_desc,f_sync,f_localmem,f_buffered}', diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h index 48c7d1a8615..6e074190fd2 100644 --- 
a/src/include/catalog/pg_publication.h +++ b/src/include/catalog/pg_publication.h @@ -146,7 +146,7 @@ extern Publication *GetPublicationByName(const char *pubname, bool missing_ok); extern List *GetRelationPublications(Oid relid); /*--------- - * Expected values for pub_partopt parameter of GetRelationPublications(), + * Expected values for pub_partopt parameter of GetPublicationRelations(), * which allows callers to specify which partitions of partitioned tables * mentioned in the publication they expect to see. * diff --git a/src/include/catalog/pg_type.dat b/src/include/catalog/pg_type.dat index 6dca77e0a22..29e4ffffc98 100644 --- a/src/include/catalog/pg_type.dat +++ b/src/include/catalog/pg_type.dat @@ -399,6 +399,11 @@ typinput => 'regnamespacein', typoutput => 'regnamespaceout', typreceive => 'regnamespacerecv', typsend => 'regnamespacesend', typalign => 'i' }, +{ oid => '8326', array_type_oid => '8327', descr => 'registered database', + typname => 'regdatabase', typlen => '4', typbyval => 't', typcategory => 'N', + typinput => 'regdatabasein', typoutput => 'regdatabaseout', + typreceive => 'regdatabaserecv', typsend => 'regdatabasesend', + typalign => 'i' }, # uuid { oid => '2950', array_type_oid => '2951', descr => 'UUID', diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index 06dfdfef721..541176e1980 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -20,15 +20,12 @@ #include "tcop/dest.h" /* - * Represents whether a header line should be present, and whether it must - * match the actual names (which implies "true"). + * Represents whether a header line must match the actual names + * (which implies "true"), and whether it should be present. */ -typedef enum CopyHeaderChoice -{ - COPY_HEADER_FALSE = 0, - COPY_HEADER_TRUE, - COPY_HEADER_MATCH, -} CopyHeaderChoice; +#define COPY_HEADER_MATCH -1 +#define COPY_HEADER_FALSE 0 +#define COPY_HEADER_TRUE 1 /* * Represents where to save input processing errors. 
More values to be added @@ -64,7 +61,8 @@ typedef struct CopyFormatOptions bool binary; /* binary format? */ bool freeze; /* freeze rows on loading? */ bool csv_mode; /* Comma Separated Value format? */ - CopyHeaderChoice header_line; /* header line? */ + int header_line; /* number of lines to skip or COPY_HEADER_XXX + * value (see the above) */ char *null_print; /* NULL marker string (server encoding!) */ int null_print_len; /* length of same */ char *null_print_client; /* same converted to file encoding */ diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index bc37a80dc74..14eeccbd718 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -336,7 +336,7 @@ extern PGDLLIMPORT int64 parallel_vacuum_worker_delay_ns; /* in commands/vacuum.c */ extern void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel); -extern void vacuum(List *relations, VacuumParams *params, +extern void vacuum(List *relations, const VacuumParams params, BufferAccessStrategy bstrategy, MemoryContext vac_context, bool isTopLevel); extern void vac_open_indexes(Relation relation, LOCKMODE lockmode, @@ -357,7 +357,7 @@ extern void vac_update_relstats(Relation relation, bool *frozenxid_updated, bool *minmulti_updated, bool in_outer_xact); -extern bool vacuum_get_cutoffs(Relation rel, const VacuumParams *params, +extern bool vacuum_get_cutoffs(Relation rel, const VacuumParams params, struct VacuumCutoffs *cutoffs); extern bool vacuum_xid_failsafe_check(const struct VacuumCutoffs *cutoffs); extern void vac_update_datfrozenxid(void); @@ -398,7 +398,7 @@ extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc); /* in commands/analyze.c */ extern void analyze_rel(Oid relid, RangeVar *relation, - VacuumParams *params, List *va_cols, bool in_outer_xact, + const VacuumParams params, List *va_cols, bool in_outer_xact, BufferAccessStrategy bstrategy); extern bool std_typanalyze(VacAttrStats *stats); diff --git 
a/src/include/executor/nodeAgg.h b/src/include/executor/nodeAgg.h index 34b82d0f5d1..6c4891bbaeb 100644 --- a/src/include/executor/nodeAgg.h +++ b/src/include/executor/nodeAgg.h @@ -264,7 +264,7 @@ typedef struct AggStatePerGroupData * NULL and not auto-replace it with a later input value. Only the first * non-NULL input will be auto-substituted. */ -} AggStatePerGroupData; +} AggStatePerGroupData; /* * AggStatePerPhaseData - per-grouping-set-phase state diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 1e59a7f910f..1bef98471c3 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -96,7 +96,6 @@ extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending; extern PGDLLIMPORT volatile sig_atomic_t ProcSignalBarrierPending; extern PGDLLIMPORT volatile sig_atomic_t LogMemoryContextPending; extern PGDLLIMPORT volatile sig_atomic_t IdleStatsUpdateTimeoutPending; -extern PGDLLIMPORT volatile sig_atomic_t PublishMemoryContextPending; extern PGDLLIMPORT volatile sig_atomic_t CheckClientConnectionPending; extern PGDLLIMPORT volatile sig_atomic_t ClientConnectionLost; diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 2492282213f..e107d6e5f81 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -157,34 +157,6 @@ typedef struct ExprState * entries for a particular index. Used for both index_build and * retail creation of index entries. * - * NumIndexAttrs total number of columns in this index - * NumIndexKeyAttrs number of key columns in index - * IndexAttrNumbers underlying-rel attribute numbers used as keys - * (zeroes indicate expressions). It also contains - * info about included columns. 
- * Expressions expr trees for expression entries, or NIL if none - * ExpressionsState exec state for expressions, or NIL if none - * Predicate partial-index predicate, or NIL if none - * PredicateState exec state for predicate, or NIL if none - * ExclusionOps Per-column exclusion operators, or NULL if none - * ExclusionProcs Underlying function OIDs for ExclusionOps - * ExclusionStrats Opclass strategy numbers for ExclusionOps - * UniqueOps These are like Exclusion*, but for unique indexes - * UniqueProcs - * UniqueStrats - * Unique is it a unique index? - * OpclassOptions opclass-specific options, or NULL if none - * ReadyForInserts is it valid for inserts? - * CheckedUnchanged IndexUnchanged status determined yet? - * IndexUnchanged aminsert hint, cached for retail inserts - * Concurrent are we doing a concurrent index build? - * BrokenHotChain did we detect any broken HOT chains? - * Summarizing is it a summarizing index? - * ParallelWorkers # of workers requested (excludes leader) - * Am Oid of index AM - * AmCache private cache area for index AM - * Context memory context holding this IndexInfo - * * ii_Concurrent, ii_BrokenHotChain, and ii_ParallelWorkers are used only * during index build; they're conventionally zeroed otherwise. * ---------------- @@ -192,31 +164,67 @@ typedef struct ExprState typedef struct IndexInfo { NodeTag type; - int ii_NumIndexAttrs; /* total number of columns in index */ - int ii_NumIndexKeyAttrs; /* number of key columns in index */ + + /* total number of columns in index */ + int ii_NumIndexAttrs; + /* number of key columns in index */ + int ii_NumIndexKeyAttrs; + + /* + * Underlying-rel attribute numbers used as keys (zeroes indicate + * expressions). It also contains info about included columns. 
+ */ AttrNumber ii_IndexAttrNumbers[INDEX_MAX_KEYS]; + + /* expr trees for expression entries, or NIL if none */ List *ii_Expressions; /* list of Expr */ + /* exec state for expressions, or NIL if none */ List *ii_ExpressionsState; /* list of ExprState */ + + /* partial-index predicate, or NIL if none */ List *ii_Predicate; /* list of Expr */ + /* exec state for predicate, or NIL if none */ ExprState *ii_PredicateState; + + /* Per-column exclusion operators, or NULL if none */ Oid *ii_ExclusionOps; /* array with one entry per column */ + /* Underlying function OIDs for ExclusionOps */ Oid *ii_ExclusionProcs; /* array with one entry per column */ + /* Opclass strategy numbers for ExclusionOps */ uint16 *ii_ExclusionStrats; /* array with one entry per column */ + + /* These are like Exclusion*, but for unique indexes */ Oid *ii_UniqueOps; /* array with one entry per column */ Oid *ii_UniqueProcs; /* array with one entry per column */ uint16 *ii_UniqueStrats; /* array with one entry per column */ + + /* is it a unique index? */ bool ii_Unique; + /* is NULLS NOT DISTINCT? */ bool ii_NullsNotDistinct; + /* is it valid for inserts? */ bool ii_ReadyForInserts; + /* IndexUnchanged status determined yet? */ bool ii_CheckedUnchanged; + /* aminsert hint, cached for retail inserts */ bool ii_IndexUnchanged; + /* are we doing a concurrent index build? */ bool ii_Concurrent; + /* did we detect any broken HOT chains? */ bool ii_BrokenHotChain; + /* is it a summarizing index? */ bool ii_Summarizing; + /* is it a WITHOUT OVERLAPS index?
*/ bool ii_WithoutOverlaps; + /* # of workers requested (excludes leader) */ int ii_ParallelWorkers; + + /* Oid of index AM */ Oid ii_Am; + /* private cache area for index AM */ void *ii_AmCache; + + /* memory context holding this IndexInfo */ MemoryContext ii_Context; } IndexInfo; diff --git a/src/include/nodes/meson.build b/src/include/nodes/meson.build index d1ca24dd32f..ea36cb0fda4 100644 --- a/src/include/nodes/meson.build +++ b/src/include/nodes/meson.build @@ -28,7 +28,7 @@ node_support_input_i = [ node_support_input = [] foreach i : node_support_input_i - node_support_input += meson.source_root() / 'src' / 'include' / i + node_support_input += meson.project_source_root() / 'src' / 'include' / i endforeach node_support_output = [ diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 4610fc61293..28e2e8dc0fd 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -127,8 +127,13 @@ typedef struct Query * query identifier (can be set by plugins); ignored for equal, as it * might not be set; also not stored. This is the result of the query * jumble, hence ignored. + * + * We store this as a signed value as this is the form it's displayed to + * users in places such as EXPLAIN and pg_stat_statements. Primarily this + * is done due to lack of an SQL type to represent the full range of + * uint64. */ - uint64 queryId pg_node_attr(equal_ignore, query_jumble_ignore, read_write_ignore, read_as(0)); + int64 queryId pg_node_attr(equal_ignore, query_jumble_ignore, read_write_ignore, read_as(0)); /* do I set the command result tag? 
*/ bool canSetTag pg_node_attr(query_jumble_ignore); @@ -346,6 +351,14 @@ typedef struct A_Expr List *name; /* possibly-qualified name of operator */ Node *lexpr; /* left argument, or NULL if none */ Node *rexpr; /* right argument, or NULL if none */ + + /* + * If rexpr is a list of some kind, we separately track its starting and + * ending location; it's not the same as the starting and ending location + * of the token itself. + */ + ParseLoc rexpr_list_start; + ParseLoc rexpr_list_end; ParseLoc location; /* token location, or -1 if unknown */ } A_Expr; @@ -501,6 +514,8 @@ typedef struct A_ArrayExpr { NodeTag type; List *elements; /* array element expressions */ + ParseLoc list_start; /* start of the element list */ + ParseLoc list_end; /* end of the elements list */ ParseLoc location; /* token location, or -1 if unknown */ } A_ArrayExpr; @@ -2095,8 +2110,6 @@ typedef struct InsertStmt ReturningClause *returningClause; /* RETURNING clause */ WithClause *withClause; /* WITH clause */ OverridingKind override; /* OVERRIDING clause */ - ParseLoc stmt_location; /* start location, or -1 if unknown */ - ParseLoc stmt_len; /* length in bytes; 0 means "rest of string" */ } InsertStmt; /* ---------------------- @@ -2111,8 +2124,6 @@ typedef struct DeleteStmt Node *whereClause; /* qualifications */ ReturningClause *returningClause; /* RETURNING clause */ WithClause *withClause; /* WITH clause */ - ParseLoc stmt_location; /* start location, or -1 if unknown */ - ParseLoc stmt_len; /* length in bytes; 0 means "rest of string" */ } DeleteStmt; /* ---------------------- @@ -2128,8 +2139,6 @@ typedef struct UpdateStmt List *fromClause; /* optional from clause for more tables */ ReturningClause *returningClause; /* RETURNING clause */ WithClause *withClause; /* WITH clause */ - ParseLoc stmt_location; /* start location, or -1 if unknown */ - ParseLoc stmt_len; /* length in bytes; 0 means "rest of string" */ } UpdateStmt; /* ---------------------- @@ -2145,8 +2154,6 @@ typedef 
struct MergeStmt List *mergeWhenClauses; /* list of MergeWhenClause(es) */ ReturningClause *returningClause; /* RETURNING clause */ WithClause *withClause; /* WITH clause */ - ParseLoc stmt_location; /* start location, or -1 if unknown */ - ParseLoc stmt_len; /* length in bytes; 0 means "rest of string" */ } MergeStmt; /* ---------------------- @@ -2216,8 +2223,6 @@ typedef struct SelectStmt bool all; /* ALL specified? */ struct SelectStmt *larg; /* left child */ struct SelectStmt *rarg; /* right child */ - ParseLoc stmt_location; /* start location, or -1 if unknown */ - ParseLoc stmt_len; /* length in bytes; 0 means "rest of string" */ /* Eventually add fields for CORRESPONDING spec here */ } SelectStmt; @@ -2531,17 +2536,20 @@ typedef struct AlterCollationStmt * this command. * ---------------------- */ +typedef enum AlterDomainType +{ + AD_AlterDefault = 'T', /* SET|DROP DEFAULT */ + AD_DropNotNull = 'N', /* DROP NOT NULL */ + AD_SetNotNull = 'O', /* SET NOT NULL */ + AD_AddConstraint = 'C', /* ADD CONSTRAINT */ + AD_DropConstraint = 'X', /* DROP CONSTRAINT */ + AD_ValidateConstraint = 'V', /* VALIDATE CONSTRAINT */ +} AlterDomainType; + typedef struct AlterDomainStmt { NodeTag type; - char subtype; /*------------ - * T = alter column default - * N = alter column drop not null - * O = alter column set not null - * C = add constraint - * X = drop constraint - *------------ - */ + AlterDomainType subtype; /* subtype of command */ List *typeName; /* domain to work on */ char *name; /* column or constraint name to act on */ Node *def; /* definition of default or constraint */ @@ -3417,15 +3425,44 @@ typedef enum FetchDirection FETCH_RELATIVE, } FetchDirection; +typedef enum FetchDirectionKeywords +{ + FETCH_KEYWORD_NONE = 0, + FETCH_KEYWORD_NEXT, + FETCH_KEYWORD_PRIOR, + FETCH_KEYWORD_FIRST, + FETCH_KEYWORD_LAST, + FETCH_KEYWORD_ABSOLUTE, + FETCH_KEYWORD_RELATIVE, + FETCH_KEYWORD_ALL, + FETCH_KEYWORD_FORWARD, + FETCH_KEYWORD_FORWARD_ALL, + FETCH_KEYWORD_BACKWARD, + 
FETCH_KEYWORD_BACKWARD_ALL, +} FetchDirectionKeywords; + #define FETCH_ALL LONG_MAX typedef struct FetchStmt { NodeTag type; FetchDirection direction; /* see above */ - long howMany; /* number of rows, or position argument */ - char *portalname; /* name of portal (cursor) */ - bool ismove; /* true if MOVE */ + /* number of rows, or position argument */ + long howMany pg_node_attr(query_jumble_ignore); + /* name of portal (cursor) */ + char *portalname; + /* true if MOVE */ + bool ismove; + + /* + * Set when a direction_keyword (e.g., FETCH FORWARD) is used, to + * distinguish it from a numeric variant (e.g., FETCH 1) for the purpose + * of query jumbling. + */ + FetchDirectionKeywords direction_keyword; + + /* token location, or -1 if unknown */ + ParseLoc location pg_node_attr(query_jumble_location); } FetchStmt; /* ---------------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index f0d514e6e15..4f59e30d62d 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -53,10 +53,10 @@ typedef struct PlannedStmt CmdType commandType; /* query identifier (copied from Query) */ - uint64 queryId; + int64 queryId; /* plan identifier (can be set by plugins) */ - uint64 planId; + int64 planId; /* is it insert|update|delete|merge RETURNING? */ bool hasReturning; diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 7d3b4198f26..6dfca3cb35b 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -389,14 +389,16 @@ typedef enum ParamKind typedef struct Param { + pg_node_attr(custom_query_jumble) + Expr xpr; ParamKind paramkind; /* kind of parameter. 
See above */ int paramid; /* numeric ID for parameter */ Oid paramtype; /* pg_type OID of parameter's datatype */ /* typmod value, if known */ - int32 paramtypmod pg_node_attr(query_jumble_ignore); + int32 paramtypmod; /* OID of collation, or InvalidOid if none */ - Oid paramcollid pg_node_attr(query_jumble_ignore); + Oid paramcollid; /* token location, or -1 if unknown */ ParseLoc location; } Param; @@ -1397,6 +1399,10 @@ typedef struct ArrayExpr List *elements pg_node_attr(query_jumble_squash); /* true if elements are sub-arrays */ bool multidims pg_node_attr(query_jumble_ignore); + /* location of the start of the elements list */ + ParseLoc list_start; + /* location of the end of the elements list */ + ParseLoc list_end; /* token location, or -1 if unknown */ ParseLoc location; } ArrayExpr; diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h index da7c7abed2e..dcb36dcb44f 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/queryjumble.h @@ -24,11 +24,11 @@ typedef struct LocationLen int location; /* start offset in query text */ int length; /* length in bytes, or -1 to ignore */ - /* - * Indicates that this location represents the beginning or end of a run - * of squashed constants. - */ + /* Does this location represent a squashed list? */ bool squashed; + + /* Is this location a PARAM_EXTERN parameter? */ + bool extern_param; } LocationLen; /* @@ -52,9 +52,18 @@ typedef struct JumbleState /* Current number of valid entries in clocations array */ int clocations_count; - /* highest Param id we've seen, in order to start normalization correctly */ + /* + * ID of the highest PARAM_EXTERN parameter we've seen in the query; used + * to start normalization correctly. However, if there are any squashed + * lists in the query, we disregard query-supplied parameter numbers and + * renumber everything. This is to avoid possible gaps caused by + * squashing in case any params are in squashed lists. 
+ */ int highest_extern_param_id; + /* Whether squashable lists are present */ + bool has_squashed_lists; + /* * Count of the number of NULL nodes seen since last appending a value. * These are flushed out to the jumble buffer before subsequent appends diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h index d397fe27dc1..b523bcda8f3 100644 --- a/src/include/optimizer/cost.h +++ b/src/include/optimizer/cost.h @@ -118,7 +118,7 @@ extern void cost_incremental_sort(Path *path, Cost input_startup_cost, Cost input_total_cost, double input_tuples, int width, Cost comparison_cost, int sort_mem, double limit_tuples); -extern void cost_append(AppendPath *apath); +extern void cost_append(AppendPath *apath, PlannerInfo *root); extern void cost_merge_append(Path *path, PlannerInfo *root, List *pathkeys, int n_streams, int input_disabled_nodes, diff --git a/src/include/optimizer/paramassign.h b/src/include/optimizer/paramassign.h index 59dcb1ff053..bbf7214289b 100644 --- a/src/include/optimizer/paramassign.h +++ b/src/include/optimizer/paramassign.h @@ -30,7 +30,8 @@ extern Param *replace_nestloop_param_placeholdervar(PlannerInfo *root, extern void process_subquery_nestloop_params(PlannerInfo *root, List *subplan_params); extern List *identify_current_nestloop_params(PlannerInfo *root, - Relids leftrelids); + Relids leftrelids, + Relids outerrelids); extern Param *generate_new_exec_param(PlannerInfo *root, Oid paramtype, int32 paramtypmod, Oid paramcollation); extern int assign_special_exec_param(PlannerInfo *root); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index a48c9721797..8410531f2d6 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -109,8 +109,6 @@ extern Relids add_outer_joins_to_relids(PlannerInfo *root, Relids input_relids, List **pushed_down_joins); extern bool have_join_order_restriction(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2); -extern bool 
have_dangerous_phv(PlannerInfo *root, - Relids outer_relids, Relids inner_params); extern void mark_dummy_rel(RelOptInfo *rel); extern void init_dummy_sjinfo(SpecialJoinInfo *sjinfo, Relids left_relids, Relids right_relids); diff --git a/src/include/optimizer/placeholder.h b/src/include/optimizer/placeholder.h index d351045e2e0..db92d8861ba 100644 --- a/src/include/optimizer/placeholder.h +++ b/src/include/optimizer/placeholder.h @@ -30,5 +30,7 @@ extern void add_placeholders_to_joinrel(PlannerInfo *root, RelOptInfo *joinrel, SpecialJoinInfo *sjinfo); extern bool contain_placeholder_references_to(PlannerInfo *root, Node *clause, int relid); +extern Relids get_placeholder_nulling_relids(PlannerInfo *root, + PlaceHolderInfo *phinfo); #endif /* PLACEHOLDER_H */ diff --git a/src/include/parser/parse_node.h b/src/include/parser/parse_node.h index 994284019fb..f7d07c84542 100644 --- a/src/include/parser/parse_node.h +++ b/src/include/parser/parse_node.h @@ -108,20 +108,6 @@ typedef Node *(*CoerceParamHook) (ParseState *pstate, Param *param, * byte-wise locations in parse structures to character-wise cursor * positions.) * - * p_stmt_location: location of the top level RawStmt's start. During - * transformation, the Query's location will be set to the statement's - * location if available. Otherwise, the RawStmt's start location will - * be used. Propagating the location through ParseState is needed for - * the Query length calculation (see p_stmt_len below). - * - * p_stmt_len: length of the top level RawStmt. Most of the time, the - * statement's length is not provided by the parser, with the exception - * of SelectStmt within parentheses and PreparableStmt in COPY. If the - * statement's location is provided by the parser, the top-level location - * and length are needed to accurately compute the Query's length. If the - * statement's location is not provided, the RawStmt's length can be used - * directly. 
- * * p_rtable: list of RTEs that will become the rangetable of the query. * Note that neither relname nor refname of these entries are necessarily * unique; searching the rtable by name is a bad idea. @@ -207,8 +193,6 @@ struct ParseState { ParseState *parentParseState; /* stack link */ const char *p_sourcetext; /* source text, or NULL if not available */ - ParseLoc p_stmt_location; /* start location, or -1 if unknown */ - ParseLoc p_stmt_len; /* length in bytes; 0 means "rest of string" */ List *p_rtable; /* range table so far */ List *p_rteperminfos; /* list of RTEPermissionInfo nodes for each * RTE_RELATION entry in rtable */ diff --git a/src/include/pch/meson.build b/src/include/pch/meson.build index f6babee6f6d..603add1a351 100644 --- a/src/include/pch/meson.build +++ b/src/include/pch/meson.build @@ -1,6 +1,6 @@ # Copyright (c) 2022-2025, PostgreSQL Global Development Group # See https://github.com/mesonbuild/meson/issues/10338 -pch_c_h = meson.source_root() / meson.current_source_dir() / 'c_pch.h' -pch_postgres_h = meson.source_root() / meson.current_source_dir() / 'postgres_pch.h' -pch_postgres_fe_h = meson.source_root() / meson.current_source_dir() / 'postgres_fe_pch.h' +pch_c_h = meson.project_source_root() / meson.current_source_dir() / 'c_pch.h' +pch_postgres_h = meson.project_source_root() / meson.current_source_dir() / 'postgres_pch.h' +pch_postgres_fe_h = meson.project_source_root() / meson.current_source_dir() / 'postgres_fe_pch.h' diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 726a7c1be1f..c4dc5d72bdb 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -229,6 +229,9 @@ /* Define to 1 if you have the global variable 'int timezone'. */ #undef HAVE_INT_TIMEZONE +/* Define to 1 if you have the `io_uring_queue_init_mem' function. */ +#undef HAVE_IO_URING_QUEUE_INIT_MEM + /* Define to 1 if __builtin_constant_p(x) implies "i"(x) acceptance. 
*/ #undef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 378f2f2c2ba..202bd2d5ace 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -718,9 +718,9 @@ extern void pgstat_count_heap_delete(Relation rel); extern void pgstat_count_truncate(Relation rel); extern void pgstat_update_heap_dead_tuples(Relation rel, int delta); -extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info, +extern void pgstat_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void pgstat_twophase_postabort(TransactionId xid, uint16 info, +extern void pgstat_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid); diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h index 82313bb7fcf..ae008118ea8 100644 --- a/src/include/port/pg_crc32c.h +++ b/src/include/port/pg_crc32c.h @@ -72,7 +72,7 @@ pg_comp_crc32c_dispatch(pg_crc32c crc, const void *data, size_t len) { if (__builtin_constant_p(len) && len < 32) { - const unsigned char *p = data; + const unsigned char *p = (const unsigned char *) data; /* * For small constant inputs, inline the computation to avoid a diff --git a/src/include/port/pg_iovec.h b/src/include/port/pg_iovec.h index df40c7208be..90be3af449d 100644 --- a/src/include/port/pg_iovec.h +++ b/src/include/port/pg_iovec.h @@ -21,9 +21,6 @@ #else -/* POSIX requires at least 16 as a maximum iovcnt. */ -#define IOV_MAX 16 - /* Define our own POSIX-compatible iovec struct. */ struct iovec { @@ -34,6 +31,15 @@ struct iovec #endif /* + * If <limits.h> didn't define IOV_MAX, define our own. X/Open requires at + * least 16. (GNU Hurd apparently feel that they're not bound by X/Open, + * because they don't define this symbol at all.) 
+ */ +#ifndef IOV_MAX +#define IOV_MAX 16 +#endif + +/* * Define a reasonable maximum that is safe to use on the stack in arrays of * struct iovec and other small types. The operating system could limit us to * a number as low as 16, but most systems have 1024. diff --git a/src/include/port/pg_numa.h b/src/include/port/pg_numa.h index 40f1d324dcf..9d1ea6d0db8 100644 --- a/src/include/port/pg_numa.h +++ b/src/include/port/pg_numa.h @@ -24,12 +24,17 @@ extern PGDLLIMPORT int pg_numa_get_max_node(void); * This is required on Linux, before pg_numa_query_pages() as we * need to page-fault before move_pages(2) syscall returns valid results. */ -#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \ - ro_volatile_var = *(volatile uint64 *) ptr +static inline void +pg_numa_touch_mem_if_required(void *ptr) +{ + volatile uint64 touch pg_attribute_unused(); + + touch = *(volatile uint64 *) ptr; +} #else -#define pg_numa_touch_mem_if_required(ro_volatile_var, ptr) \ +#define pg_numa_touch_mem_if_required(ptr) \ do {} while(0) #endif diff --git a/src/include/replication/reorderbuffer.h b/src/include/replication/reorderbuffer.h index 24e88c409ba..fa0745552f8 100644 --- a/src/include/replication/reorderbuffer.h +++ b/src/include/replication/reorderbuffer.h @@ -176,6 +176,7 @@ typedef struct ReorderBufferChange #define RBTXN_SENT_PREPARE 0x0200 #define RBTXN_IS_COMMITTED 0x0400 #define RBTXN_IS_ABORTED 0x0800 +#define RBTXN_DISTR_INVAL_OVERFLOWED 0x1000 #define RBTXN_PREPARE_STATUS_MASK (RBTXN_IS_PREPARED | RBTXN_SKIPPED_PREPARE | RBTXN_SENT_PREPARE) @@ -265,6 +266,12 @@ typedef struct ReorderBufferChange ((txn)->txn_flags & RBTXN_SKIPPED_PREPARE) != 0 \ ) +/* Is the array of distributed inval messages overflowed? */ +#define rbtxn_distr_inval_overflowed(txn) \ +( \ + ((txn)->txn_flags & RBTXN_DISTR_INVAL_OVERFLOWED) != 0 \ +) + /* Is this a top-level transaction? 
*/ #define rbtxn_is_toptxn(txn) \ ( \ @@ -422,6 +429,12 @@ typedef struct ReorderBufferTXN uint32 ninvalidations; SharedInvalidationMessage *invalidations; + /* + * Stores cache invalidation messages distributed by other transactions. + */ + uint32 ninvalidations_distributed; + SharedInvalidationMessage *invalidations_distributed; + /* --- * Position in one of two lists: * * list of subtransactions if we are *known* to be subxact @@ -738,6 +751,9 @@ extern void ReorderBufferAddNewTupleCids(ReorderBuffer *rb, TransactionId xid, CommandId cmin, CommandId cmax, CommandId combocid); extern void ReorderBufferAddInvalidations(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn, Size nmsgs, SharedInvalidationMessage *msgs); +extern void ReorderBufferAddDistributedInvalidations(ReorderBuffer *rb, TransactionId xid, + XLogRecPtr lsn, Size nmsgs, + SharedInvalidationMessage *msgs); extern void ReorderBufferImmediateInvalidation(ReorderBuffer *rb, uint32 ninvalidations, SharedInvalidationMessage *invalidations); extern void ReorderBufferProcessXid(ReorderBuffer *rb, TransactionId xid, XLogRecPtr lsn); diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index eb0b93b1114..ffacba9d2ae 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -215,6 +215,14 @@ typedef struct ReplicationSlot * recently stopped. */ TimestampTz inactive_since; + + /* + * Latest restart_lsn that has been flushed to disk. For persistent slots + * the flushed LSN should be taken into account when calculating the + * oldest LSN for WAL segments removal. 
+ */ + XLogRecPtr last_saved_restart_lsn; + } ReplicationSlot; #define SlotIsPhysical(slot) ((slot)->data.database == InvalidOid) diff --git a/src/include/storage/aio.h b/src/include/storage/aio.h index f3726bc3dc5..e7a0a234b6c 100644 --- a/src/include/storage/aio.h +++ b/src/include/storage/aio.h @@ -36,7 +36,7 @@ typedef enum IoMethod #ifdef IOMETHOD_IO_URING_ENABLED IOMETHOD_IO_URING, #endif -} IoMethod; +} IoMethod; /* We'll default to worker based execution. */ #define DEFAULT_IO_METHOD IOMETHOD_WORKER diff --git a/src/include/storage/aio_types.h b/src/include/storage/aio_types.h index 18183366077..afee85c787b 100644 --- a/src/include/storage/aio_types.h +++ b/src/include/storage/aio_types.h @@ -107,7 +107,7 @@ typedef struct PgAioResult /* of type PgAioResultStatus, see above */ uint32 status:PGAIO_RESULT_STATUS_BITS; - /* meaning defined by callback->error */ + /* meaning defined by callback->report */ uint32 error_data:PGAIO_RESULT_ERROR_BITS; int32 result; diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index 0dec7d93b3b..52a71b138f7 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -486,8 +486,8 @@ extern bool StartLocalBufferIO(BufferDesc *bufHdr, bool forInput, bool nowait); extern void FlushLocalBuffer(BufferDesc *bufHdr, SMgrRelation reln); extern void InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced); extern void DropRelationLocalBuffers(RelFileLocator rlocator, - ForkNumber forkNum, - BlockNumber firstDelBlock); + ForkNumber *forkNum, int nforks, + BlockNumber *firstDelBlock); extern void DropRelationAllLocalBuffers(RelFileLocator rlocator); extern void AtEOXact_LocalBuffers(bool isCommit); diff --git a/src/include/storage/copydir.h b/src/include/storage/copydir.h index 940d74462d1..f1d7beeed1a 100644 --- a/src/include/storage/copydir.h +++ b/src/include/storage/copydir.h @@ -17,7 +17,7 @@ typedef enum FileCopyMethod { FILE_COPY_METHOD_COPY, 
FILE_COPY_METHOD_CLONE, -} FileCopyMethod; +} FileCopyMethod; /* GUC parameters */ extern PGDLLIMPORT int file_copy_method; diff --git a/src/include/storage/dsm_registry.h b/src/include/storage/dsm_registry.h index b381e44bc9d..4871ed509eb 100644 --- a/src/include/storage/dsm_registry.h +++ b/src/include/storage/dsm_registry.h @@ -13,10 +13,15 @@ #ifndef DSM_REGISTRY_H #define DSM_REGISTRY_H +#include "lib/dshash.h" + extern void *GetNamedDSMSegment(const char *name, size_t size, void (*init_callback) (void *ptr), bool *found); - +extern dsa_area *GetNamedDSA(const char *name, bool *found); +extern dshash_table *GetNamedDSHash(const char *name, + const dshash_parameters *params, + bool *found); extern Size DSMRegistryShmemSize(void); extern void DSMRegistryShmemInit(void); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 6f2108a44e8..826cf28fdbd 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -18,6 +18,7 @@ #error "lock.h may not be included from frontend code" #endif +#include "access/transam.h" #include "lib/ilist.h" #include "storage/lockdefs.h" #include "storage/lwlock.h" @@ -30,7 +31,7 @@ typedef struct PGPROC PGPROC; /* GUC variables */ extern PGDLLIMPORT int max_locks_per_xact; -extern PGDLLIMPORT bool log_lock_failure; +extern PGDLLIMPORT bool log_lock_failures; #ifdef LOCK_DEBUG extern PGDLLIMPORT int Trace_lock_oidmin; @@ -581,7 +582,7 @@ extern bool LockHasWaiters(const LOCKTAG *locktag, extern VirtualTransactionId *GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode, int *countp); extern void AtPrepare_Locks(void); -extern void PostPrepare_Locks(TransactionId xid); +extern void PostPrepare_Locks(FullTransactionId fxid); extern bool LockCheckConflicts(LockMethod lockMethodTable, LOCKMODE lockmode, LOCK *lock, PROCLOCK *proclock); @@ -597,13 +598,13 @@ extern BlockedProcsData *GetBlockerStatusData(int blocked_pid); extern xl_standby_lock *GetRunningTransactionLocks(int *nlocks); extern const 
char *GetLockmodeName(LOCKMETHODID lockmethodid, LOCKMODE mode); -extern void lock_twophase_recover(TransactionId xid, uint16 info, +extern void lock_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void lock_twophase_postcommit(TransactionId xid, uint16 info, +extern void lock_twophase_postcommit(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void lock_twophase_postabort(TransactionId xid, uint16 info, +extern void lock_twophase_postabort(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); -extern void lock_twophase_standby_recover(TransactionId xid, uint16 info, +extern void lock_twophase_standby_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); extern DeadLockState DeadLockCheck(PGPROC *proc); diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 2b4cbda39a5..08a72569ae5 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -221,8 +221,6 @@ typedef enum BuiltinTrancheIds LWTRANCHE_XACT_SLRU, LWTRANCHE_PARALLEL_VACUUM_DSA, LWTRANCHE_AIO_URING_COMPLETION, - LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE, - LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC, LWTRANCHE_FIRST_USER_DEFINED, } BuiltinTrancheIds; diff --git a/src/include/storage/predicate.h b/src/include/storage/predicate.h index 267d5d90e94..c1e3a4d9f64 100644 --- a/src/include/storage/predicate.h +++ b/src/include/storage/predicate.h @@ -72,9 +72,9 @@ extern void PreCommit_CheckForSerializationFailure(void); /* two-phase commit support */ extern void AtPrepare_PredicateLocks(void); -extern void PostPrepare_PredicateLocks(TransactionId xid); -extern void PredicateLockTwoPhaseFinish(TransactionId xid, bool isCommit); -extern void predicatelock_twophase_recover(TransactionId xid, uint16 info, +extern void PostPrepare_PredicateLocks(FullTransactionId fxid); +extern void PredicateLockTwoPhaseFinish(FullTransactionId xid, bool isCommit); +extern void 
predicatelock_twophase_recover(FullTransactionId fxid, uint16 info, void *recdata, uint32 len); /* parallel query support */ diff --git a/src/include/storage/procsignal.h b/src/include/storage/procsignal.h index 345d5a0ecb1..afeeb1ca019 100644 --- a/src/include/storage/procsignal.h +++ b/src/include/storage/procsignal.h @@ -35,7 +35,6 @@ typedef enum PROCSIG_WALSND_INIT_STOPPING, /* ask walsenders to prepare for shutdown */ PROCSIG_BARRIER, /* global barrier interrupt */ PROCSIG_LOG_MEMORY_CONTEXT, /* ask backend to log the memory contexts */ - PROCSIG_GET_MEMORY_CONTEXT, /* ask backend to send the memory contexts */ PROCSIG_PARALLEL_APPLY_MESSAGE, /* Message from parallel apply workers */ /* Recovery conflict reasons */ diff --git a/src/include/storage/sinval.h b/src/include/storage/sinval.h index 5dc5aafe5c9..845a5851b57 100644 --- a/src/include/storage/sinval.h +++ b/src/include/storage/sinval.h @@ -119,7 +119,7 @@ typedef struct Oid dbId; /* database ID */ Oid relid; /* relation ID, or 0 if whole * RelationSyncCache */ -} SharedInvalRelSyncMsg; +} SharedInvalRelSyncMsg; typedef union { diff --git a/src/include/tcop/backend_startup.h b/src/include/tcop/backend_startup.h index dcb9d056643..e8639688c00 100644 --- a/src/include/tcop/backend_startup.h +++ b/src/include/tcop/backend_startup.h @@ -86,7 +86,7 @@ typedef enum LogConnectionOption LOG_CONNECTION_AUTHENTICATION | LOG_CONNECTION_AUTHORIZATION | LOG_CONNECTION_SETUP_DURATIONS, -} LogConnectionOption; +} LogConnectionOption; /* * A collection of timings of various stages of connection establishment and diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h index 430ccd7d78e..3016501ac05 100644 --- a/src/include/utils/backend_status.h +++ b/src/include/utils/backend_status.h @@ -170,10 +170,10 @@ typedef struct PgBackendStatus int64 st_progress_param[PGSTAT_NUM_PROGRESS_PARAM]; /* query identifier, optionally computed using post_parse_analyze_hook */ - uint64 st_query_id; + int64 
st_query_id; /* plan identifier, optionally computed using planner_hook */ - uint64 st_plan_id; + int64 st_plan_id; } PgBackendStatus; @@ -321,16 +321,16 @@ extern void pgstat_clear_backend_activity_snapshot(void); /* Activity reporting functions */ extern void pgstat_report_activity(BackendState state, const char *cmd_str); -extern void pgstat_report_query_id(uint64 query_id, bool force); -extern void pgstat_report_plan_id(uint64 plan_id, bool force); +extern void pgstat_report_query_id(int64 query_id, bool force); +extern void pgstat_report_plan_id(int64 plan_id, bool force); extern void pgstat_report_tempfile(size_t filesize); extern void pgstat_report_appname(const char *appname); extern void pgstat_report_xact_timestamp(TimestampTz tstamp); extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser); extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen); -extern uint64 pgstat_get_my_query_id(void); -extern uint64 pgstat_get_my_plan_id(void); +extern int64 pgstat_get_my_query_id(void); +extern int64 pgstat_get_my_plan_id(void); extern BackendType pgstat_get_backend_type_by_proc_number(ProcNumber procNumber); diff --git a/src/include/utils/date.h b/src/include/utils/date.h index bb5c1e57b07..abfda0b1ae9 100644 --- a/src/include/utils/date.h +++ b/src/include/utils/date.h @@ -100,6 +100,8 @@ extern int32 anytime_typmod_check(bool istz, int32 typmod); extern double date2timestamp_no_overflow(DateADT dateVal); extern Timestamp date2timestamp_opt_overflow(DateADT dateVal, int *overflow); extern TimestampTz date2timestamptz_opt_overflow(DateADT dateVal, int *overflow); +extern DateADT timestamp2date_opt_overflow(Timestamp timestamp, int *overflow); +extern DateADT timestamptz2date_opt_overflow(TimestampTz timestamp, int *overflow); extern int32 date_cmp_timestamp_internal(DateADT dateVal, Timestamp dt2); extern int32 date_cmp_timestamptz_internal(DateADT dateVal, TimestampTz dt2); diff --git 
a/src/include/utils/dsa.h b/src/include/utils/dsa.h index 9eca8788908..0a6067be628 100644 --- a/src/include/utils/dsa.h +++ b/src/include/utils/dsa.h @@ -145,6 +145,7 @@ extern dsa_area *dsa_create_in_place_ext(void *place, size_t size, size_t init_segment_size, size_t max_segment_size); extern dsa_area *dsa_attach(dsa_handle handle); +extern bool dsa_is_attached(dsa_handle handle); extern dsa_area *dsa_attach_in_place(void *place, dsm_segment *segment); extern void dsa_release_in_place(void *place); extern void dsa_on_dsm_detach_release_in_place(dsm_segment *, Datum); diff --git a/src/include/utils/elog.h b/src/include/utils/elog.h index 5eac0e16970..675f4f5f469 100644 --- a/src/include/utils/elog.h +++ b/src/include/utils/elog.h @@ -485,7 +485,7 @@ typedef enum PGERROR_TERSE, /* single-line error messages */ PGERROR_DEFAULT, /* recommended style */ PGERROR_VERBOSE, /* all the facts, ma'am */ -} PGErrorVerbosity; +} PGErrorVerbosity; extern PGDLLIMPORT int Log_error_verbosity; extern PGDLLIMPORT char *Log_line_prefix; diff --git a/src/include/utils/injection_point.h b/src/include/utils/injection_point.h index a37958e1835..fd5bc061b7b 100644 --- a/src/include/utils/injection_point.h +++ b/src/include/utils/injection_point.h @@ -11,6 +11,19 @@ #ifndef INJECTION_POINT_H #define INJECTION_POINT_H +#include "nodes/pg_list.h" + +/* + * Injection point data, used when retrieving a list of all the attached + * injection points. + */ +typedef struct InjectionPointData +{ + const char *name; + const char *library; + const char *function; +} InjectionPointData; + /* * Injection points require --enable-injection-points. 
*/ @@ -47,6 +60,9 @@ extern void InjectionPointCached(const char *name, void *arg); extern bool IsInjectionPointAttached(const char *name); extern bool InjectionPointDetach(const char *name); +/* Get the current set of injection points attached */ +extern List *InjectionPointList(void); + #ifdef EXEC_BACKEND extern PGDLLIMPORT struct InjectionPointsCtl *ActiveInjectionPoints; #endif diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h index c0987dca155..8abc26abce2 100644 --- a/src/include/utils/memutils.h +++ b/src/include/utils/memutils.h @@ -18,9 +18,6 @@ #define MEMUTILS_H #include "nodes/memnodes.h" -#include "storage/condition_variable.h" -#include "storage/lmgr.h" -#include "utils/dsa.h" /* @@ -51,23 +48,6 @@ #define AllocHugeSizeIsValid(size) ((Size) (size) <= MaxAllocHugeSize) -/* - * Memory Context reporting size limits. - */ - -/* Max length of context name and ident */ -#define MEMORY_CONTEXT_IDENT_SHMEM_SIZE 64 -/* Maximum size (in bytes) of DSA area per process */ -#define MEMORY_CONTEXT_REPORT_MAX_PER_BACKEND ((size_t) (1 * 1024 * 1024)) - -/* - * Maximum size per context. Actual size may be lower as this assumes the worst - * case of deepest path and longest identifiers (name and ident, thus the - * multiplication by 2). The path depth is limited to 100 like for memory - * context logging. - */ -#define MAX_MEMORY_CONTEXT_STATS_SIZE (sizeof(MemoryStatsEntry) + \ - (100 * sizeof(int)) + (2 * MEMORY_CONTEXT_IDENT_SHMEM_SIZE)) /* * Standard top-level memory contexts. 
@@ -339,66 +319,4 @@ pg_memory_is_all_zeros(const void *ptr, size_t len) return true; } -/* Dynamic shared memory state for statistics per context */ -typedef struct MemoryStatsEntry -{ - dsa_pointer name; - dsa_pointer ident; - dsa_pointer path; - NodeTag type; - int path_length; - int levels; - int64 totalspace; - int64 nblocks; - int64 freespace; - int64 freechunks; - int num_agg_stats; -} MemoryStatsEntry; - -/* - * Static shared memory state representing the DSA area created for memory - * context statistics reporting. A single DSA area is created and used by all - * the processes, each having its specific DSA allocations for sharing memory - * statistics, tracked by per backend static shared memory state. - */ -typedef struct MemoryStatsCtl -{ - dsa_handle memstats_dsa_handle; - LWLock lw_lock; -} MemoryStatsCtl; - -/* - * Per backend static shared memory state for memory context statistics - * reporting. - */ -typedef struct MemoryStatsBackendState -{ - ConditionVariable memcxt_cv; - LWLock lw_lock; - int proc_id; - int total_stats; - bool summary; - dsa_pointer memstats_dsa_pointer; - TimestampTz stats_timestamp; -} MemoryStatsBackendState; - - -/* - * Used for storage of transient identifiers for pg_get_backend_memory_contexts - */ -typedef struct MemoryStatsContextId -{ - MemoryContext context; - int context_id; -} MemoryStatsContextId; - -extern PGDLLIMPORT MemoryStatsBackendState *memCxtState; -extern PGDLLIMPORT MemoryStatsCtl *memCxtArea; -extern PGDLLIMPORT dsa_area *MemoryStatsDsaArea; -extern void ProcessGetMemoryContextInterrupt(void); -extern const char *ContextTypeToString(NodeTag type); -extern void HandleGetMemoryContextInterrupt(void); -extern Size MemoryContextReportingShmemSize(void); -extern void MemoryContextReportingShmemInit(void); -extern void AtProcExit_memstats_cleanup(int code, Datum arg); #endif /* MEMUTILS_H */ diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index 7b8cbf58d2c..44ff60a25b4 100644 --- 
a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -12,6 +12,8 @@ #ifndef _PG_LOCALE_ #define _PG_LOCALE_ +#include "mb/pg_wchar.h" + #ifdef USE_ICU #include <unicode/ucol.h> #endif @@ -77,6 +79,52 @@ struct collate_methods bool strxfrm_is_safe; }; +struct ctype_methods +{ + /* case mapping: LOWER()/INITCAP()/UPPER() */ + size_t (*strlower) (char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale); + size_t (*strtitle) (char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale); + size_t (*strupper) (char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale); + size_t (*strfold) (char *dest, size_t destsize, + const char *src, ssize_t srclen, + pg_locale_t locale); + + /* required */ + bool (*wc_isdigit) (pg_wchar wc, pg_locale_t locale); + bool (*wc_isalpha) (pg_wchar wc, pg_locale_t locale); + bool (*wc_isalnum) (pg_wchar wc, pg_locale_t locale); + bool (*wc_isupper) (pg_wchar wc, pg_locale_t locale); + bool (*wc_islower) (pg_wchar wc, pg_locale_t locale); + bool (*wc_isgraph) (pg_wchar wc, pg_locale_t locale); + bool (*wc_isprint) (pg_wchar wc, pg_locale_t locale); + bool (*wc_ispunct) (pg_wchar wc, pg_locale_t locale); + bool (*wc_isspace) (pg_wchar wc, pg_locale_t locale); + pg_wchar (*wc_toupper) (pg_wchar wc, pg_locale_t locale); + pg_wchar (*wc_tolower) (pg_wchar wc, pg_locale_t locale); + + /* required */ + bool (*char_is_cased) (char ch, pg_locale_t locale); + + /* + * Optional. If defined, will only be called for single-byte encodings. If + * not defined, or if the encoding is multibyte, will fall back to + * pg_strlower(). + */ + char (*char_tolower) (unsigned char ch, pg_locale_t locale); + + /* + * For regex and pattern matching efficiency, the maximum char value + * supported by the above methods. If zero, limit is set by regex code. + */ + pg_wchar max_chr; +}; + /* * We use a discriminated union to hold either a locale_t or an ICU collator. 
* pg_locale_t is occasionally checked for truth, so make it a pointer. @@ -95,13 +143,13 @@ struct collate_methods */ struct pg_locale_struct { - char provider; bool deterministic; bool collate_is_c; bool ctype_is_c; bool is_default; const struct collate_methods *collate; /* NULL if collate_is_c */ + const struct ctype_methods *ctype; /* NULL if ctype_is_c */ union { @@ -125,6 +173,10 @@ extern void init_database_collation(void); extern pg_locale_t pg_newlocale_from_collation(Oid collid); extern char *get_collation_actual_version(char collprovider, const char *collcollate); + +extern bool char_is_cased(char ch, pg_locale_t locale); +extern bool char_tolower_enabled(pg_locale_t locale); +extern char char_tolower(unsigned char ch, pg_locale_t locale); extern size_t pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale); diff --git a/src/include/utils/skipsupport.h b/src/include/utils/skipsupport.h index bc51847cf61..c42be001fb5 100644 --- a/src/include/utils/skipsupport.h +++ b/src/include/utils/skipsupport.h @@ -90,7 +90,7 @@ typedef struct SkipSupportData */ SkipSupportIncDec decrement; SkipSupportIncDec increment; -} SkipSupportData; +} SkipSupportData; extern SkipSupport PrepareSkipSupportFromOpclass(Oid opfamily, Oid opcintype, bool reverse); diff --git a/src/include/utils/timestamp.h b/src/include/utils/timestamp.h index 8c205859c3b..93531732b08 100644 --- a/src/include/utils/timestamp.h +++ b/src/include/utils/timestamp.h @@ -144,6 +144,9 @@ extern int timestamp_cmp_internal(Timestamp dt1, Timestamp dt2); extern TimestampTz timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow); +extern Timestamp timestamptz2timestamp_opt_overflow(TimestampTz timestamp, + int *overflow); + extern int32 timestamp_cmp_timestamptz_internal(Timestamp timestampVal, TimestampTz dt2); diff --git a/src/interfaces/libpq-oauth/.gitignore b/src/interfaces/libpq-oauth/.gitignore new file mode 100644 index 00000000000..a4afe7c1c68 --- 
/dev/null +++ b/src/interfaces/libpq-oauth/.gitignore @@ -0,0 +1 @@ +/exports.list diff --git a/src/interfaces/libpq-oauth/oauth-curl.c b/src/interfaces/libpq-oauth/oauth-curl.c index d13b9cbabb4..dba9a684fa8 100644 --- a/src/interfaces/libpq-oauth/oauth-curl.c +++ b/src/interfaces/libpq-oauth/oauth-curl.c @@ -83,6 +83,20 @@ #define MAX_OAUTH_RESPONSE_SIZE (256 * 1024) /* + * Similarly, a limit on the maximum JSON nesting level keeps a server from + * running us out of stack space. A common nesting level in practice is 2 (for a + * top-level object containing arrays of strings). As of May 2025, the maximum + * depth for standard server metadata appears to be 6, if the document contains + * a full JSON Web Key Set in its "jwks" parameter. + * + * Since it's easy to nest JSON, and the number of parameters and key types + * keeps growing, take a healthy buffer of 16. (If this ever proves to be a + * problem in practice, we may want to switch over to the incremental JSON + * parser instead of playing with this parameter.) + */ +#define MAX_OAUTH_NESTING_LEVEL 16 + +/* * Parsed JSON Representations * * As a general rule, we parse and cache only the fields we're currently using. @@ -495,6 +509,12 @@ oauth_json_object_start(void *state) } ++ctx->nested; + if (ctx->nested > MAX_OAUTH_NESTING_LEVEL) + { + oauth_parse_set_error(ctx, "JSON is too deeply nested"); + return JSON_SEM_ACTION_FAILED; + } + return JSON_SUCCESS; } @@ -599,6 +619,12 @@ oauth_json_array_start(void *state) } ++ctx->nested; + if (ctx->nested > MAX_OAUTH_NESTING_LEVEL) + { + oauth_parse_set_error(ctx, "JSON is too deeply nested"); + return JSON_SEM_ACTION_FAILED; + } + return JSON_SUCCESS; } diff --git a/src/interfaces/libpq/Makefile b/src/interfaces/libpq/Makefile index c6fe5fec7f6..853aab4b1b8 100644 --- a/src/interfaces/libpq/Makefile +++ b/src/interfaces/libpq/Makefile @@ -98,14 +98,21 @@ SHLIB_PREREQS = submake-libpgport SHLIB_EXPORTS = exports.txt +# Appends to a comma-separated list. 
+comma := , +define add_to_list +$(eval $1 := $(if $($1),$($1)$(comma) $2,$2)) +endef + ifeq ($(with_ssl),openssl) -PKG_CONFIG_REQUIRES_PRIVATE = libssl, libcrypto +$(call add_to_list,PKG_CONFIG_REQUIRES_PRIVATE,libssl) +$(call add_to_list,PKG_CONFIG_REQUIRES_PRIVATE,libcrypto) endif ifeq ($(with_libcurl),yes) # libpq.so doesn't link against libcurl, but libpq.a needs libpq-oauth, and # libpq-oauth needs libcurl. Put both into *.private. -PKG_CONFIG_REQUIRES_PRIVATE += libcurl +$(call add_to_list,PKG_CONFIG_REQUIRES_PRIVATE,libcurl) %.pc: override SHLIB_LINK_INTERNAL += -lpq-oauth endif diff --git a/src/interfaces/libpq/fe-auth-oauth.c b/src/interfaces/libpq/fe-auth-oauth.c index 9fbff89a21d..d146c5f567c 100644 --- a/src/interfaces/libpq/fe-auth-oauth.c +++ b/src/interfaces/libpq/fe-auth-oauth.c @@ -157,6 +157,14 @@ client_initial_response(PGconn *conn, bool discover) #define ERROR_SCOPE_FIELD "scope" #define ERROR_OPENID_CONFIGURATION_FIELD "openid-configuration" +/* + * Limit the maximum number of nested objects/arrays. Because OAUTHBEARER + * doesn't have any defined extensions for its JSON yet, we can be much more + * conservative here than with libpq-oauth's MAX_OAUTH_NESTING_LEVEL; we expect + * a nesting level of 1 in practice. + */ +#define MAX_SASL_NESTING_LEVEL 8 + struct json_ctx { char *errmsg; /* any non-NULL value stops all processing */ @@ -196,6 +204,9 @@ oauth_json_object_start(void *state) } ++ctx->nested; + if (ctx->nested > MAX_SASL_NESTING_LEVEL) + oauth_json_set_error(ctx, libpq_gettext("JSON is too deeply nested")); + return oauth_json_has_error(ctx) ? JSON_SEM_ACTION_FAILED : JSON_SUCCESS; } @@ -254,10 +265,23 @@ oauth_json_array_start(void *state) ctx->target_field_name); } + ++ctx->nested; + if (ctx->nested > MAX_SASL_NESTING_LEVEL) + oauth_json_set_error(ctx, libpq_gettext("JSON is too deeply nested")); + return oauth_json_has_error(ctx) ? 
JSON_SEM_ACTION_FAILED : JSON_SUCCESS; } static JsonParseErrorType +oauth_json_array_end(void *state) +{ + struct json_ctx *ctx = state; + + --ctx->nested; + return JSON_SUCCESS; +} + +static JsonParseErrorType oauth_json_scalar(void *state, char *token, JsonTokenType type) { struct json_ctx *ctx = state; @@ -519,6 +543,7 @@ handle_oauth_sasl_error(PGconn *conn, const char *msg, int msglen) sem.object_end = oauth_json_object_end; sem.object_field_start = oauth_json_object_field_start; sem.array_start = oauth_json_array_start; + sem.array_end = oauth_json_array_end; sem.scalar = oauth_json_scalar; err = pg_parse_json(lex, &sem); diff --git a/src/interfaces/libpq/fe-cancel.c b/src/interfaces/libpq/fe-cancel.c index 8c7c198a530..65517c5703b 100644 --- a/src/interfaces/libpq/fe-cancel.c +++ b/src/interfaces/libpq/fe-cancel.c @@ -114,7 +114,7 @@ PQcancelCreate(PGconn *conn) if (conn->be_cancel_key != NULL) { cancelConn->be_cancel_key = malloc(conn->be_cancel_key_len); - if (!conn->be_cancel_key) + if (cancelConn->be_cancel_key == NULL) goto oom_error; memcpy(cancelConn->be_cancel_key, conn->be_cancel_key, conn->be_cancel_key_len); } @@ -137,6 +137,7 @@ PQcancelCreate(PGconn *conn) goto oom_error; originalHost = conn->connhost[conn->whichhost]; + cancelConn->connhost[0].type = originalHost.type; if (originalHost.host) { cancelConn->connhost[0].host = strdup(originalHost.host); diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index ccb01aad361..51a9c416584 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -2141,7 +2141,7 @@ pqConnectOptions2(PGconn *conn) if (conn->min_pversion > conn->max_pversion) { conn->status = CONNECTION_BAD; - libpq_append_conn_error(conn, "min_protocol_version is greater than max_protocol_version"); + libpq_append_conn_error(conn, "\"%s\" is greater than \"%s\"", "min_protocol_version", "max_protocol_version"); return false; } diff --git a/src/interfaces/libpq/fe-misc.c 
b/src/interfaces/libpq/fe-misc.c index c14e3c95250..dca44fdc5d2 100644 --- a/src/interfaces/libpq/fe-misc.c +++ b/src/interfaces/libpq/fe-misc.c @@ -553,9 +553,35 @@ pqPutMsgEnd(PGconn *conn) /* Make message eligible to send */ conn->outCount = conn->outMsgEnd; + /* If appropriate, try to push out some data */ if (conn->outCount >= 8192) { - int toSend = conn->outCount - (conn->outCount % 8192); + int toSend = conn->outCount; + + /* + * On Unix-pipe connections, it seems profitable to prefer sending + * pipe-buffer-sized packets not randomly-sized ones, so retain the + * last partial-8K chunk in our buffer for now. On TCP connections, + * the advantage of that is far less clear. Moreover, it flat out + * isn't safe when using SSL or GSSAPI, because those code paths have + * API stipulations that if they fail to send all the data that was + * offered in the previous write attempt, we mustn't offer less data + * in this write attempt. The previous write attempt might've been + * pqFlush attempting to send everything in the buffer, so we mustn't + * offer less now. (Presently, we won't try to use SSL or GSSAPI on + * Unix connections, so those checks are just Asserts. They'll have + * to become part of the regular if-test if we ever change that.) 
+ */ + if (conn->raddr.addr.ss_family == AF_UNIX) + { +#ifdef USE_SSL + Assert(!conn->ssl_in_use); +#endif +#ifdef ENABLE_GSS + Assert(!conn->gssenc); +#endif + toSend -= toSend % 8192; + } if (pqSendSome(conn, toSend) < 0) return EOF; diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index beb1c889aad..1599de757d1 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -1434,7 +1434,7 @@ pqGetNegotiateProtocolVersion3(PGconn *conn) /* 3.1 never existed, we went straight from 3.0 to 3.2 */ if (their_version == PG_PROTOCOL(3, 1)) { - libpq_append_conn_error(conn, "received invalid protocol negotiation message: server requests downgrade to non-existent 3.1 protocol version"); + libpq_append_conn_error(conn, "received invalid protocol negotiation message: server requested downgrade to non-existent 3.1 protocol version"); goto failure; } @@ -1452,9 +1452,10 @@ pqGetNegotiateProtocolVersion3(PGconn *conn) if (their_version < conn->min_pversion) { - libpq_append_conn_error(conn, "server only supports protocol version %d.%d, but min_protocol_version was set to %d.%d", + libpq_append_conn_error(conn, "server only supports protocol version %d.%d, but \"%s\" was set to %d.%d", PG_PROTOCOL_MAJOR(their_version), PG_PROTOCOL_MINOR(their_version), + "min_protocol_version", PG_PROTOCOL_MAJOR(conn->min_pversion), PG_PROTOCOL_MINOR(conn->min_pversion)); @@ -1476,7 +1477,7 @@ pqGetNegotiateProtocolVersion3(PGconn *conn) } if (strncmp(conn->workBuffer.data, "_pq_.", 5) != 0) { - libpq_append_conn_error(conn, "received invalid protocol negotiation message: server reported unsupported parameter name without a _pq_. 
prefix (\"%s\")", conn->workBuffer.data); + libpq_append_conn_error(conn, "received invalid protocol negotiation message: server reported unsupported parameter name without a \"%s\" prefix (\"%s\")", "_pq_.", conn->workBuffer.data); goto failure; } libpq_append_conn_error(conn, "received invalid protocol negotiation message: server reported an unsupported parameter that was not requested (\"%s\")", conn->workBuffer.data); diff --git a/src/interfaces/libpq/fe-secure-gssapi.c b/src/interfaces/libpq/fe-secure-gssapi.c index ce183bc04b4..bc9e1ce06fa 100644 --- a/src/interfaces/libpq/fe-secure-gssapi.c +++ b/src/interfaces/libpq/fe-secure-gssapi.c @@ -47,11 +47,18 @@ * don't want the other side to send arbitrarily huge packets as we * would have to allocate memory for them to then pass them to GSSAPI. * - * Therefore, these two #define's are effectively part of the protocol + * Therefore, this #define is effectively part of the protocol * spec and can't ever be changed. */ -#define PQ_GSS_SEND_BUFFER_SIZE 16384 -#define PQ_GSS_RECV_BUFFER_SIZE 16384 +#define PQ_GSS_MAX_PACKET_SIZE 16384 /* includes uint32 header word */ + +/* + * However, during the authentication exchange we must cope with whatever + * message size the GSSAPI library wants to send (because our protocol + * doesn't support splitting those messages). Depending on configuration + * those messages might be as much as 64kB. + */ +#define PQ_GSS_AUTH_BUFFER_SIZE 65536 /* includes uint32 header word */ /* * We need these state variables per-connection. To allow the functions @@ -105,9 +112,9 @@ pg_GSS_write(PGconn *conn, const void *ptr, size_t len) * again, so if it offers a len less than that, something is wrong. * * Note: it may seem attractive to report partial write completion once - * we've successfully sent any encrypted packets. However, that can cause - * problems for callers; notably, pqPutMsgEnd's heuristic to send only - * full 8K blocks interacts badly with such a hack. 
We won't save much, + * we've successfully sent any encrypted packets. However, doing that + * expands the state space of this processing and has been responsible for + * bugs in the past (cf. commit d053a879b). We won't save much, * typically, by letting callers discard data early, so don't risk it. */ if (len < PqGSSSendConsumed) @@ -203,11 +210,11 @@ pg_GSS_write(PGconn *conn, const void *ptr, size_t len) goto cleanup; } - if (output.length > PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32)) + if (output.length > PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32)) { libpq_append_conn_error(conn, "client tried to send oversize GSSAPI packet (%zu > %zu)", (size_t) output.length, - PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32)); + PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32)); errno = EIO; /* for lack of a better idea */ goto cleanup; } @@ -342,11 +349,11 @@ pg_GSS_read(PGconn *conn, void *ptr, size_t len) /* Decode the packet length and check for overlength packet */ input.length = pg_ntoh32(*(uint32 *) PqGSSRecvBuffer); - if (input.length > PQ_GSS_RECV_BUFFER_SIZE - sizeof(uint32)) + if (input.length > PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32)) { libpq_append_conn_error(conn, "oversize GSSAPI packet sent by the server (%zu > %zu)", (size_t) input.length, - PQ_GSS_RECV_BUFFER_SIZE - sizeof(uint32)); + PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32)); errno = EIO; /* for lack of a better idea */ return -1; } @@ -485,12 +492,15 @@ pqsecure_open_gss(PGconn *conn) * initialize state variables. By malloc'ing the buffers separately, we * ensure that they are sufficiently aligned for the length-word accesses * that we do in some places in this file. + * + * We'll use PQ_GSS_AUTH_BUFFER_SIZE-sized buffers until transport + * negotiation is complete, then switch to PQ_GSS_MAX_PACKET_SIZE. 
*/ if (PqGSSSendBuffer == NULL) { - PqGSSSendBuffer = malloc(PQ_GSS_SEND_BUFFER_SIZE); - PqGSSRecvBuffer = malloc(PQ_GSS_RECV_BUFFER_SIZE); - PqGSSResultBuffer = malloc(PQ_GSS_RECV_BUFFER_SIZE); + PqGSSSendBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE); + PqGSSRecvBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE); + PqGSSResultBuffer = malloc(PQ_GSS_AUTH_BUFFER_SIZE); if (!PqGSSSendBuffer || !PqGSSRecvBuffer || !PqGSSResultBuffer) { libpq_append_conn_error(conn, "out of memory"); @@ -564,13 +574,13 @@ pqsecure_open_gss(PGconn *conn) * so leave a spot at the end for a NULL byte too) and report that * back to the caller. */ - result = gss_read(conn, PqGSSRecvBuffer + PqGSSRecvLength, PQ_GSS_RECV_BUFFER_SIZE - PqGSSRecvLength - 1, &ret); + result = gss_read(conn, PqGSSRecvBuffer + PqGSSRecvLength, PQ_GSS_AUTH_BUFFER_SIZE - PqGSSRecvLength - 1, &ret); if (result != PGRES_POLLING_OK) return result; PqGSSRecvLength += ret; - Assert(PqGSSRecvLength < PQ_GSS_RECV_BUFFER_SIZE); + Assert(PqGSSRecvLength < PQ_GSS_AUTH_BUFFER_SIZE); PqGSSRecvBuffer[PqGSSRecvLength] = '\0'; appendPQExpBuffer(&conn->errorMessage, "%s\n", PqGSSRecvBuffer + 1); @@ -584,11 +594,11 @@ pqsecure_open_gss(PGconn *conn) /* Get the length and check for over-length packet */ input.length = pg_ntoh32(*(uint32 *) PqGSSRecvBuffer); - if (input.length > PQ_GSS_RECV_BUFFER_SIZE - sizeof(uint32)) + if (input.length > PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32)) { libpq_append_conn_error(conn, "oversize GSSAPI packet sent by the server (%zu > %zu)", (size_t) input.length, - PQ_GSS_RECV_BUFFER_SIZE - sizeof(uint32)); + PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32)); return PGRES_POLLING_FAILED; } @@ -669,11 +679,32 @@ pqsecure_open_gss(PGconn *conn) gss_release_buffer(&minor, &output); /* + * Release the large authentication buffers and allocate the ones we + * want for normal operation. 
(This maneuver is safe only because + * pqDropConnection will drop the buffers; otherwise, during a + * reconnection we'd be at risk of using undersized buffers during + * negotiation.) + */ + free(PqGSSSendBuffer); + free(PqGSSRecvBuffer); + free(PqGSSResultBuffer); + PqGSSSendBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE); + PqGSSRecvBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE); + PqGSSResultBuffer = malloc(PQ_GSS_MAX_PACKET_SIZE); + if (!PqGSSSendBuffer || !PqGSSRecvBuffer || !PqGSSResultBuffer) + { + libpq_append_conn_error(conn, "out of memory"); + return PGRES_POLLING_FAILED; + } + PqGSSSendLength = PqGSSSendNext = PqGSSSendConsumed = 0; + PqGSSRecvLength = PqGSSResultLength = PqGSSResultNext = 0; + + /* * Determine the max packet size which will fit in our buffer, after * accounting for the length. pg_GSS_write will need this. */ major = gss_wrap_size_limit(&minor, conn->gctx, 1, GSS_C_QOP_DEFAULT, - PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32), + PQ_GSS_MAX_PACKET_SIZE - sizeof(uint32), &PqGSSMaxPktSize); if (GSS_ERROR(major)) @@ -687,10 +718,11 @@ pqsecure_open_gss(PGconn *conn) } /* Must have output.length > 0 */ - if (output.length > PQ_GSS_SEND_BUFFER_SIZE - sizeof(uint32)) + if (output.length > PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32)) { - pg_GSS_error(libpq_gettext("GSSAPI context establishment error"), - conn, major, minor); + libpq_append_conn_error(conn, "client tried to send oversize GSSAPI packet (%zu > %zu)", + (size_t) output.length, + PQ_GSS_AUTH_BUFFER_SIZE - sizeof(uint32)); gss_release_buffer(&minor, &output); return PGRES_POLLING_FAILED; } diff --git a/src/interfaces/libpq/fe-secure-openssl.c b/src/interfaces/libpq/fe-secure-openssl.c index 78f9e84eb35..b08b3a6901b 100644 --- a/src/interfaces/libpq/fe-secure-openssl.c +++ b/src/interfaces/libpq/fe-secure-openssl.c @@ -711,7 +711,7 @@ SSL_CTX_keylog_cb(const SSL *ssl, const char *line) if (fd == -1) { - libpq_append_conn_error(conn, "could not open ssl keylog file \"%s\": %s", + 
libpq_append_conn_error(conn, "could not open SSL key logging file \"%s\": %s", conn->sslkeylogfile, pg_strerror(errno)); return; } @@ -719,7 +719,7 @@ SSL_CTX_keylog_cb(const SSL *ssl, const char *line) /* line is guaranteed by OpenSSL to be NUL terminated */ rc = write(fd, line, strlen(line)); if (rc < 0) - libpq_append_conn_error(conn, "could not write to ssl keylog file \"%s\": %s", + libpq_append_conn_error(conn, "could not write to SSL key logging file \"%s\": %s", conn->sslkeylogfile, pg_strerror(errno)); else rc = write(fd, "\n", 1); diff --git a/src/makefiles/meson.build b/src/makefiles/meson.build index 91a8de1ee9b..54dbc059ada 100644 --- a/src/makefiles/meson.build +++ b/src/makefiles/meson.build @@ -6,7 +6,7 @@ # Emulation of PGAC_CHECK_STRIP strip_bin = find_program(get_option('STRIP'), required: false, native: true) -strip_cmd = strip_bin.found() ? [strip_bin.path()] : [':'] +strip_cmd = strip_bin.found() ? [strip_bin.full_path()] : [':'] working_strip = false if strip_bin.found() @@ -49,8 +49,8 @@ pgxs_kv = { 'PORTNAME': portname, 'PG_SYSROOT': pg_sysroot, - 'abs_top_builddir': meson.build_root(), - 'abs_top_srcdir': meson.source_root(), + 'abs_top_builddir': meson.project_build_root(), + 'abs_top_srcdir': meson.project_source_root(), 'enable_rpath': get_option('rpath') ? 'yes' : 'no', 'enable_nls': libintl.found() ? 'yes' : 'no', @@ -123,7 +123,7 @@ pgxs_kv = { if llvm.found() pgxs_kv += { - 'CLANG': clang.path(), + 'CLANG': clang.full_path(), 'CXX': ' '.join(cpp.cmd_array()), 'LLVM_BINPATH': llvm_binpath, } @@ -258,7 +258,7 @@ pgxs_deps = { pgxs_cdata = configuration_data(pgxs_kv) foreach b, p : pgxs_bins - pgxs_cdata.set(b, p.found() ? p.path() : '') + pgxs_cdata.set(b, p.found() ? 
p.full_path() : '') endforeach foreach pe : pgxs_empty diff --git a/src/makefiles/pgxs.mk b/src/makefiles/pgxs.mk index 0de3737e789..039cee3dfe5 100644 --- a/src/makefiles/pgxs.mk +++ b/src/makefiles/pgxs.mk @@ -376,10 +376,7 @@ endif ifdef REGRESS # things created by various check targets rm -rf $(pg_regress_clean_files) -ifeq ($(PORTNAME), win) - rm -f regress.def endif -endif # REGRESS ifdef TAP_TESTS rm -rf tmp_check/ endif diff --git a/src/pl/plperl/meson.build b/src/pl/plperl/meson.build index b463d4d56c5..7c4081c3460 100644 --- a/src/pl/plperl/meson.build +++ b/src/pl/plperl/meson.build @@ -96,7 +96,7 @@ tests += { 'plperl_transaction', 'plperl_env', ], - 'regress_args': ['--dlpath', meson.build_root() / 'src/test/regress'], + 'regress_args': ['--dlpath', meson.project_build_root() / 'src/test/regress'], }, } diff --git a/src/pl/plpgsql/src/expected/plpgsql_misc.out b/src/pl/plpgsql/src/expected/plpgsql_misc.out index a6511df08ec..ffb377f5f54 100644 --- a/src/pl/plpgsql/src/expected/plpgsql_misc.out +++ b/src/pl/plpgsql/src/expected/plpgsql_misc.out @@ -65,3 +65,39 @@ do $$ declare x public.foo%rowtype; begin end $$; ERROR: relation "public.foo" does not exist CONTEXT: compilation of PL/pgSQL function "inline_code_block" near line 1 do $$ declare x public.misc_table%rowtype; begin end $$; +-- Test handling of an unreserved keyword as a variable name +-- and record field name. +do $$ +declare + execute int; + r record; +begin + execute := 10; + raise notice 'execute = %', execute; + select 1 as strict into r; + raise notice 'r.strict = %', r.strict; +end $$; +NOTICE: execute = 10 +NOTICE: r.strict = 1 +-- Test handling of a reserved keyword as a record field name. +do $$ declare r record; +begin + select 1 as x, 2 as foreach into r; + raise notice 'r.x = %', r.x; + raise notice 'r.foreach = %', r.foreach; -- fails +end $$; +NOTICE: r.x = 1 +ERROR: field name "foreach" is a reserved key word +LINE 1: r.foreach + ^ +HINT: Use double quotes to quote it. 
+QUERY: r.foreach +CONTEXT: PL/pgSQL function inline_code_block line 5 at RAISE +do $$ declare r record; +begin + select 1 as x, 2 as foreach into r; + raise notice 'r.x = %', r.x; + raise notice 'r."foreach" = %', r."foreach"; -- ok +end $$; +NOTICE: r.x = 1 +NOTICE: r."foreach" = 2 diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index 519f7695d7c..ee961425a5b 100644 --- a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -226,8 +226,13 @@ plpgsql_compile_callback(FunctionCallInfo fcinfo, /* * All the permanent output of compilation (e.g. parse tree) is kept in a * per-function memory context, so it can be reclaimed easily. + * + * While the func_cxt needs to be long-lived, we initially make it a child + * of the assumed-short-lived caller's context, and reparent it under + * CacheMemoryContext only upon success. This arrangement avoids memory + * leakage during compilation of a faulty function. */ - func_cxt = AllocSetContextCreate(TopMemoryContext, + func_cxt = AllocSetContextCreate(CurrentMemoryContext, "PL/pgSQL function", ALLOCSET_DEFAULT_SIZES); plpgsql_compile_tmp_cxt = MemoryContextSwitchTo(func_cxt); @@ -704,6 +709,11 @@ plpgsql_compile_callback(FunctionCallInfo fcinfo, plpgsql_dumptree(function); /* + * All is well, so make the func_cxt long-lived + */ + MemoryContextSetParent(func_cxt, CacheMemoryContext); + + /* * Pop the error context stack */ error_context_stack = plerrcontext.previous; @@ -1201,17 +1211,22 @@ resolve_column_ref(ParseState *pstate, PLpgSQL_expr *expr, } /* - * We should not get here, because a RECFIELD datum should - * have been built at parse time for every possible qualified - * reference to fields of this record. But if we do, handle - * it like field-not-found: throw error or return NULL. + * Ideally we'd never get here, because a RECFIELD datum + * should have been built at parse time for every qualified + * reference to a field of this record that appears in the + * source text. 
However, plpgsql_yylex will not build such a + * datum unless the field name lexes as token type IDENT. + * Hence, if the would-be field name is a PL/pgSQL reserved + * word, we lose. Assume that that's what happened and tell + * the user to quote it, unless the caller prefers we just + * return NULL. */ if (error_if_no_field) ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("record \"%s\" has no field \"%s\"", - (nnames_field == 1) ? name1 : name2, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("field name \"%s\" is a reserved key word", colname), + errhint("Use double quotes to quote it."), parser_errposition(pstate, cref->location))); } break; diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c index bb99781c56e..b9acc790dc6 100644 --- a/src/pl/plpgsql/src/pl_exec.c +++ b/src/pl/plpgsql/src/pl_exec.c @@ -5703,7 +5703,7 @@ exec_eval_expr(PLpgSQL_execstate *estate, /* * Else do it the hard way via exec_run_select */ - rc = exec_run_select(estate, expr, 2, NULL); + rc = exec_run_select(estate, expr, 0, NULL); if (rc != SPI_OK_SELECT) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -5757,6 +5757,10 @@ exec_eval_expr(PLpgSQL_execstate *estate, /* ---------- * exec_run_select Execute a select query + * + * Note: passing maxtuples different from 0 ("return all tuples") is + * deprecated because it will prevent parallel execution of the query. + * However, we retain the parameter in case we need it someday. 
* ---------- */ static int diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y index 5612e66d023..7b672ea5179 100644 --- a/src/pl/plpgsql/src/pl_gram.y +++ b/src/pl/plpgsql/src/pl_gram.y @@ -1368,7 +1368,8 @@ for_control : for_variable K_IN int tok = yylex(&yylval, &yylloc, yyscanner); int tokloc = yylloc; - if (tok == K_EXECUTE) + if (tok_is_keyword(tok, &yylval, + K_EXECUTE, "execute")) { /* EXECUTE means it's a dynamic FOR loop */ PLpgSQL_stmt_dynfors *new; @@ -2135,7 +2136,8 @@ stmt_open : K_OPEN cursor_variable yyerror(&yylloc, NULL, yyscanner, "syntax error, expected \"FOR\""); tok = yylex(&yylval, &yylloc, yyscanner); - if (tok == K_EXECUTE) + if (tok_is_keyword(tok, &yylval, + K_EXECUTE, "execute")) { int endtoken; @@ -2536,6 +2538,7 @@ unreserved_keyword : | K_ERRCODE | K_ERROR | K_EXCEPTION + | K_EXECUTE | K_EXIT | K_FETCH | K_FIRST @@ -2581,6 +2584,7 @@ unreserved_keyword : | K_SLICE | K_SQLSTATE | K_STACKED + | K_STRICT | K_TABLE | K_TABLE_NAME | K_TYPE @@ -3514,7 +3518,8 @@ make_return_query_stmt(int location, YYSTYPE *yylvalp, YYLTYPE *yyllocp, yyscan_ new->stmtid = ++plpgsql_curr_compile->nstatements; /* check for RETURN QUERY EXECUTE */ - if ((tok = yylex(yylvalp, yyllocp, yyscanner)) != K_EXECUTE) + tok = yylex(yylvalp, yyllocp, yyscanner); + if (!tok_is_keyword(tok, yylvalp, K_EXECUTE, "execute")) { /* ordinary static query */ plpgsql_push_back_token(tok, yylvalp, yyllocp, yyscanner); @@ -3597,7 +3602,7 @@ read_into_target(PLpgSQL_variable **target, bool *strict, YYSTYPE *yylvalp, YYLT *strict = false; tok = yylex(yylvalp, yyllocp, yyscanner); - if (strict && tok == K_STRICT) + if (strict && tok_is_keyword(tok, yylvalp, K_STRICT, "strict")) { *strict = true; tok = yylex(yylvalp, yyllocp, yyscanner); diff --git a/src/pl/plpgsql/src/pl_reserved_kwlist.h b/src/pl/plpgsql/src/pl_reserved_kwlist.h index ce7b0c9d331..f3ef2cbd8d7 100644 --- a/src/pl/plpgsql/src/pl_reserved_kwlist.h +++ b/src/pl/plpgsql/src/pl_reserved_kwlist.h @@ 
-33,7 +33,6 @@ PG_KEYWORD("case", K_CASE) PG_KEYWORD("declare", K_DECLARE) PG_KEYWORD("else", K_ELSE) PG_KEYWORD("end", K_END) -PG_KEYWORD("execute", K_EXECUTE) PG_KEYWORD("for", K_FOR) PG_KEYWORD("foreach", K_FOREACH) PG_KEYWORD("from", K_FROM) @@ -44,7 +43,6 @@ PG_KEYWORD("loop", K_LOOP) PG_KEYWORD("not", K_NOT) PG_KEYWORD("null", K_NULL) PG_KEYWORD("or", K_OR) -PG_KEYWORD("strict", K_STRICT) PG_KEYWORD("then", K_THEN) PG_KEYWORD("to", K_TO) PG_KEYWORD("using", K_USING) diff --git a/src/pl/plpgsql/src/pl_scanner.c b/src/pl/plpgsql/src/pl_scanner.c index d08187dafcb..19825e5c718 100644 --- a/src/pl/plpgsql/src/pl_scanner.c +++ b/src/pl/plpgsql/src/pl_scanner.c @@ -53,7 +53,7 @@ IdentifierLookup plpgsql_IdentifierLookup = IDENTIFIER_LOOKUP_NORMAL; * We try to avoid reserving more keywords than we have to; but there's * little point in not reserving a word if it's reserved in the core grammar. * Currently, the following words are reserved here but not in the core: - * BEGIN BY DECLARE EXECUTE FOREACH IF LOOP STRICT WHILE + * BEGIN BY DECLARE FOREACH IF LOOP WHILE */ /* ScanKeywordList lookup data for PL/pgSQL keywords */ diff --git a/src/pl/plpgsql/src/pl_unreserved_kwlist.h b/src/pl/plpgsql/src/pl_unreserved_kwlist.h index 98f99ec470c..b48c5a645ff 100644 --- a/src/pl/plpgsql/src/pl_unreserved_kwlist.h +++ b/src/pl/plpgsql/src/pl_unreserved_kwlist.h @@ -58,6 +58,7 @@ PG_KEYWORD("elsif", K_ELSIF) PG_KEYWORD("errcode", K_ERRCODE) PG_KEYWORD("error", K_ERROR) PG_KEYWORD("exception", K_EXCEPTION) +PG_KEYWORD("execute", K_EXECUTE) PG_KEYWORD("exit", K_EXIT) PG_KEYWORD("fetch", K_FETCH) PG_KEYWORD("first", K_FIRST) @@ -103,6 +104,7 @@ PG_KEYWORD("scroll", K_SCROLL) PG_KEYWORD("slice", K_SLICE) PG_KEYWORD("sqlstate", K_SQLSTATE) PG_KEYWORD("stacked", K_STACKED) +PG_KEYWORD("strict", K_STRICT) PG_KEYWORD("table", K_TABLE) PG_KEYWORD("table_name", K_TABLE_NAME) PG_KEYWORD("type", K_TYPE) diff --git a/src/pl/plpgsql/src/sql/plpgsql_misc.sql 
b/src/pl/plpgsql/src/sql/plpgsql_misc.sql index d3a7f703a75..0bc39fcf325 100644 --- a/src/pl/plpgsql/src/sql/plpgsql_misc.sql +++ b/src/pl/plpgsql/src/sql/plpgsql_misc.sql @@ -37,3 +37,32 @@ do $$ declare x foo.bar%rowtype; begin end $$; do $$ declare x foo.bar.baz%rowtype; begin end $$; do $$ declare x public.foo%rowtype; begin end $$; do $$ declare x public.misc_table%rowtype; begin end $$; + +-- Test handling of an unreserved keyword as a variable name +-- and record field name. +do $$ +declare + execute int; + r record; +begin + execute := 10; + raise notice 'execute = %', execute; + select 1 as strict into r; + raise notice 'r.strict = %', r.strict; +end $$; + +-- Test handling of a reserved keyword as a record field name. + +do $$ declare r record; +begin + select 1 as x, 2 as foreach into r; + raise notice 'r.x = %', r.x; + raise notice 'r.foreach = %', r.foreach; -- fails +end $$; + +do $$ declare r record; +begin + select 1 as x, 2 as foreach into r; + raise notice 'r.x = %', r.x; + raise notice 'r."foreach" = %', r."foreach"; -- ok +end $$; diff --git a/src/pl/plpython/expected/README b/src/pl/plpython/expected/README deleted file mode 100644 index 388c553a589..00000000000 --- a/src/pl/plpython/expected/README +++ /dev/null @@ -1,3 +0,0 @@ -Guide to alternative expected files: - -plpython_error_5.out Python 3.5 and newer diff --git a/src/pl/plpython/expected/plpython_error.out b/src/pl/plpython/expected/plpython_error.out index 68722b00097..fd9cd73be74 100644 --- a/src/pl/plpython/expected/plpython_error.out +++ b/src/pl/plpython/expected/plpython_error.out @@ -243,7 +243,7 @@ $$ plpy.nonexistent $$ LANGUAGE plpython3u; SELECT toplevel_attribute_error(); -ERROR: AttributeError: 'module' object has no attribute 'nonexistent' +ERROR: AttributeError: module 'plpy' has no attribute 'nonexistent' CONTEXT: Traceback (most recent call last): PL/Python function "toplevel_attribute_error", line 2, in <module> plpy.nonexistent diff --git 
a/src/pl/plpython/expected/plpython_error_5.out b/src/pl/plpython/expected/plpython_error_5.out deleted file mode 100644 index fd9cd73be74..00000000000 --- a/src/pl/plpython/expected/plpython_error_5.out +++ /dev/null @@ -1,460 +0,0 @@ --- test error handling, i forgot to restore Warn_restart in --- the trigger handler once. the errors and subsequent core dump were --- interesting. -/* Flat out Python syntax error - */ -CREATE FUNCTION python_syntax_error() RETURNS text - AS -'.syntaxerror' - LANGUAGE plpython3u; -ERROR: could not compile PL/Python function "python_syntax_error" -DETAIL: SyntaxError: invalid syntax (<string>, line 2) -/* With check_function_bodies = false the function should get defined - * and the error reported when called - */ -SET check_function_bodies = false; -CREATE FUNCTION python_syntax_error() RETURNS text - AS -'.syntaxerror' - LANGUAGE plpython3u; -SELECT python_syntax_error(); -ERROR: could not compile PL/Python function "python_syntax_error" -DETAIL: SyntaxError: invalid syntax (<string>, line 2) -/* Run the function twice to check if the hashtable entry gets cleaned up */ -SELECT python_syntax_error(); -ERROR: could not compile PL/Python function "python_syntax_error" -DETAIL: SyntaxError: invalid syntax (<string>, line 2) -RESET check_function_bodies; -/* Flat out syntax error - */ -CREATE FUNCTION sql_syntax_error() RETURNS text - AS -'plpy.execute("syntax error")' - LANGUAGE plpython3u; -SELECT sql_syntax_error(); -ERROR: spiexceptions.SyntaxError: syntax error at or near "syntax" -LINE 1: syntax error - ^ -QUERY: syntax error -CONTEXT: Traceback (most recent call last): - PL/Python function "sql_syntax_error", line 1, in <module> - plpy.execute("syntax error") -PL/Python function "sql_syntax_error" -/* check the handling of uncaught python exceptions - */ -CREATE FUNCTION exception_index_invalid(text) RETURNS text - AS -'return args[1]' - LANGUAGE plpython3u; -SELECT exception_index_invalid('test'); -ERROR: IndexError: list index 
out of range -CONTEXT: Traceback (most recent call last): - PL/Python function "exception_index_invalid", line 1, in <module> - return args[1] -PL/Python function "exception_index_invalid" -/* check handling of nested exceptions - */ -CREATE FUNCTION exception_index_invalid_nested() RETURNS text - AS -'rv = plpy.execute("SELECT test5(''foo'')") -return rv[0]' - LANGUAGE plpython3u; -SELECT exception_index_invalid_nested(); -ERROR: spiexceptions.UndefinedFunction: function test5(unknown) does not exist -LINE 1: SELECT test5('foo') - ^ -HINT: No function matches the given name and argument types. You might need to add explicit type casts. -QUERY: SELECT test5('foo') -CONTEXT: Traceback (most recent call last): - PL/Python function "exception_index_invalid_nested", line 1, in <module> - rv = plpy.execute("SELECT test5('foo')") -PL/Python function "exception_index_invalid_nested" -/* a typo - */ -CREATE FUNCTION invalid_type_uncaught(a text) RETURNS text - AS -'if "plan" not in SD: - q = "SELECT fname FROM users WHERE lname = $1" - SD["plan"] = plpy.prepare(q, [ "test" ]) -rv = plpy.execute(SD["plan"], [ a ]) -if len(rv): - return rv[0]["fname"] -return None -' - LANGUAGE plpython3u; -SELECT invalid_type_uncaught('rick'); -ERROR: spiexceptions.UndefinedObject: type "test" does not exist -CONTEXT: Traceback (most recent call last): - PL/Python function "invalid_type_uncaught", line 3, in <module> - SD["plan"] = plpy.prepare(q, [ "test" ]) -PL/Python function "invalid_type_uncaught" -/* for what it's worth catch the exception generated by - * the typo, and return None - */ -CREATE FUNCTION invalid_type_caught(a text) RETURNS text - AS -'if "plan" not in SD: - q = "SELECT fname FROM users WHERE lname = $1" - try: - SD["plan"] = plpy.prepare(q, [ "test" ]) - except plpy.SPIError as ex: - plpy.notice(str(ex)) - return None -rv = plpy.execute(SD["plan"], [ a ]) -if len(rv): - return rv[0]["fname"] -return None -' - LANGUAGE plpython3u; -SELECT invalid_type_caught('rick'); 
-NOTICE: type "test" does not exist - invalid_type_caught ---------------------- - -(1 row) - -/* for what it's worth catch the exception generated by - * the typo, and reraise it as a plain error - */ -CREATE FUNCTION invalid_type_reraised(a text) RETURNS text - AS -'if "plan" not in SD: - q = "SELECT fname FROM users WHERE lname = $1" - try: - SD["plan"] = plpy.prepare(q, [ "test" ]) - except plpy.SPIError as ex: - plpy.error(str(ex)) -rv = plpy.execute(SD["plan"], [ a ]) -if len(rv): - return rv[0]["fname"] -return None -' - LANGUAGE plpython3u; -SELECT invalid_type_reraised('rick'); -ERROR: plpy.Error: type "test" does not exist -CONTEXT: Traceback (most recent call last): - PL/Python function "invalid_type_reraised", line 6, in <module> - plpy.error(str(ex)) -PL/Python function "invalid_type_reraised" -/* no typo no messing about - */ -CREATE FUNCTION valid_type(a text) RETURNS text - AS -'if "plan" not in SD: - SD["plan"] = plpy.prepare("SELECT fname FROM users WHERE lname = $1", [ "text" ]) -rv = plpy.execute(SD["plan"], [ a ]) -if len(rv): - return rv[0]["fname"] -return None -' - LANGUAGE plpython3u; -SELECT valid_type('rick'); - valid_type ------------- - -(1 row) - -/* error in nested functions to get a traceback -*/ -CREATE FUNCTION nested_error() RETURNS text - AS -'def fun1(): - plpy.error("boom") - -def fun2(): - fun1() - -def fun3(): - fun2() - -fun3() -return "not reached" -' - LANGUAGE plpython3u; -SELECT nested_error(); -ERROR: plpy.Error: boom -CONTEXT: Traceback (most recent call last): - PL/Python function "nested_error", line 10, in <module> - fun3() - PL/Python function "nested_error", line 8, in fun3 - fun2() - PL/Python function "nested_error", line 5, in fun2 - fun1() - PL/Python function "nested_error", line 2, in fun1 - plpy.error("boom") -PL/Python function "nested_error" -/* raising plpy.Error is just like calling plpy.error -*/ -CREATE FUNCTION nested_error_raise() RETURNS text - AS -'def fun1(): - raise plpy.Error("boom") - -def 
fun2(): - fun1() - -def fun3(): - fun2() - -fun3() -return "not reached" -' - LANGUAGE plpython3u; -SELECT nested_error_raise(); -ERROR: plpy.Error: boom -CONTEXT: Traceback (most recent call last): - PL/Python function "nested_error_raise", line 10, in <module> - fun3() - PL/Python function "nested_error_raise", line 8, in fun3 - fun2() - PL/Python function "nested_error_raise", line 5, in fun2 - fun1() - PL/Python function "nested_error_raise", line 2, in fun1 - raise plpy.Error("boom") -PL/Python function "nested_error_raise" -/* using plpy.warning should not produce a traceback -*/ -CREATE FUNCTION nested_warning() RETURNS text - AS -'def fun1(): - plpy.warning("boom") - -def fun2(): - fun1() - -def fun3(): - fun2() - -fun3() -return "you''ve been warned" -' - LANGUAGE plpython3u; -SELECT nested_warning(); -WARNING: boom - nested_warning --------------------- - you've been warned -(1 row) - -/* AttributeError at toplevel used to give segfaults with the traceback -*/ -CREATE FUNCTION toplevel_attribute_error() RETURNS void AS -$$ -plpy.nonexistent -$$ LANGUAGE plpython3u; -SELECT toplevel_attribute_error(); -ERROR: AttributeError: module 'plpy' has no attribute 'nonexistent' -CONTEXT: Traceback (most recent call last): - PL/Python function "toplevel_attribute_error", line 2, in <module> - plpy.nonexistent -PL/Python function "toplevel_attribute_error" -/* Calling PL/Python functions from SQL and vice versa should not lose context. 
- */ -CREATE OR REPLACE FUNCTION python_traceback() RETURNS void AS $$ -def first(): - second() - -def second(): - third() - -def third(): - plpy.execute("select sql_error()") - -first() -$$ LANGUAGE plpython3u; -CREATE OR REPLACE FUNCTION sql_error() RETURNS void AS $$ -begin - select 1/0; -end -$$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION python_from_sql_error() RETURNS void AS $$ -begin - select python_traceback(); -end -$$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION sql_from_python_error() RETURNS void AS $$ -plpy.execute("select sql_error()") -$$ LANGUAGE plpython3u; -SELECT python_traceback(); -ERROR: spiexceptions.DivisionByZero: division by zero -CONTEXT: Traceback (most recent call last): - PL/Python function "python_traceback", line 11, in <module> - first() - PL/Python function "python_traceback", line 3, in first - second() - PL/Python function "python_traceback", line 6, in second - third() - PL/Python function "python_traceback", line 9, in third - plpy.execute("select sql_error()") -PL/Python function "python_traceback" -SELECT sql_error(); -ERROR: division by zero -CONTEXT: SQL statement "select 1/0" -PL/pgSQL function sql_error() line 3 at SQL statement -SELECT python_from_sql_error(); -ERROR: spiexceptions.DivisionByZero: division by zero -CONTEXT: Traceback (most recent call last): - PL/Python function "python_traceback", line 11, in <module> - first() - PL/Python function "python_traceback", line 3, in first - second() - PL/Python function "python_traceback", line 6, in second - third() - PL/Python function "python_traceback", line 9, in third - plpy.execute("select sql_error()") -PL/Python function "python_traceback" -SQL statement "select python_traceback()" -PL/pgSQL function python_from_sql_error() line 3 at SQL statement -SELECT sql_from_python_error(); -ERROR: spiexceptions.DivisionByZero: division by zero -CONTEXT: Traceback (most recent call last): - PL/Python function "sql_from_python_error", line 2, in <module> - 
plpy.execute("select sql_error()") -PL/Python function "sql_from_python_error" -/* check catching specific types of exceptions - */ -CREATE TABLE specific ( - i integer PRIMARY KEY -); -CREATE FUNCTION specific_exception(i integer) RETURNS void AS -$$ -from plpy import spiexceptions -try: - plpy.execute("insert into specific values (%s)" % (i or "NULL")); -except spiexceptions.NotNullViolation as e: - plpy.notice("Violated the NOT NULL constraint, sqlstate %s" % e.sqlstate) -except spiexceptions.UniqueViolation as e: - plpy.notice("Violated the UNIQUE constraint, sqlstate %s" % e.sqlstate) -$$ LANGUAGE plpython3u; -SELECT specific_exception(2); - specific_exception --------------------- - -(1 row) - -SELECT specific_exception(NULL); -NOTICE: Violated the NOT NULL constraint, sqlstate 23502 - specific_exception --------------------- - -(1 row) - -SELECT specific_exception(2); -NOTICE: Violated the UNIQUE constraint, sqlstate 23505 - specific_exception --------------------- - -(1 row) - -/* SPI errors in PL/Python functions should preserve the SQLSTATE value - */ -CREATE FUNCTION python_unique_violation() RETURNS void AS $$ -plpy.execute("insert into specific values (1)") -plpy.execute("insert into specific values (1)") -$$ LANGUAGE plpython3u; -CREATE FUNCTION catch_python_unique_violation() RETURNS text AS $$ -begin - begin - perform python_unique_violation(); - exception when unique_violation then - return 'ok'; - end; - return 'not reached'; -end; -$$ language plpgsql; -SELECT catch_python_unique_violation(); - catch_python_unique_violation -------------------------------- - ok -(1 row) - -/* manually starting subtransactions - a bad idea - */ -CREATE FUNCTION manual_subxact() RETURNS void AS $$ -plpy.execute("savepoint save") -plpy.execute("create table foo(x integer)") -plpy.execute("rollback to save") -$$ LANGUAGE plpython3u; -SELECT manual_subxact(); -ERROR: plpy.SPIError: SPI_execute failed: SPI_ERROR_TRANSACTION -CONTEXT: Traceback (most recent call last): 
- PL/Python function "manual_subxact", line 2, in <module> - plpy.execute("savepoint save") -PL/Python function "manual_subxact" -/* same for prepared plans - */ -CREATE FUNCTION manual_subxact_prepared() RETURNS void AS $$ -save = plpy.prepare("savepoint save") -rollback = plpy.prepare("rollback to save") -plpy.execute(save) -plpy.execute("create table foo(x integer)") -plpy.execute(rollback) -$$ LANGUAGE plpython3u; -SELECT manual_subxact_prepared(); -ERROR: plpy.SPIError: SPI_execute_plan failed: SPI_ERROR_TRANSACTION -CONTEXT: Traceback (most recent call last): - PL/Python function "manual_subxact_prepared", line 4, in <module> - plpy.execute(save) -PL/Python function "manual_subxact_prepared" -/* raising plpy.spiexception.* from python code should preserve sqlstate - */ -CREATE FUNCTION plpy_raise_spiexception() RETURNS void AS $$ -raise plpy.spiexceptions.DivisionByZero() -$$ LANGUAGE plpython3u; -DO $$ -BEGIN - SELECT plpy_raise_spiexception(); -EXCEPTION WHEN division_by_zero THEN - -- NOOP -END -$$ LANGUAGE plpgsql; -/* setting a custom sqlstate should be handled - */ -CREATE FUNCTION plpy_raise_spiexception_override() RETURNS void AS $$ -exc = plpy.spiexceptions.DivisionByZero() -exc.sqlstate = 'SILLY' -raise exc -$$ LANGUAGE plpython3u; -DO $$ -BEGIN - SELECT plpy_raise_spiexception_override(); -EXCEPTION WHEN SQLSTATE 'SILLY' THEN - -- NOOP -END -$$ LANGUAGE plpgsql; -/* test the context stack trace for nested execution levels - */ -CREATE FUNCTION notice_innerfunc() RETURNS int AS $$ -plpy.execute("DO LANGUAGE plpython3u $x$ plpy.notice('inside DO') $x$") -return 1 -$$ LANGUAGE plpython3u; -CREATE FUNCTION notice_outerfunc() RETURNS int AS $$ -plpy.execute("SELECT notice_innerfunc()") -return 1 -$$ LANGUAGE plpython3u; -\set SHOW_CONTEXT always -SELECT notice_outerfunc(); -NOTICE: inside DO -CONTEXT: PL/Python anonymous code block -SQL statement "DO LANGUAGE plpython3u $x$ plpy.notice('inside DO') $x$" -PL/Python function "notice_innerfunc" -SQL 
statement "SELECT notice_innerfunc()" -PL/Python function "notice_outerfunc" - notice_outerfunc ------------------- - 1 -(1 row) - -/* test error logged with an underlying exception that includes a detail - * string (bug #18070). - */ -CREATE FUNCTION python_error_detail() RETURNS SETOF text AS $$ - plan = plpy.prepare("SELECT to_date('xy', 'DD') d") - for row in plpy.cursor(plan): - yield row['d'] -$$ LANGUAGE plpython3u; -SELECT python_error_detail(); -ERROR: error fetching next item from iterator -DETAIL: spiexceptions.InvalidDatetimeFormat: invalid value "xy" for "DD" -CONTEXT: Traceback (most recent call last): -PL/Python function "python_error_detail" diff --git a/src/pl/plpython/plpy_cursorobject.c b/src/pl/plpython/plpy_cursorobject.c index 37d7efca77c..cc74c4df6ba 100644 --- a/src/pl/plpython/plpy_cursorobject.c +++ b/src/pl/plpython/plpy_cursorobject.c @@ -58,9 +58,9 @@ static PyType_Slot PLyCursor_slots[] = static PyType_Spec PLyCursor_spec = { .name = "PLyCursor", - .basicsize = sizeof(PLyCursorObject), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, - .slots = PLyCursor_slots, + .basicsize = sizeof(PLyCursorObject), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + .slots = PLyCursor_slots, }; static PyTypeObject *PLy_CursorType; diff --git a/src/pl/plpython/plpy_elog.c b/src/pl/plpython/plpy_elog.c index ddf3573f0e7..f6d10045e5c 100644 --- a/src/pl/plpython/plpy_elog.c +++ b/src/pl/plpython/plpy_elog.c @@ -18,7 +18,8 @@ PyObject *PLy_exc_spi_error = NULL; static void PLy_traceback(PyObject *e, PyObject *v, PyObject *tb, - char **xmsg, char **tbmsg, int *tb_depth); + char *volatile *xmsg, char *volatile *tbmsg, + int *tb_depth); static void PLy_get_spi_error_data(PyObject *exc, int *sqlerrcode, char **detail, char **hint, char **query, int *position, char **schema_name, char **table_name, char **column_name, @@ -43,78 +44,82 @@ void PLy_elog_impl(int elevel, const char *fmt,...) 
{ int save_errno = errno; - char *xmsg; - char *tbmsg; + char *volatile xmsg = NULL; + char *volatile tbmsg = NULL; int tb_depth; StringInfoData emsg; PyObject *exc, *val, *tb; - const char *primary = NULL; - int sqlerrcode = 0; - char *detail = NULL; - char *hint = NULL; - char *query = NULL; - int position = 0; - char *schema_name = NULL; - char *table_name = NULL; - char *column_name = NULL; - char *datatype_name = NULL; - char *constraint_name = NULL; + + /* If we'll need emsg, must initialize it before entering PG_TRY */ + if (fmt) + initStringInfo(&emsg); PyErr_Fetch(&exc, &val, &tb); - if (exc != NULL) + /* Use a PG_TRY block to ensure we release the PyObjects just acquired */ + PG_TRY(); { - PyErr_NormalizeException(&exc, &val, &tb); - - if (PyErr_GivenExceptionMatches(val, PLy_exc_spi_error)) - PLy_get_spi_error_data(val, &sqlerrcode, - &detail, &hint, &query, &position, + const char *primary = NULL; + int sqlerrcode = 0; + char *detail = NULL; + char *hint = NULL; + char *query = NULL; + int position = 0; + char *schema_name = NULL; + char *table_name = NULL; + char *column_name = NULL; + char *datatype_name = NULL; + char *constraint_name = NULL; + + if (exc != NULL) + { + PyErr_NormalizeException(&exc, &val, &tb); + + if (PyErr_GivenExceptionMatches(val, PLy_exc_spi_error)) + PLy_get_spi_error_data(val, &sqlerrcode, + &detail, &hint, &query, &position, + &schema_name, &table_name, &column_name, + &datatype_name, &constraint_name); + else if (PyErr_GivenExceptionMatches(val, PLy_exc_error)) + PLy_get_error_data(val, &sqlerrcode, &detail, &hint, &schema_name, &table_name, &column_name, &datatype_name, &constraint_name); - else if (PyErr_GivenExceptionMatches(val, PLy_exc_error)) - PLy_get_error_data(val, &sqlerrcode, &detail, &hint, - &schema_name, &table_name, &column_name, - &datatype_name, &constraint_name); - else if (PyErr_GivenExceptionMatches(val, PLy_exc_fatal)) - elevel = FATAL; - } + else if (PyErr_GivenExceptionMatches(val, PLy_exc_fatal)) + 
elevel = FATAL; + } - /* this releases our refcount on tb! */ - PLy_traceback(exc, val, tb, - &xmsg, &tbmsg, &tb_depth); + PLy_traceback(exc, val, tb, + &xmsg, &tbmsg, &tb_depth); - if (fmt) - { - initStringInfo(&emsg); - for (;;) + if (fmt) { - va_list ap; - int needed; - - errno = save_errno; - va_start(ap, fmt); - needed = appendStringInfoVA(&emsg, dgettext(TEXTDOMAIN, fmt), ap); - va_end(ap); - if (needed == 0) - break; - enlargeStringInfo(&emsg, needed); - } - primary = emsg.data; + for (;;) + { + va_list ap; + int needed; + + errno = save_errno; + va_start(ap, fmt); + needed = appendStringInfoVA(&emsg, dgettext(TEXTDOMAIN, fmt), ap); + va_end(ap); + if (needed == 0) + break; + enlargeStringInfo(&emsg, needed); + } + primary = emsg.data; - /* If there's an exception message, it goes in the detail. */ - if (xmsg) - detail = xmsg; - } - else - { - if (xmsg) - primary = xmsg; - } + /* If there's an exception message, it goes in the detail. */ + if (xmsg) + detail = xmsg; + } + else + { + if (xmsg) + primary = xmsg; + } - PG_TRY(); - { ereport(elevel, (errcode(sqlerrcode ? sqlerrcode : ERRCODE_EXTERNAL_ROUTINE_EXCEPTION), errmsg_internal("%s", primary ? primary : "no exception data"), @@ -136,14 +141,23 @@ PLy_elog_impl(int elevel, const char *fmt,...) } PG_FINALLY(); { + Py_XDECREF(exc); + Py_XDECREF(val); + /* Must release all the objects in the traceback stack */ + while (tb != NULL && tb != Py_None) + { + PyObject *tb_prev = tb; + + tb = PyObject_GetAttrString(tb, "tb_next"); + Py_DECREF(tb_prev); + } + /* For neatness' sake, also release our string buffers */ if (fmt) pfree(emsg.data); if (xmsg) pfree(xmsg); if (tbmsg) pfree(tbmsg); - Py_XDECREF(exc); - Py_XDECREF(val); } PG_END_TRY(); } @@ -154,21 +168,14 @@ PLy_elog_impl(int elevel, const char *fmt,...) * The exception error message is returned in xmsg, the traceback in * tbmsg (both as palloc'd strings) and the traceback depth in * tb_depth. 
- * - * We release refcounts on all the Python objects in the traceback stack, - * but not on e or v. */ static void PLy_traceback(PyObject *e, PyObject *v, PyObject *tb, - char **xmsg, char **tbmsg, int *tb_depth) + char *volatile *xmsg, char *volatile *tbmsg, int *tb_depth) { - PyObject *e_type_o; - PyObject *e_module_o; - char *e_type_s = NULL; - char *e_module_s = NULL; - PyObject *vob = NULL; - char *vstr; - StringInfoData xstr; + PyObject *volatile e_type_o = NULL; + PyObject *volatile e_module_o = NULL; + PyObject *volatile vob = NULL; StringInfoData tbstr; /* @@ -186,47 +193,59 @@ PLy_traceback(PyObject *e, PyObject *v, PyObject *tb, /* * Format the exception and its value and put it in xmsg. */ - - e_type_o = PyObject_GetAttrString(e, "__name__"); - e_module_o = PyObject_GetAttrString(e, "__module__"); - if (e_type_o) - e_type_s = PLyUnicode_AsString(e_type_o); - if (e_type_s) - e_module_s = PLyUnicode_AsString(e_module_o); - - if (v && ((vob = PyObject_Str(v)) != NULL)) - vstr = PLyUnicode_AsString(vob); - else - vstr = "unknown"; - - initStringInfo(&xstr); - if (!e_type_s || !e_module_s) + PG_TRY(); { - /* shouldn't happen */ - appendStringInfoString(&xstr, "unrecognized exception"); + char *e_type_s = NULL; + char *e_module_s = NULL; + const char *vstr; + StringInfoData xstr; + + e_type_o = PyObject_GetAttrString(e, "__name__"); + e_module_o = PyObject_GetAttrString(e, "__module__"); + if (e_type_o) + e_type_s = PLyUnicode_AsString(e_type_o); + if (e_module_o) + e_module_s = PLyUnicode_AsString(e_module_o); + + if (v && ((vob = PyObject_Str(v)) != NULL)) + vstr = PLyUnicode_AsString(vob); + else + vstr = "unknown"; + + initStringInfo(&xstr); + if (!e_type_s || !e_module_s) + { + /* shouldn't happen */ + appendStringInfoString(&xstr, "unrecognized exception"); + } + /* mimics behavior of traceback.format_exception_only */ + else if (strcmp(e_module_s, "builtins") == 0 + || strcmp(e_module_s, "__main__") == 0 + || strcmp(e_module_s, "exceptions") == 0) + 
appendStringInfoString(&xstr, e_type_s); + else + appendStringInfo(&xstr, "%s.%s", e_module_s, e_type_s); + appendStringInfo(&xstr, ": %s", vstr); + + *xmsg = xstr.data; } - /* mimics behavior of traceback.format_exception_only */ - else if (strcmp(e_module_s, "builtins") == 0 - || strcmp(e_module_s, "__main__") == 0 - || strcmp(e_module_s, "exceptions") == 0) - appendStringInfoString(&xstr, e_type_s); - else - appendStringInfo(&xstr, "%s.%s", e_module_s, e_type_s); - appendStringInfo(&xstr, ": %s", vstr); - - *xmsg = xstr.data; + PG_FINALLY(); + { + Py_XDECREF(e_type_o); + Py_XDECREF(e_module_o); + Py_XDECREF(vob); + } + PG_END_TRY(); /* * Now format the traceback and put it in tbmsg. */ - *tb_depth = 0; initStringInfo(&tbstr); /* Mimic Python traceback reporting as close as possible. */ appendStringInfoString(&tbstr, "Traceback (most recent call last):"); while (tb != NULL && tb != Py_None) { - PyObject *volatile tb_prev = NULL; PyObject *volatile frame = NULL; PyObject *volatile code = NULL; PyObject *volatile name = NULL; @@ -254,84 +273,74 @@ PLy_traceback(PyObject *e, PyObject *v, PyObject *tb, filename = PyObject_GetAttrString(code, "co_filename"); if (filename == NULL) elog(ERROR, "could not get file name from Python code object"); + + /* The first frame always points at <module>, skip it. */ + if (*tb_depth > 0) + { + PLyExecutionContext *exec_ctx = PLy_current_execution_context(); + char *proname; + char *fname; + char *line; + char *plain_filename; + long plain_lineno; + + /* + * The second frame points at the internal function, but to + * mimic Python error reporting we want to say <module>. 
+ */ + if (*tb_depth == 1) + fname = "<module>"; + else + fname = PLyUnicode_AsString(name); + + proname = PLy_procedure_name(exec_ctx->curr_proc); + plain_filename = PLyUnicode_AsString(filename); + plain_lineno = PyLong_AsLong(lineno); + + if (proname == NULL) + appendStringInfo(&tbstr, "\n PL/Python anonymous code block, line %ld, in %s", + plain_lineno - 1, fname); + else + appendStringInfo(&tbstr, "\n PL/Python function \"%s\", line %ld, in %s", + proname, plain_lineno - 1, fname); + + /* + * function code object was compiled with "<string>" as the + * filename + */ + if (exec_ctx->curr_proc && plain_filename != NULL && + strcmp(plain_filename, "<string>") == 0) + { + /* + * If we know the current procedure, append the exact line + * from the source, again mimicking Python's traceback.py + * module behavior. We could store the already line-split + * source to avoid splitting it every time, but producing + * a traceback is not the most important scenario to + * optimize for. But we do not go as far as traceback.py + * in reading the source of imported modules. + */ + line = get_source_line(exec_ctx->curr_proc->src, plain_lineno); + if (line) + { + appendStringInfo(&tbstr, "\n %s", line); + pfree(line); + } + } + } } - PG_CATCH(); + PG_FINALLY(); { Py_XDECREF(frame); Py_XDECREF(code); Py_XDECREF(name); Py_XDECREF(lineno); Py_XDECREF(filename); - PG_RE_THROW(); } PG_END_TRY(); - /* The first frame always points at <module>, skip it. */ - if (*tb_depth > 0) - { - PLyExecutionContext *exec_ctx = PLy_current_execution_context(); - char *proname; - char *fname; - char *line; - char *plain_filename; - long plain_lineno; - - /* - * The second frame points at the internal function, but to mimic - * Python error reporting we want to say <module>. 
- */ - if (*tb_depth == 1) - fname = "<module>"; - else - fname = PLyUnicode_AsString(name); - - proname = PLy_procedure_name(exec_ctx->curr_proc); - plain_filename = PLyUnicode_AsString(filename); - plain_lineno = PyLong_AsLong(lineno); - - if (proname == NULL) - appendStringInfo(&tbstr, "\n PL/Python anonymous code block, line %ld, in %s", - plain_lineno - 1, fname); - else - appendStringInfo(&tbstr, "\n PL/Python function \"%s\", line %ld, in %s", - proname, plain_lineno - 1, fname); - - /* - * function code object was compiled with "<string>" as the - * filename - */ - if (exec_ctx->curr_proc && plain_filename != NULL && - strcmp(plain_filename, "<string>") == 0) - { - /* - * If we know the current procedure, append the exact line - * from the source, again mimicking Python's traceback.py - * module behavior. We could store the already line-split - * source to avoid splitting it every time, but producing a - * traceback is not the most important scenario to optimize - * for. But we do not go as far as traceback.py in reading - * the source of imported modules. - */ - line = get_source_line(exec_ctx->curr_proc->src, plain_lineno); - if (line) - { - appendStringInfo(&tbstr, "\n %s", line); - pfree(line); - } - } - } - - Py_DECREF(frame); - Py_DECREF(code); - Py_DECREF(name); - Py_DECREF(lineno); - Py_DECREF(filename); - - /* Release the current frame and go to the next one. */ - tb_prev = tb; + /* Advance to the next frame. */ tb = PyObject_GetAttrString(tb, "tb_next"); - Assert(tb_prev != Py_None); - Py_DECREF(tb_prev); if (tb == NULL) elog(ERROR, "could not traverse Python traceback"); (*tb_depth)++; @@ -339,10 +348,6 @@ PLy_traceback(PyObject *e, PyObject *v, PyObject *tb, /* Return the traceback. 
*/ *tbmsg = tbstr.data; - - Py_XDECREF(e_type_o); - Py_XDECREF(e_module_o); - Py_XDECREF(vob); } /* diff --git a/src/pl/plpython/plpy_planobject.c b/src/pl/plpython/plpy_planobject.c index 6044893afdd..edfb76c8770 100644 --- a/src/pl/plpython/plpy_planobject.c +++ b/src/pl/plpython/plpy_planobject.c @@ -45,9 +45,9 @@ static PyType_Slot PLyPlan_slots[] = static PyType_Spec PLyPlan_spec = { .name = "PLyPlan", - .basicsize = sizeof(PLyPlanObject), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, - .slots = PLyPlan_slots, + .basicsize = sizeof(PLyPlanObject), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + .slots = PLyPlan_slots, }; static PyTypeObject *PLy_PlanType; diff --git a/src/pl/plpython/plpy_resultobject.c b/src/pl/plpython/plpy_resultobject.c index 0d9997cbaa3..d433929b360 100644 --- a/src/pl/plpython/plpy_resultobject.c +++ b/src/pl/plpython/plpy_resultobject.c @@ -70,9 +70,9 @@ static PyType_Slot PLyResult_slots[] = static PyType_Spec PLyResult_spec = { .name = "PLyResult", - .basicsize = sizeof(PLyResultObject), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, - .slots = PLyResult_slots, + .basicsize = sizeof(PLyResultObject), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + .slots = PLyResult_slots, }; static PyTypeObject *PLy_ResultType; diff --git a/src/pl/plpython/plpy_subxactobject.c b/src/pl/plpython/plpy_subxactobject.c index c2484a99b4a..c225b652ab4 100644 --- a/src/pl/plpython/plpy_subxactobject.c +++ b/src/pl/plpython/plpy_subxactobject.c @@ -46,9 +46,9 @@ static PyType_Slot PLySubtransaction_slots[] = static PyType_Spec PLySubtransaction_spec = { .name = "PLySubtransaction", - .basicsize = sizeof(PLySubtransactionObject), - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, - .slots = PLySubtransaction_slots, + .basicsize = sizeof(PLySubtransactionObject), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + .slots = PLySubtransaction_slots, }; static PyTypeObject *PLy_SubtransactionType; diff --git 
a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index 9af3474a6ca..1a717255355 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -123,7 +123,7 @@ pg_comp_crc32c_avx512(pg_crc32c crc, const void *data, size_t len) __m512i k; k = _mm512_broadcast_i32x4(_mm_setr_epi32(0x740eef02, 0, 0x9e4addf8, 0)); - x0 = _mm512_xor_si512(_mm512_castsi128_si512(_mm_cvtsi32_si128(crc0)), x0); + x0 = _mm512_xor_si512(_mm512_zextsi128_si512(_mm_cvtsi32_si128(crc0)), x0); buf += 64; /* Main loop. */ diff --git a/src/port/pg_numa.c b/src/port/pg_numa.c index 4b487a2a4e8..3368a43a338 100644 --- a/src/port/pg_numa.c +++ b/src/port/pg_numa.c @@ -16,6 +16,7 @@ #include "c.h" #include <unistd.h> +#include "miscadmin.h" #include "port/pg_numa.h" /* @@ -29,6 +30,19 @@ #include <numa.h> #include <numaif.h> +/* + * numa_move_pages() chunk size, has to be <= 16 to work around a kernel bug + * in do_pages_stat() (chunked by DO_PAGES_STAT_CHUNK_NR). By using the same + * chunk size, we make it work even on unfixed kernels. + * + * 64-bit system are not affected by the bug, and so use much larger chunks. + */ +#if SIZEOF_SIZE_T == 4 +#define NUMA_QUERY_CHUNK_SIZE 16 +#else +#define NUMA_QUERY_CHUNK_SIZE 1024 +#endif + /* libnuma requires initialization as per numa(3) on Linux */ int pg_numa_init(void) @@ -42,11 +56,48 @@ pg_numa_init(void) * We use move_pages(2) syscall here - instead of get_mempolicy(2) - as the * first one allows us to batch and query about many memory pages in one single * giant system call that is way faster. + * + * We call numa_move_pages() for smaller chunks of the whole array. The first + * reason is to work around a kernel bug, but also to allow interrupting the + * query between the calls (for many pointers processing the whole array can + * take a lot of time). 
*/ int pg_numa_query_pages(int pid, unsigned long count, void **pages, int *status) { - return numa_move_pages(pid, count, pages, NULL, status, 0); + unsigned long next = 0; + int ret = 0; + + /* + * Chunk pointers passed to numa_move_pages to NUMA_QUERY_CHUNK_SIZE + * items, to work around a kernel bug in do_pages_stat(). + */ + while (next < count) + { + unsigned long count_chunk = Min(count - next, + NUMA_QUERY_CHUNK_SIZE); + + CHECK_FOR_INTERRUPTS(); + + /* + * Bail out if any of the chunks errors out (ret<0). We ignore (ret>0) + * which is used to return number of nonmigrated pages, but we're not + * migrating any pages here. + */ + ret = numa_move_pages(pid, count_chunk, &pages[next], NULL, &status[next], 0); + if (ret < 0) + { + /* plain error, return as is */ + return ret; + } + + next += count_chunk; + } + + /* should have consumed the input array exactly */ + Assert(next == count); + + return 0; } int diff --git a/src/test/authentication/t/001_password.pl b/src/test/authentication/t/001_password.pl index 37d96d95a1a..a16e9a563f3 100644 --- a/src/test/authentication/t/001_password.pl +++ b/src/test/authentication/t/001_password.pl @@ -79,39 +79,40 @@ $node->start; # other tests are added to this file in the future $node->safe_psql('postgres', "CREATE DATABASE test_log_connections"); -my $log_connections = $node->safe_psql('test_log_connections', q(SHOW log_connections;)); +my $log_connections = + $node->safe_psql('test_log_connections', q(SHOW log_connections;)); is($log_connections, 'on', qq(check log connections has expected value 'on')); -$node->connect_ok('test_log_connections', +$node->connect_ok( + 'test_log_connections', qq(log_connections 'on' works as expected for backwards compatibility), log_like => [ qr/connection received/, qr/connection authenticated/, qr/connection authorized: user=\S+ database=test_log_connections/, ], - log_unlike => [ - qr/connection ready/, - ],); + log_unlike => [ qr/connection ready/, ],); 
-$node->safe_psql('test_log_connections', +$node->safe_psql( + 'test_log_connections', q[ALTER SYSTEM SET log_connections = receipt,authorization,setup_durations; SELECT pg_reload_conf();]); -$node->connect_ok('test_log_connections', +$node->connect_ok( + 'test_log_connections', q(log_connections with subset of specified options logs only those aspects), log_like => [ qr/connection received/, qr/connection authorized: user=\S+ database=test_log_connections/, qr/connection ready/, ], - log_unlike => [ - qr/connection authenticated/, - ],); + log_unlike => [ qr/connection authenticated/, ],); $node->safe_psql('test_log_connections', qq(ALTER SYSTEM SET log_connections = 'all'; SELECT pg_reload_conf();)); -$node->connect_ok('test_log_connections', +$node->connect_ok( + 'test_log_connections', qq(log_connections 'all' logs all available connection aspects), log_like => [ qr/connection received/, diff --git a/src/test/modules/commit_ts/t/001_base.pl b/src/test/modules/commit_ts/t/001_base.pl index 1953b18f6b3..50e79ce6409 100644 --- a/src/test/modules/commit_ts/t/001_base.pl +++ b/src/test/modules/commit_ts/t/001_base.pl @@ -11,8 +11,7 @@ use Test::More; use PostgreSQL::Test::Cluster; my $node = PostgreSQL::Test::Cluster->new('foxtrot'); -$node->init; -$node->append_conf('postgresql.conf', 'track_commit_timestamp = on'); +$node->init(extra => [ '-c', "track_commit_timestamp=on" ]); $node->start; # Create a table, compare "now()" to the commit TS of its xmin diff --git a/src/test/modules/injection_points/Makefile b/src/test/modules/injection_points/Makefile index e680991f8d4..fc82cd67f6c 100644 --- a/src/test/modules/injection_points/Makefile +++ b/src/test/modules/injection_points/Makefile @@ -11,7 +11,7 @@ EXTENSION = injection_points DATA = injection_points--1.0.sql PGFILEDESC = "injection_points - facility for injection points" -REGRESS = injection_points hashagg reindex_conc +REGRESS = injection_points hashagg reindex_conc vacuum REGRESS_OPTS = 
--dlpath=$(top_builddir)/src/test/regress ISOLATION = basic inplace syscache-update-pruned diff --git a/src/test/modules/injection_points/expected/vacuum.out b/src/test/modules/injection_points/expected/vacuum.out new file mode 100644 index 00000000000..58df59fa927 --- /dev/null +++ b/src/test/modules/injection_points/expected/vacuum.out @@ -0,0 +1,122 @@ +-- Tests for VACUUM +CREATE EXTENSION injection_points; +SELECT injection_points_set_local(); + injection_points_set_local +---------------------------- + +(1 row) + +SELECT injection_points_attach('vacuum-index-cleanup-auto', 'notice'); + injection_points_attach +------------------------- + +(1 row) + +SELECT injection_points_attach('vacuum-index-cleanup-disabled', 'notice'); + injection_points_attach +------------------------- + +(1 row) + +SELECT injection_points_attach('vacuum-index-cleanup-enabled', 'notice'); + injection_points_attach +------------------------- + +(1 row) + +SELECT injection_points_attach('vacuum-truncate-auto', 'notice'); + injection_points_attach +------------------------- + +(1 row) + +SELECT injection_points_attach('vacuum-truncate-disabled', 'notice'); + injection_points_attach +------------------------- + +(1 row) + +SELECT injection_points_attach('vacuum-truncate-enabled', 'notice'); + injection_points_attach +------------------------- + +(1 row) + +-- Check state of index_cleanup and truncate in VACUUM. +CREATE TABLE vac_tab_on_toast_off(i int, j text) WITH + (autovacuum_enabled=false, + vacuum_index_cleanup=true, toast.vacuum_index_cleanup=false, + vacuum_truncate=true, toast.vacuum_truncate=false); +CREATE TABLE vac_tab_off_toast_on(i int, j text) WITH + (autovacuum_enabled=false, + vacuum_index_cleanup=false, toast.vacuum_index_cleanup=true, + vacuum_truncate=false, toast.vacuum_truncate=true); +-- Multiple relations should use their options in isolation. 
+VACUUM vac_tab_on_toast_off, vac_tab_off_toast_on; +NOTICE: notice triggered for injection point vacuum-index-cleanup-enabled +NOTICE: notice triggered for injection point vacuum-truncate-enabled +NOTICE: notice triggered for injection point vacuum-index-cleanup-disabled +NOTICE: notice triggered for injection point vacuum-truncate-disabled +NOTICE: notice triggered for injection point vacuum-index-cleanup-disabled +NOTICE: notice triggered for injection point vacuum-truncate-disabled +NOTICE: notice triggered for injection point vacuum-index-cleanup-enabled +NOTICE: notice triggered for injection point vacuum-truncate-enabled +-- Check "auto" case of index_cleanup and "truncate" controlled by +-- its GUC. +CREATE TABLE vac_tab_auto(i int, j text) WITH + (autovacuum_enabled=false, + vacuum_index_cleanup=auto, toast.vacuum_index_cleanup=auto); +SET vacuum_truncate = false; +VACUUM vac_tab_auto; +NOTICE: notice triggered for injection point vacuum-index-cleanup-auto +NOTICE: notice triggered for injection point vacuum-truncate-disabled +NOTICE: notice triggered for injection point vacuum-index-cleanup-auto +NOTICE: notice triggered for injection point vacuum-truncate-disabled +SET vacuum_truncate = true; +VACUUM vac_tab_auto; +NOTICE: notice triggered for injection point vacuum-index-cleanup-auto +NOTICE: notice triggered for injection point vacuum-truncate-enabled +NOTICE: notice triggered for injection point vacuum-index-cleanup-auto +NOTICE: notice triggered for injection point vacuum-truncate-enabled +RESET vacuum_truncate; +DROP TABLE vac_tab_auto; +DROP TABLE vac_tab_on_toast_off; +DROP TABLE vac_tab_off_toast_on; +-- Cleanup +SELECT injection_points_detach('vacuum-index-cleanup-auto'); + injection_points_detach +------------------------- + +(1 row) + +SELECT injection_points_detach('vacuum-index-cleanup-disabled'); + injection_points_detach +------------------------- + +(1 row) + +SELECT injection_points_detach('vacuum-index-cleanup-enabled'); + 
injection_points_detach +------------------------- + +(1 row) + +SELECT injection_points_detach('vacuum-truncate-auto'); + injection_points_detach +------------------------- + +(1 row) + +SELECT injection_points_detach('vacuum-truncate-disabled'); + injection_points_detach +------------------------- + +(1 row) + +SELECT injection_points_detach('vacuum-truncate-enabled'); + injection_points_detach +------------------------- + +(1 row) + +DROP EXTENSION injection_points; diff --git a/src/test/modules/injection_points/meson.build b/src/test/modules/injection_points/meson.build index d61149712fd..20390d6b4bf 100644 --- a/src/test/modules/injection_points/meson.build +++ b/src/test/modules/injection_points/meson.build @@ -37,8 +37,9 @@ tests += { 'injection_points', 'hashagg', 'reindex_conc', + 'vacuum', ], - 'regress_args': ['--dlpath', meson.build_root() / 'src/test/regress'], + 'regress_args': ['--dlpath', meson.project_build_root() / 'src/test/regress'], # The injection points are cluster-wide, so disable installcheck 'runningcheck': false, }, diff --git a/src/test/modules/injection_points/sql/vacuum.sql b/src/test/modules/injection_points/sql/vacuum.sql new file mode 100644 index 00000000000..23760dd0f38 --- /dev/null +++ b/src/test/modules/injection_points/sql/vacuum.sql @@ -0,0 +1,47 @@ +-- Tests for VACUUM + +CREATE EXTENSION injection_points; + +SELECT injection_points_set_local(); +SELECT injection_points_attach('vacuum-index-cleanup-auto', 'notice'); +SELECT injection_points_attach('vacuum-index-cleanup-disabled', 'notice'); +SELECT injection_points_attach('vacuum-index-cleanup-enabled', 'notice'); +SELECT injection_points_attach('vacuum-truncate-auto', 'notice'); +SELECT injection_points_attach('vacuum-truncate-disabled', 'notice'); +SELECT injection_points_attach('vacuum-truncate-enabled', 'notice'); + +-- Check state of index_cleanup and truncate in VACUUM. 
+CREATE TABLE vac_tab_on_toast_off(i int, j text) WITH + (autovacuum_enabled=false, + vacuum_index_cleanup=true, toast.vacuum_index_cleanup=false, + vacuum_truncate=true, toast.vacuum_truncate=false); +CREATE TABLE vac_tab_off_toast_on(i int, j text) WITH + (autovacuum_enabled=false, + vacuum_index_cleanup=false, toast.vacuum_index_cleanup=true, + vacuum_truncate=false, toast.vacuum_truncate=true); +-- Multiple relations should use their options in isolation. +VACUUM vac_tab_on_toast_off, vac_tab_off_toast_on; + +-- Check "auto" case of index_cleanup and "truncate" controlled by +-- its GUC. +CREATE TABLE vac_tab_auto(i int, j text) WITH + (autovacuum_enabled=false, + vacuum_index_cleanup=auto, toast.vacuum_index_cleanup=auto); +SET vacuum_truncate = false; +VACUUM vac_tab_auto; +SET vacuum_truncate = true; +VACUUM vac_tab_auto; +RESET vacuum_truncate; + +DROP TABLE vac_tab_auto; +DROP TABLE vac_tab_on_toast_off; +DROP TABLE vac_tab_off_toast_on; + +-- Cleanup +SELECT injection_points_detach('vacuum-index-cleanup-auto'); +SELECT injection_points_detach('vacuum-index-cleanup-disabled'); +SELECT injection_points_detach('vacuum-index-cleanup-enabled'); +SELECT injection_points_detach('vacuum-truncate-auto'); +SELECT injection_points_detach('vacuum-truncate-disabled'); +SELECT injection_points_detach('vacuum-truncate-enabled'); +DROP EXTENSION injection_points; diff --git a/src/test/modules/libpq_pipeline/t/001_libpq_pipeline.pl b/src/test/modules/libpq_pipeline/t/001_libpq_pipeline.pl index 61524bdbd8f..f9678853070 100644 --- a/src/test/modules/libpq_pipeline/t/001_libpq_pipeline.pl +++ b/src/test/modules/libpq_pipeline/t/001_libpq_pipeline.pl @@ -53,7 +53,8 @@ for my $testname (@tests) $node->command_ok( [ 'libpq_pipeline', @extraargs, - $testname, $node->connstr('postgres') . " max_protocol_version=latest" + $testname, + $node->connstr('postgres') . 
" max_protocol_version=latest" ], "libpq_pipeline $testname"); @@ -76,7 +77,8 @@ for my $testname (@tests) # test separately that it still works the old protocol version too. $node->command_ok( [ - 'libpq_pipeline', 'cancel', $node->connstr('postgres') . " max_protocol_version=3.0" + 'libpq_pipeline', 'cancel', + $node->connstr('postgres') . " max_protocol_version=3.0" ], "libpq_pipeline cancel with protocol 3.0"); diff --git a/src/test/modules/oauth_validator/meson.build b/src/test/modules/oauth_validator/meson.build index e190f9cf15a..a6f937fd7d7 100644 --- a/src/test/modules/oauth_validator/meson.build +++ b/src/test/modules/oauth_validator/meson.build @@ -77,7 +77,7 @@ tests += { 't/002_client.pl', ], 'env': { - 'PYTHON': python.path(), + 'PYTHON': python.full_path(), 'with_libcurl': oauth_flow_supported ? 'yes' : 'no', 'with_python': 'yes', }, diff --git a/src/test/modules/oauth_validator/t/001_server.pl b/src/test/modules/oauth_validator/t/001_server.pl index bfc9dc3b542..41672ebd5c6 100644 --- a/src/test/modules/oauth_validator/t/001_server.pl +++ b/src/test/modules/oauth_validator/t/001_server.pl @@ -295,6 +295,26 @@ $node->connect_fails( expected_stderr => qr/failed to obtain access token: response is too large/); +my $nesting_limit = 16; +$node->connect_ok( + connstr( + stage => 'device', + nested_array => $nesting_limit, + nested_object => $nesting_limit), + "nested arrays and objects, up to parse limit", + expected_stderr => + qr@Visit https://example\.com/ and enter the code: postgresuser@); +$node->connect_fails( + connstr(stage => 'device', nested_array => $nesting_limit + 1), + "bad discovery response: overly nested JSON array", + expected_stderr => + qr/failed to parse device authorization: JSON is too deeply nested/); +$node->connect_fails( + connstr(stage => 'device', nested_object => $nesting_limit + 1), + "bad discovery response: overly nested JSON object", + expected_stderr => + qr/failed to parse device authorization: JSON is too deeply 
nested/); + $node->connect_fails( connstr(stage => 'device', content_type => 'text/plain'), "bad device authz response: wrong content type", diff --git a/src/test/modules/oauth_validator/t/oauth_server.py b/src/test/modules/oauth_validator/t/oauth_server.py index 20b3a9506cb..0f8836aadf3 100755 --- a/src/test/modules/oauth_validator/t/oauth_server.py +++ b/src/test/modules/oauth_validator/t/oauth_server.py @@ -7,6 +7,7 @@ # import base64 +import functools import http.server import json import os @@ -213,14 +214,32 @@ class OAuthHandler(http.server.BaseHTTPRequestHandler): @property def _response_padding(self): """ - If the huge_response test parameter is set to True, returns a dict - containing a gigantic string value, which can then be folded into a JSON - response. + Returns a dict with any additional entries that should be folded into a + JSON response, as determined by test parameters provided by the client: + + - huge_response: if set to True, the dict will contain a gigantic string + value + + - nested_array: if set to nonzero, the dict will contain a deeply nested + array so that the top-level object has the given depth + + - nested_object: if set to nonzero, the dict will contain a deeply + nested JSON object so that the top-level object has the given depth """ - if not self._get_param("huge_response", False): - return dict() + ret = dict() + + if self._get_param("huge_response", False): + ret["_pad_"] = "x" * 1024 * 1024 + + depth = self._get_param("nested_array", 0) + if depth: + ret["_arr_"] = functools.reduce(lambda x, _: [x], range(depth)) + + depth = self._get_param("nested_object", 0) + if depth: + ret["_obj_"] = functools.reduce(lambda x, _: {"": x}, range(depth)) - return {"_pad_": "x" * 1024 * 1024} + return ret @property def _access_token(self): diff --git a/src/test/modules/test_aio/t/001_aio.pl b/src/test/modules/test_aio/t/001_aio.pl index 4527c70785d..82ffffc058f 100644 --- a/src/test/modules/test_aio/t/001_aio.pl +++ 
b/src/test/modules/test_aio/t/001_aio.pl @@ -1123,7 +1123,8 @@ COMMIT; { # Create a corruption and then read the block without waiting for # completion. - $psql_a->query(qq( + $psql_a->query( + qq( SELECT modify_rel_block('tbl_zero', 1, corrupt_header=>true); SELECT read_rel_block_ll('tbl_zero', 1, wait_complete=>false, zero_on_error=>true) )); @@ -1133,7 +1134,8 @@ SELECT read_rel_block_ll('tbl_zero', 1, wait_complete=>false, zero_on_error=>tru $psql_b, "$persistency: test completing read by other session doesn't generate warning", qq(SELECT count(*) > 0 FROM tbl_zero;), - qr/^t$/, qr/^$/); + qr/^t$/, + qr/^$/); } # Clean up @@ -1355,18 +1357,24 @@ SELECT modify_rel_block('tbl_cs_fail', 6, corrupt_checksum=>true); )); $psql->query_safe($invalidate_sql); - psql_like($io_method, $psql, + psql_like( + $io_method, + $psql, "reading block w/ wrong checksum with ignore_checksum_failure=off fails", - $count_sql, qr/^$/, qr/ERROR: invalid page in block/); + $count_sql, + qr/^$/, + qr/ERROR: invalid page in block/); $psql->query_safe("SET ignore_checksum_failure=on"); $psql->query_safe($invalidate_sql); - psql_like($io_method, $psql, - "reading block w/ wrong checksum with ignore_checksum_failure=off succeeds", - $count_sql, - qr/^$expect$/, - qr/WARNING: ignoring (checksum failure|\d checksum failures)/); + psql_like( + $io_method, + $psql, + "reading block w/ wrong checksum with ignore_checksum_failure=off succeeds", + $count_sql, + qr/^$expect$/, + qr/WARNING: ignoring (checksum failure|\d checksum failures)/); # Verify that ignore_checksum_failure=off works in multi-block reads @@ -1432,19 +1440,22 @@ SELECT read_rel_block_ll('tbl_cs_fail', 1, nblocks=>5, zero_on_error=>true);), # file. 
$node->wait_for_log(qr/LOG: ignoring checksum failure in block 2/, - $log_location); + $log_location); ok(1, "$io_method: found information about checksum failure in block 2"); - $node->wait_for_log(qr/LOG: invalid page in block 3 of relation base.*; zeroing out page/, - $log_location); + $node->wait_for_log( + qr/LOG: invalid page in block 3 of relation base.*; zeroing out page/, + $log_location); ok(1, "$io_method: found information about invalid page in block 3"); - $node->wait_for_log(qr/LOG: invalid page in block 4 of relation base.*; zeroing out page/, - $log_location); + $node->wait_for_log( + qr/LOG: invalid page in block 4 of relation base.*; zeroing out page/, + $log_location); ok(1, "$io_method: found information about checksum failure in block 4"); - $node->wait_for_log(qr/LOG: invalid page in block 5 of relation base.*; zeroing out page/, - $log_location); + $node->wait_for_log( + qr/LOG: invalid page in block 5 of relation base.*; zeroing out page/, + $log_location); ok(1, "$io_method: found information about checksum failure in block 5"); @@ -1462,8 +1473,7 @@ SELECT modify_rel_block('tbl_cs_fail', 3, corrupt_checksum=>true, corrupt_header qq( SELECT read_rel_block_ll('tbl_cs_fail', 3, nblocks=>1, zero_on_error=>false);), qr/^$/, - qr/^psql:<stdin>:\d+: ERROR: invalid page in block 3 of relation/ - ); + qr/^psql:<stdin>:\d+: ERROR: invalid page in block 3 of relation/); psql_like( $io_method, diff --git a/src/test/modules/test_aio/test_aio.c b/src/test/modules/test_aio/test_aio.c index 5cdfb89210b..c55cf6c0aac 100644 --- a/src/test/modules/test_aio/test_aio.c +++ b/src/test/modules/test_aio/test_aio.c @@ -42,9 +42,9 @@ typedef struct InjIoErrorState bool short_read_result_set; int short_read_result; -} InjIoErrorState; +} InjIoErrorState; -static InjIoErrorState * inj_io_error_state; +static InjIoErrorState *inj_io_error_state; /* Shared memory init callbacks */ static shmem_request_hook_type prev_shmem_request_hook = NULL; diff --git 
a/src/test/modules/test_dsm_registry/expected/test_dsm_registry.out b/src/test/modules/test_dsm_registry/expected/test_dsm_registry.out index 8ffbd343a05..8ded82e59d6 100644 --- a/src/test/modules/test_dsm_registry/expected/test_dsm_registry.out +++ b/src/test/modules/test_dsm_registry/expected/test_dsm_registry.out @@ -5,6 +5,12 @@ SELECT set_val_in_shmem(1236); (1 row) +SELECT set_val_in_hash('test', '1414'); + set_val_in_hash +----------------- + +(1 row) + \c SELECT get_val_in_shmem(); get_val_in_shmem @@ -12,3 +18,9 @@ SELECT get_val_in_shmem(); 1236 (1 row) +SELECT get_val_in_hash('test'); + get_val_in_hash +----------------- + 1414 +(1 row) + diff --git a/src/test/modules/test_dsm_registry/sql/test_dsm_registry.sql b/src/test/modules/test_dsm_registry/sql/test_dsm_registry.sql index b3351be0a16..c2e25cddaae 100644 --- a/src/test/modules/test_dsm_registry/sql/test_dsm_registry.sql +++ b/src/test/modules/test_dsm_registry/sql/test_dsm_registry.sql @@ -1,4 +1,6 @@ CREATE EXTENSION test_dsm_registry; SELECT set_val_in_shmem(1236); +SELECT set_val_in_hash('test', '1414'); \c SELECT get_val_in_shmem(); +SELECT get_val_in_hash('test'); diff --git a/src/test/modules/test_dsm_registry/test_dsm_registry--1.0.sql b/src/test/modules/test_dsm_registry/test_dsm_registry--1.0.sql index 8c55b0919b1..5da45155be9 100644 --- a/src/test/modules/test_dsm_registry/test_dsm_registry--1.0.sql +++ b/src/test/modules/test_dsm_registry/test_dsm_registry--1.0.sql @@ -8,3 +8,9 @@ CREATE FUNCTION set_val_in_shmem(val INT) RETURNS VOID CREATE FUNCTION get_val_in_shmem() RETURNS INT AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION set_val_in_hash(key TEXT, val TEXT) RETURNS VOID + AS 'MODULE_PATHNAME' LANGUAGE C; + +CREATE FUNCTION get_val_in_hash(key TEXT) RETURNS TEXT + AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/src/test/modules/test_dsm_registry/test_dsm_registry.c b/src/test/modules/test_dsm_registry/test_dsm_registry.c index 462a80f8790..141c8ed1b34 100644 --- 
a/src/test/modules/test_dsm_registry/test_dsm_registry.c +++ b/src/test/modules/test_dsm_registry/test_dsm_registry.c @@ -15,6 +15,7 @@ #include "fmgr.h" #include "storage/dsm_registry.h" #include "storage/lwlock.h" +#include "utils/builtins.h" PG_MODULE_MAGIC; @@ -24,15 +25,31 @@ typedef struct TestDSMRegistryStruct LWLock lck; } TestDSMRegistryStruct; -static TestDSMRegistryStruct *tdr_state; +typedef struct TestDSMRegistryHashEntry +{ + char key[64]; + dsa_pointer val; +} TestDSMRegistryHashEntry; + +static TestDSMRegistryStruct *tdr_dsm; +static dsa_area *tdr_dsa; +static dshash_table *tdr_hash; + +static const dshash_parameters dsh_params = { + offsetof(TestDSMRegistryHashEntry, val), + sizeof(TestDSMRegistryHashEntry), + dshash_strcmp, + dshash_strhash, + dshash_strcpy +}; static void -tdr_init_shmem(void *ptr) +init_tdr_dsm(void *ptr) { - TestDSMRegistryStruct *state = (TestDSMRegistryStruct *) ptr; + TestDSMRegistryStruct *dsm = (TestDSMRegistryStruct *) ptr; - LWLockInitialize(&state->lck, LWLockNewTrancheId()); - state->val = 0; + LWLockInitialize(&dsm->lck, LWLockNewTrancheId()); + dsm->val = 0; } static void @@ -40,11 +57,17 @@ tdr_attach_shmem(void) { bool found; - tdr_state = GetNamedDSMSegment("test_dsm_registry", - sizeof(TestDSMRegistryStruct), - tdr_init_shmem, - &found); - LWLockRegisterTranche(tdr_state->lck.tranche, "test_dsm_registry"); + tdr_dsm = GetNamedDSMSegment("test_dsm_registry_dsm", + sizeof(TestDSMRegistryStruct), + init_tdr_dsm, + &found); + LWLockRegisterTranche(tdr_dsm->lck.tranche, "test_dsm_registry"); + + if (tdr_dsa == NULL) + tdr_dsa = GetNamedDSA("test_dsm_registry_dsa", &found); + + if (tdr_hash == NULL) + tdr_hash = GetNamedDSHash("test_dsm_registry_hash", &dsh_params, &found); } PG_FUNCTION_INFO_V1(set_val_in_shmem); @@ -53,9 +76,9 @@ set_val_in_shmem(PG_FUNCTION_ARGS) { tdr_attach_shmem(); - LWLockAcquire(&tdr_state->lck, LW_EXCLUSIVE); - tdr_state->val = PG_GETARG_UINT32(0); - LWLockRelease(&tdr_state->lck); + 
LWLockAcquire(&tdr_dsm->lck, LW_EXCLUSIVE); + tdr_dsm->val = PG_GETARG_INT32(0); + LWLockRelease(&tdr_dsm->lck); PG_RETURN_VOID(); } @@ -68,9 +91,57 @@ get_val_in_shmem(PG_FUNCTION_ARGS) tdr_attach_shmem(); - LWLockAcquire(&tdr_state->lck, LW_SHARED); - ret = tdr_state->val; - LWLockRelease(&tdr_state->lck); + LWLockAcquire(&tdr_dsm->lck, LW_SHARED); + ret = tdr_dsm->val; + LWLockRelease(&tdr_dsm->lck); + + PG_RETURN_INT32(ret); +} + +PG_FUNCTION_INFO_V1(set_val_in_hash); +Datum +set_val_in_hash(PG_FUNCTION_ARGS) +{ + TestDSMRegistryHashEntry *entry; + char *key = TextDatumGetCString(PG_GETARG_DATUM(0)); + char *val = TextDatumGetCString(PG_GETARG_DATUM(1)); + bool found; + + if (strlen(key) >= offsetof(TestDSMRegistryHashEntry, val)) + ereport(ERROR, + (errmsg("key too long"))); + + tdr_attach_shmem(); + + entry = dshash_find_or_insert(tdr_hash, key, &found); + if (found) + dsa_free(tdr_dsa, entry->val); + + entry->val = dsa_allocate(tdr_dsa, strlen(val) + 1); + strcpy(dsa_get_address(tdr_dsa, entry->val), val); + + dshash_release_lock(tdr_hash, entry); + + PG_RETURN_VOID(); +} + +PG_FUNCTION_INFO_V1(get_val_in_hash); +Datum +get_val_in_hash(PG_FUNCTION_ARGS) +{ + TestDSMRegistryHashEntry *entry; + char *key = TextDatumGetCString(PG_GETARG_DATUM(0)); + text *val = NULL; + + tdr_attach_shmem(); + + entry = dshash_find(tdr_hash, key, false); + if (entry == NULL) + PG_RETURN_NULL(); + + val = cstring_to_text(dsa_get_address(tdr_dsa, entry->val)); + + dshash_release_lock(tdr_hash, entry); - PG_RETURN_UINT32(ret); + PG_RETURN_TEXT_P(val); } diff --git a/src/test/modules/test_shm_mq/worker.c b/src/test/modules/test_shm_mq/worker.c index 96cd304dbbc..c1d321b69a4 100644 --- a/src/test/modules/test_shm_mq/worker.c +++ b/src/test/modules/test_shm_mq/worker.c @@ -77,7 +77,7 @@ test_shm_mq_main(Datum main_arg) * exit, which is fine. If there were a ResourceOwner, it would acquire * ownership of the mapping, but we have no need for that. 
*/ - seg = dsm_attach(DatumGetInt32(main_arg)); + seg = dsm_attach(DatumGetUInt32(main_arg)); if (seg == NULL) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), diff --git a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm index 1725fe2f948..7224c286e1d 100644 --- a/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm +++ b/src/test/perl/PostgreSQL/Test/AdjustUpgrade.pm @@ -251,6 +251,32 @@ sub adjust_database_contents 'drop operator if exists public.=> (bigint, NONE)'); } + # Version 19 changed the output format of pg_lsn. To avoid output + # differences, set all pg_lsn columns to NULL if the old version is + # older than 19. + if ($old_version < 19) + { + if ($old_version >= '9.5') + { + _add_st($result, 'regression', + "update brintest set lsncol = NULL"); + } + + if ($old_version >= 12) + { + _add_st($result, 'regression', + "update tab_core_types set pg_lsn = NULL"); + } + + if ($old_version >= 14) + { + _add_st($result, 'regression', + "update brintest_multi set lsncol = NULL"); + _add_st($result, 'regression', + "update brintest_bloom set lsncol = NULL"); + } + } + return $result; } diff --git a/src/test/perl/PostgreSQL/Test/Cluster.pm b/src/test/perl/PostgreSQL/Test/Cluster.pm index 1c11750ac1d..301766d2ed9 100644 --- a/src/test/perl/PostgreSQL/Test/Cluster.pm +++ b/src/test/perl/PostgreSQL/Test/Cluster.pm @@ -684,7 +684,7 @@ sub init print $conf "\n# Added by PostgreSQL::Test::Cluster.pm\n"; print $conf "fsync = off\n"; print $conf "restart_after_crash = off\n"; - print $conf "log_line_prefix = '%m [%p] %q%a '\n"; + print $conf "log_line_prefix = '%m %b[%p] %q%a '\n"; print $conf "log_statement = all\n"; print $conf "log_replication_commands = on\n"; print $conf "wal_retrieve_retry_interval = '500ms'\n"; @@ -2199,6 +2199,14 @@ sub psql $ret = $?; }; my $exc_save = $@; + + # we need a dummy $stderr from hereon, if we didn't collect it + if (! 
defined $stderr) + { + my $errtxt = "<not collected>"; + $stderr = \$errtxt; + } + if ($exc_save) { diff --git a/src/test/postmaster/t/002_connection_limits.pl b/src/test/postmaster/t/002_connection_limits.pl index 6442500fc37..4a7fb16261f 100644 --- a/src/test/postmaster/t/002_connection_limits.pl +++ b/src/test/postmaster/t/002_connection_limits.pl @@ -68,7 +68,8 @@ sub connect_fails_wait my $log_location = -s $node->logfile; $node->connect_fails($connstr, $test_name, %params); - $node->wait_for_log(qr/DEBUG: (00000: )?client backend.*exited with exit code 1/, + $node->wait_for_log( + qr/DEBUG: (00000: )?client backend.*exited with exit code 1/, $log_location); ok(1, "$test_name: client backend process exited"); } diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build index cb983766c67..6e78ff1a030 100644 --- a/src/test/recovery/meson.build +++ b/src/test/recovery/meson.build @@ -54,6 +54,8 @@ tests += { 't/043_no_contrecord_switch.pl', 't/044_invalidate_inactive_slots.pl', 't/045_archive_restartpoint.pl', + 't/047_checkpoint_physical_slot.pl', + 't/048_vacuum_horizon_floor.pl' ], }, } diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl index 0ae2e982727..f2109efa9b1 100644 --- a/src/test/recovery/t/003_recovery_targets.pl +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -187,4 +187,54 @@ ok( $logfile =~ qr/FATAL: .* recovery ended before configured recovery target was reached/, 'recovery end before target reached is a fatal error'); +# Invalid timeline target +$node_standby = PostgreSQL::Test::Cluster->new('standby_9'); +$node_standby->init_from_backup($node_primary, 'my_backup', + has_restoring => 1); +$node_standby->append_conf('postgresql.conf', + "recovery_target_timeline = 'bogus'"); + +$res = run_log( + [ + 'pg_ctl', + '--pgdata' => $node_standby->data_dir, + '--log' => $node_standby->logfile, + 'start', + ]); +ok(!$res, 'invalid timeline target (bogus value)'); + +my $log_start = 
$node_standby->wait_for_log("is not a valid number"); + +# Timeline target out of min range +$node_standby->append_conf('postgresql.conf', + "recovery_target_timeline = '0'"); + +$res = run_log( + [ + 'pg_ctl', + '--pgdata' => $node_standby->data_dir, + '--log' => $node_standby->logfile, + 'start', + ]); +ok(!$res, 'invalid timeline target (lower bound check)'); + +$log_start = + $node_standby->wait_for_log("must be between 1 and 4294967295", $log_start); + +# Timeline target out of max range +$node_standby->append_conf('postgresql.conf', + "recovery_target_timeline = '4294967296'"); + +$res = run_log( + [ + 'pg_ctl', + '--pgdata' => $node_standby->data_dir, + '--log' => $node_standby->logfile, + 'start', + ]); +ok(!$res, 'invalid timeline target (upper bound check)'); + +$log_start = + $node_standby->wait_for_log("must be between 1 and 4294967295", $log_start); + done_testing(); diff --git a/src/test/recovery/t/016_min_consistency.pl b/src/test/recovery/t/016_min_consistency.pl index 9a3b4866fce..b381d0c21b5 100644 --- a/src/test/recovery/t/016_min_consistency.pl +++ b/src/test/recovery/t/016_min_consistency.pl @@ -39,7 +39,7 @@ sub find_largest_lsn defined($len) or die "read error on $filename: $!"; close($fh); - return sprintf("%X/%X", $max_hi, $max_lo); + return sprintf("%X/%08X", $max_hi, $max_lo); } # Initialize primary node diff --git a/src/test/recovery/t/040_standby_failover_slots_sync.pl b/src/test/recovery/t/040_standby_failover_slots_sync.pl index 9c8b49e942d..2c61c51e914 100644 --- a/src/test/recovery/t/040_standby_failover_slots_sync.pl +++ b/src/test/recovery/t/040_standby_failover_slots_sync.pl @@ -941,8 +941,7 @@ is( $standby1->safe_psql( 'synced slot retained on the new primary'); # Commit the prepared transaction -$standby1->safe_psql('postgres', - "COMMIT PREPARED 'test_twophase_slotsync';"); +$standby1->safe_psql('postgres', "COMMIT PREPARED 'test_twophase_slotsync';"); $standby1->wait_for_catchup('regress_mysub1'); # Confirm that the prepared 
transaction is replicated to the subscriber diff --git a/src/test/recovery/t/047_checkpoint_physical_slot.pl b/src/test/recovery/t/047_checkpoint_physical_slot.pl new file mode 100644 index 00000000000..a1332b5d44c --- /dev/null +++ b/src/test/recovery/t/047_checkpoint_physical_slot.pl @@ -0,0 +1,132 @@ +# Copyright (c) 2025, PostgreSQL Global Development Group +# +# This test verifies the case when the physical slot is advanced during +# checkpoint. The test checks that the physical slot's restart_lsn still refers +# to an existed WAL segment after immediate restart. +# +use strict; +use warnings FATAL => 'all'; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use Test::More; + +if ($ENV{enable_injection_points} ne 'yes') +{ + plan skip_all => 'Injection points not supported by this build'; +} + +my ($node, $result); + +$node = PostgreSQL::Test::Cluster->new('mike'); +$node->init; +$node->append_conf('postgresql.conf', "wal_level = 'replica'"); +$node->start; + +# Check if the extension injection_points is available, as it may be +# possible that this script is run with installcheck, where the module +# would not be installed by default. +if (!$node->check_extension('injection_points')) +{ + plan skip_all => 'Extension injection_points not installed'; +} + +$node->safe_psql('postgres', q(CREATE EXTENSION injection_points)); + +# Create a physical replication slot. +$node->safe_psql('postgres', + q{select pg_create_physical_replication_slot('slot_physical', true)}); + +# Advance slot to the current position, just to have everything "valid". +$node->safe_psql('postgres', + q{select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())} +); + +# Run checkpoint to flush current state to disk and set a baseline. +$node->safe_psql('postgres', q{checkpoint}); + +# Insert 2M rows; that's about 260MB (~20 segments) worth of WAL. +$node->advance_wal(20); + +# Advance slot to the current position, just to have everything "valid". 
+$node->safe_psql('postgres', + q{select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())} +); + +# Run another checkpoint to set a new restore LSN. +$node->safe_psql('postgres', q{checkpoint}); + +# Another 2M rows; that's about 260MB (~20 segments) worth of WAL. +$node->advance_wal(20); + +my $restart_lsn_init = $node->safe_psql('postgres', + q{select restart_lsn from pg_replication_slots where slot_name = 'slot_physical'} +); +chomp($restart_lsn_init); +note("restart lsn before checkpoint: $restart_lsn_init"); + +# Run another checkpoint, this time in the background, and make it wait +# on the injection point so that the checkpoint stops right before +# removing old WAL segments. +note('starting checkpoint'); + +my $checkpoint = $node->background_psql('postgres'); +$checkpoint->query_safe( + q{select injection_points_attach('checkpoint-before-old-wal-removal','wait')} +); +$checkpoint->query_until( + qr/starting_checkpoint/, + q(\echo starting_checkpoint +checkpoint; +\q +)); + +# Wait until the checkpoint stops right before removing WAL segments. +note('waiting for injection_point'); +$node->wait_for_event('checkpointer', 'checkpoint-before-old-wal-removal'); +note('injection_point is reached'); + +# OK, we're in the right situation: time to advance the physical slot, which +# recalculates the required LSN and then unblock the checkpoint, which +# removes the WAL still needed by the physical slot. +$node->safe_psql('postgres', + q{select pg_replication_slot_advance('slot_physical', pg_current_wal_lsn())} +); + +# Continue the checkpoint. 
+$node->safe_psql('postgres', + q{select injection_points_wakeup('checkpoint-before-old-wal-removal')}); + +my $restart_lsn_old = $node->safe_psql('postgres', + q{select restart_lsn from pg_replication_slots where slot_name = 'slot_physical'} +); +chomp($restart_lsn_old); +note("restart lsn before stop: $restart_lsn_old"); + +# Abruptly stop the server (1 second should be enough for the checkpoint +# to finish; it would be better). +$node->stop('immediate'); + +$node->start; + +# Get the restart_lsn of the slot right after restarting. +my $restart_lsn = $node->safe_psql('postgres', + q{select restart_lsn from pg_replication_slots where slot_name = 'slot_physical'} +); +chomp($restart_lsn); +note("restart lsn: $restart_lsn"); + +# Get the WAL segment name for the slot's restart_lsn. +my $restart_lsn_segment = $node->safe_psql('postgres', + "SELECT pg_walfile_name('$restart_lsn'::pg_lsn)"); +chomp($restart_lsn_segment); + +# Check if the required wal segment exists. +note("required by slot segment name: $restart_lsn_segment"); +my $datadir = $node->data_dir; +ok( -f "$datadir/pg_wal/$restart_lsn_segment", + "WAL segment $restart_lsn_segment for physical slot's restart_lsn $restart_lsn exists" +); + +done_testing(); diff --git a/src/test/recovery/t/048_vacuum_horizon_floor.pl b/src/test/recovery/t/048_vacuum_horizon_floor.pl new file mode 100644 index 00000000000..e56fce59d58 --- /dev/null +++ b/src/test/recovery/t/048_vacuum_horizon_floor.pl @@ -0,0 +1,288 @@ +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use Test::More; + +# Test that vacuum prunes away all dead tuples killed before OldestXmin +# +# This test creates a table on a primary, updates the table to generate dead +# tuples for vacuum, and then, during the vacuum, uses the replica to force +# GlobalVisState->maybe_needed on the primary to move backwards and precede +# the value of OldestXmin set at the beginning of vacuuming the table. 
+ +# Set up nodes +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 'physical'); + +# io_combine_limit is set to 1 to avoid pinning more than one buffer at a time +# to ensure test determinism. +$node_primary->append_conf( + 'postgresql.conf', qq[ +hot_standby_feedback = on +autovacuum = off +log_min_messages = INFO +maintenance_work_mem = 64 +io_combine_limit = 1 +]); +$node_primary->start; + +my $node_replica = PostgreSQL::Test::Cluster->new('standby'); + +$node_primary->backup('my_backup'); +$node_replica->init_from_backup($node_primary, 'my_backup', + has_streaming => 1); + +$node_replica->start; + +my $test_db = "test_db"; +$node_primary->safe_psql('postgres', "CREATE DATABASE $test_db"); + +# Save the original connection info for later use +my $orig_conninfo = $node_primary->connstr(); + +my $table1 = "vac_horizon_floor_table"; + +# Long-running Primary Session A +my $psql_primaryA = + $node_primary->background_psql($test_db, on_error_stop => 1); + +# Long-running Primary Session B +my $psql_primaryB = + $node_primary->background_psql($test_db, on_error_stop => 1); + +# Our test relies on two rounds of index vacuuming for reasons elaborated +# later. To trigger two rounds of index vacuuming, we must fill up the +# TIDStore with dead items partway through a vacuum of the table. The number +# of rows is just enough to ensure we exceed maintenance_work_mem on all +# supported platforms, while keeping test runtime as short as we can. +my $nrows = 2000; + +# Because vacuum's first pass, pruning, is where we use the GlobalVisState to +# check tuple visibility, GlobalVisState->maybe_needed must move backwards +# during pruning before checking the visibility for a tuple which would have +# been considered HEAPTUPLE_DEAD prior to maybe_needed moving backwards but +# HEAPTUPLE_RECENTLY_DEAD compared to the new, older value of maybe_needed. 
+# +# We must not only force the horizon on the primary to move backwards but also +# force the vacuuming backend's GlobalVisState to be updated. GlobalVisState +# is forced to update during index vacuuming. +# +# _bt_pendingfsm_finalize() calls GetOldestNonRemovableTransactionId() at the +# end of a round of index vacuuming, updating the backend's GlobalVisState +# and, in our case, moving maybe_needed backwards. +# +# Then vacuum's first (pruning) pass will continue and pruning will find our +# later inserted and updated tuple HEAPTUPLE_RECENTLY_DEAD when compared to +# maybe_needed but HEAPTUPLE_DEAD when compared to OldestXmin. +# +# Thus, we must force at least two rounds of index vacuuming to ensure that +# some tuple visibility checks will happen after a round of index vacuuming. +# To accomplish this, we set maintenance_work_mem to its minimum value and +# insert and delete enough rows that we force at least one round of index +# vacuuming before getting to a dead tuple which was killed after the standby +# is disconnected. +$node_primary->safe_psql( + $test_db, qq[ + CREATE TABLE ${table1}(col1 int) + WITH (autovacuum_enabled=false, fillfactor=10); + INSERT INTO $table1 VALUES(7); + INSERT INTO $table1 SELECT generate_series(1, $nrows) % 3; + CREATE INDEX on ${table1}(col1); + DELETE FROM $table1 WHERE col1 = 0; + INSERT INTO $table1 VALUES(7); +]); + +# We will later move the primary forward while the standby is disconnected. +# For now, however, there is no reason not to wait for the standby to catch +# up. +my $primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn); + +# Test that the WAL receiver is up and running. +$node_replica->poll_query_until( + $test_db, qq[ + SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);], 't'); + +# Set primary_conninfo to something invalid on the replica and reload the +# config. 
Once the config is reloaded, the startup process will force the WAL +# receiver to restart and it will be unable to reconnect because of the +# invalid connection information. +$node_replica->safe_psql( + $test_db, qq[ + ALTER SYSTEM SET primary_conninfo = ''; + SELECT pg_reload_conf(); + ]); + +# Wait until the WAL receiver has shut down and been unable to start up again. +$node_replica->poll_query_until( + $test_db, qq[ + SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);], 'f'); + +# Now insert and update a tuple which will be visible to the vacuum on the +# primary but which will have xmax newer than the oldest xmin on the standby +# that was recently disconnected. +my $res = $psql_primaryA->query_safe( + qq[ + INSERT INTO $table1 VALUES (99); + UPDATE $table1 SET col1 = 100 WHERE col1 = 99; + SELECT 'after_update'; + ] +); + +# Make sure the UPDATE finished +like($res, qr/^after_update$/m, "UPDATE occurred on primary session A"); + +# Open a cursor on the primary whose pin will keep VACUUM from getting a +# cleanup lock on the first page of the relation. We want VACUUM to be able to +# start, calculate initial values for OldestXmin and GlobalVisState and then +# be unable to proceed with pruning our dead tuples. This will allow us to +# reconnect the standby and push the horizon back before we start actual +# pruning and vacuuming. +my $primary_cursor1 = "vac_horizon_floor_cursor1"; + +# The first value inserted into the table was a 7, so FETCH FORWARD should +# return a 7. That's how we know the cursor has a pin. +# Disable index scans so the cursor pins heap pages and not index pages. +$res = $psql_primaryB->query_safe( + qq[ + BEGIN; + SET enable_bitmapscan = off; + SET enable_indexscan = off; + SET enable_indexonlyscan = off; + DECLARE $primary_cursor1 CURSOR FOR SELECT * FROM $table1 WHERE col1 = 7; + FETCH $primary_cursor1; + ] +); + +is($res, 7, qq[Cursor query returned $res. 
Expected value 7.]); + +# Get the PID of the session which will run the VACUUM FREEZE so that we can +# use it to filter pg_stat_activity later. +my $vacuum_pid = $psql_primaryA->query_safe("SELECT pg_backend_pid();"); + +# Now start a VACUUM FREEZE on the primary. It will call vacuum_get_cutoffs() +# and establish values of OldestXmin and GlobalVisState which are newer than +# all of our dead tuples. Then it will be unable to get a cleanup lock to +# start pruning, so it will hang. +# +# We use VACUUM FREEZE because it will wait for a cleanup lock instead of +# skipping the page pinned by the cursor. Note that works because the target +# tuple's xmax precedes OldestXmin which ensures that lazy_scan_noprune() will +# return false and we will wait for the cleanup lock. +# +# Disable any prefetching, parallelism, or other concurrent I/O by vacuum. The +# pages of the heap must be processed in order by a single worker to ensure +# test stability (PARALLEL 0 shouldn't be necessary but guards against the +# possibility of parallel heap vacuuming). +$psql_primaryA->{stdin} .= qq[ + SET maintenance_io_concurrency = 0; + VACUUM (VERBOSE, FREEZE, PARALLEL 0) $table1; + \\echo VACUUM + ]; + +# Make sure the VACUUM command makes it to the server. +$psql_primaryA->{run}->pump_nb(); + +# Make sure that the VACUUM has already called vacuum_get_cutoffs() and is +# just waiting on the lock to start vacuuming. We don't want the standby to +# re-establish a connection to the primary and push the horizon back until +# we've saved initial values in GlobalVisState and calculated OldestXmin. +$node_primary->poll_query_until( + $test_db, + qq[ + SELECT count(*) >= 1 FROM pg_stat_activity + WHERE pid = $vacuum_pid + AND wait_event = 'BufferPin'; + ], + 't'); + +# Ensure the WAL receiver is still not active on the replica. +$node_replica->poll_query_until( + $test_db, qq[ + SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);], 'f'); + +# Allow the WAL receiver connection to re-establish. 
+$node_replica->safe_psql( + $test_db, qq[ + ALTER SYSTEM SET primary_conninfo = '$orig_conninfo'; + SELECT pg_reload_conf(); + ]); + +# Ensure the new WAL receiver has connected. +$node_replica->poll_query_until( + $test_db, qq[ + SELECT EXISTS (SELECT * FROM pg_stat_wal_receiver);], 't'); + +# Once the WAL sender is shown on the primary, the replica should have +# connected with the primary and pushed the horizon backward. Primary Session +# A won't see that until the VACUUM FREEZE proceeds and does its first round +# of index vacuuming. +$node_primary->poll_query_until( + $test_db, qq[ + SELECT EXISTS (SELECT * FROM pg_stat_replication);], 't'); + +# Move the cursor forward to the next 7. We inserted the 7 much later, so +# advancing the cursor should allow vacuum to proceed vacuuming most pages of +# the relation. Because we set maintenance_work_mem sufficiently low, we +# expect that a round of index vacuuming has happened and that the vacuum is +# now waiting for the cursor to release its pin on the last page of the +# relation. +$res = $psql_primaryB->query_safe("FETCH $primary_cursor1"); +is($res, 7, + qq[Cursor query returned $res from second fetch. Expected value 7.]); + +# Prevent the test from incorrectly passing by confirming that we did indeed +# do a pass of index vacuuming. +$node_primary->poll_query_until( + $test_db, qq[ + SELECT index_vacuum_count > 0 + FROM pg_stat_progress_vacuum + WHERE datname='$test_db' AND relid::regclass = '$table1'::regclass; + ], 't'); + +# Commit the transaction with the open cursor so that the VACUUM can finish. +$psql_primaryB->query_until( + qr/^commit$/m, + qq[ + COMMIT; + \\echo commit + ] +); + +# VACUUM proceeds with pruning and does a visibility check on each tuple. In +# older versions of Postgres, pruning found our final dead tuple +# non-removable (HEAPTUPLE_RECENTLY_DEAD) since its xmax is after the new +# value of maybe_needed. 
Then heap_prepare_freeze_tuple() would decide the +# tuple xmax should be frozen because it precedes OldestXmin. Vacuum would +# then error out in heap_pre_freeze_checks() with "cannot freeze committed +# xmax". This was fixed by changing pruning to find all +# HEAPTUPLE_RECENTLY_DEAD tuples with xmaxes preceding OldestXmin +# HEAPTUPLE_DEAD and removing them. + +# With the fix, VACUUM should finish successfully, incrementing the table +# vacuum_count. +$node_primary->poll_query_until( + $test_db, + qq[ + SELECT vacuum_count > 0 + FROM pg_stat_all_tables WHERE relname = '${table1}'; + ] + , 't'); + +$primary_lsn = $node_primary->lsn('flush'); + +# Make sure something causes us to flush +$node_primary->safe_psql($test_db, "INSERT INTO $table1 VALUES (1);"); + +# Nothing on the replica should cause a recovery conflict, so this should +# finish successfully. +$node_primary->wait_for_catchup($node_replica, 'replay', $primary_lsn); + +## Shut down psqls +$psql_primaryA->quit; +$psql_primaryB->quit; + +$node_replica->stop(); +$node_primary->stop(); + +done_testing(); diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out index 476266e3f4b..08984dd98f1 100644 --- a/src/test/regress/expected/alter_table.out +++ b/src/test/regress/expected/alter_table.out @@ -4745,6 +4745,21 @@ alter table attbl alter column p1 set data type bigint; alter table atref alter column c1 set data type bigint; drop table attbl, atref; /* End test case for bug #17409 */ +/* Test case for bug #18970 */ +create table attbl(a int); +create table atref(b attbl check ((b).a is not null)); +alter table attbl alter column a type numeric; -- someday this should work +ERROR: cannot alter table "attbl" because column "atref.b" uses its row type +alter table atref drop constraint atref_b_check; +create statistics atref_stat on ((b).a is not null) from atref; +alter table attbl alter column a type numeric; -- someday this should work +ERROR: cannot alter table 
"attbl" because column "atref.b" uses its row type +drop statistics atref_stat; +create index atref_idx on atref (((b).a)); +alter table attbl alter column a type numeric; -- someday this should work +ERROR: cannot alter table "attbl" because column "atref.b" uses its row type +drop table attbl, atref; +/* End test case for bug #18970 */ -- Test that ALTER TABLE rewrite preserves a clustered index -- for normal indexes and indexes on constraints. create table alttype_cluster (a int); diff --git a/src/test/regress/expected/btree_index.out b/src/test/regress/expected/btree_index.out index bfb1a286ea4..21dc9b5783a 100644 --- a/src/test/regress/expected/btree_index.out +++ b/src/test/regress/expected/btree_index.out @@ -195,54 +195,123 @@ ORDER BY proname DESC, proargtypes DESC, pronamespace DESC LIMIT 1; (1 row) -- --- Add coverage for RowCompare quals whose rhs row has a NULL that ends scan +-- Forwards scan RowCompare qual whose row arg has a NULL that affects our +-- initial positioning strategy -- explain (costs off) SELECT proname, proargtypes, pronamespace FROM pg_proc - WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL) + WHERE (proname, proargtypes) >= ('abs', NULL) AND proname <= 'abs' ORDER BY proname, proargtypes, pronamespace; - QUERY PLAN -------------------------------------------------------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------------------------------------------------- Index Only Scan using pg_proc_proname_args_nsp_index on pg_proc - Index Cond: ((ROW(proname, proargtypes) < ROW('abs'::name, NULL::oidvector)) AND (proname = 'abs'::name)) + Index Cond: ((ROW(proname, proargtypes) >= ROW('abs'::name, NULL::oidvector)) AND (proname <= 'abs'::name)) (2 rows) SELECT proname, proargtypes, pronamespace FROM pg_proc - WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL) + WHERE (proname, proargtypes) >= ('abs', NULL) AND proname <= 
'abs' ORDER BY proname, proargtypes, pronamespace; proname | proargtypes | pronamespace ---------+-------------+-------------- (0 rows) -- --- Add coverage for backwards scan RowCompare quals whose rhs row has a NULL --- that ends scan +-- Forwards scan RowCompare quals whose row arg has a NULL that ends scan -- explain (costs off) SELECT proname, proargtypes, pronamespace FROM pg_proc - WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL) + WHERE proname >= 'abs' AND (proname, proargtypes) < ('abs', NULL) +ORDER BY proname, proargtypes, pronamespace; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------- + Index Only Scan using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: ((proname >= 'abs'::name) AND (ROW(proname, proargtypes) < ROW('abs'::name, NULL::oidvector))) +(2 rows) + +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname >= 'abs' AND (proname, proargtypes) < ('abs', NULL) +ORDER BY proname, proargtypes, pronamespace; + proname | proargtypes | pronamespace +---------+-------------+-------------- +(0 rows) + +-- +-- Backwards scan RowCompare qual whose row arg has a NULL that affects our +-- initial positioning strategy +-- +explain (costs off) +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname >= 'abs' AND (proname, proargtypes) <= ('abs', NULL) +ORDER BY proname DESC, proargtypes DESC, pronamespace DESC; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------- + Index Only Scan Backward using pg_proc_proname_args_nsp_index on pg_proc + Index Cond: ((proname >= 'abs'::name) AND (ROW(proname, proargtypes) <= ROW('abs'::name, NULL::oidvector))) +(2 rows) + +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname >= 'abs' AND (proname, proargtypes) <= ('abs', NULL) +ORDER BY proname DESC, proargtypes DESC, pronamespace DESC; + proname | 
proargtypes | pronamespace +---------+-------------+-------------- +(0 rows) + +-- +-- Backwards scan RowCompare qual whose row arg has a NULL that ends scan +-- +explain (costs off) +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE (proname, proargtypes) > ('abs', NULL) AND proname <= 'abs' ORDER BY proname DESC, proargtypes DESC, pronamespace DESC; - QUERY PLAN -------------------------------------------------------------------------------------------------------------- + QUERY PLAN +-------------------------------------------------------------------------------------------------------------- Index Only Scan Backward using pg_proc_proname_args_nsp_index on pg_proc - Index Cond: ((ROW(proname, proargtypes) > ROW('abs'::name, NULL::oidvector)) AND (proname = 'abs'::name)) + Index Cond: ((ROW(proname, proargtypes) > ROW('abs'::name, NULL::oidvector)) AND (proname <= 'abs'::name)) (2 rows) SELECT proname, proargtypes, pronamespace FROM pg_proc - WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL) + WHERE (proname, proargtypes) > ('abs', NULL) AND proname <= 'abs' ORDER BY proname DESC, proargtypes DESC, pronamespace DESC; proname | proargtypes | pronamespace ---------+-------------+-------------- (0 rows) +-- Makes B-Tree preprocessing deal with unmarking redundant keys that were +-- initially marked required (test case relies on current row compare +-- preprocessing limitations) +explain (costs off) +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname = 'zzzzzz' AND (proname, proargtypes) > ('abs', NULL) + AND pronamespace IN (1, 2, 3) AND proargtypes IN ('26 23', '5077') +ORDER BY proname, proargtypes, pronamespace; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Index Only Scan using pg_proc_proname_args_nsp_index on pg_proc + Index 
Cond: ((ROW(proname, proargtypes) > ROW('abs'::name, NULL::oidvector)) AND (proname = 'zzzzzz'::name) AND (proargtypes = ANY ('{"26 23",5077}'::oidvector[])) AND (pronamespace = ANY ('{1,2,3}'::oid[]))) +(2 rows) + +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname = 'zzzzzz' AND (proname, proargtypes) > ('abs', NULL) + AND pronamespace IN (1, 2, 3) AND proargtypes IN ('26 23', '5077') +ORDER BY proname, proargtypes, pronamespace; + proname | proargtypes | pronamespace +---------+-------------+-------------- +(0 rows) + -- --- Add coverage for recheck of > key following array advancement on previous --- (left sibling) page that used a high key whose attribute value corresponding --- to the > key was -inf (due to being truncated when the high key was created). +-- Performs a recheck of > key following array advancement on previous (left +-- sibling) page that used a high key whose attribute value corresponding to +-- the > key was -inf (due to being truncated when the high key was created). -- -- XXX This relies on the assumption that tenk1_thous_tenthous has a truncated -- high key "(183, -inf)" on the first page that we'll scan. 
The test will only diff --git a/src/test/regress/expected/constraints.out b/src/test/regress/expected/constraints.out index ad6aaab7385..ccea883cffd 100644 --- a/src/test/regress/expected/constraints.out +++ b/src/test/regress/expected/constraints.out @@ -748,6 +748,11 @@ ALTER TABLE unique_tbl ALTER CONSTRAINT unique_tbl_i_key ENFORCED; ERROR: cannot alter enforceability of constraint "unique_tbl_i_key" of relation "unique_tbl" ALTER TABLE unique_tbl ALTER CONSTRAINT unique_tbl_i_key NOT ENFORCED; ERROR: cannot alter enforceability of constraint "unique_tbl_i_key" of relation "unique_tbl" +-- can't make an existing constraint NOT VALID +ALTER TABLE unique_tbl ALTER CONSTRAINT unique_tbl_i_key NOT VALID; +ERROR: constraints cannot be altered to be NOT VALID +LINE 1: ...ABLE unique_tbl ALTER CONSTRAINT unique_tbl_i_key NOT VALID; + ^ DROP TABLE unique_tbl; -- -- EXCLUDE constraints @@ -1659,6 +1664,8 @@ EXECUTE get_nnconstraint_info('{constr_parent3, constr_child3}'); constr_parent3 | constr_parent3_a_not_null | t | t | 0 (2 rows) +COMMENT ON CONSTRAINT constr_parent2_a_not_null ON constr_parent2 IS 'this constraint is invalid'; +COMMENT ON CONSTRAINT constr_parent2_a_not_null ON constr_child2 IS 'this constraint is valid'; DEALLOCATE get_nnconstraint_info; -- end NOT NULL NOT VALID -- Comments diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index 8d5a06563c4..ac66eb55aee 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -81,6 +81,29 @@ copy copytest4 to stdout (header); c1 colname with tab: \t 1 a 2 b +-- test multi-line header line feature +create temp table copytest5 (c1 int); +copy copytest5 from stdin (format csv, header 2); +copy copytest5 to stdout (header); +c1 +1 +2 +truncate copytest5; +copy copytest5 from stdin (format csv, header 4); +select count(*) from copytest5; + count +------- + 0 +(1 row) + +truncate copytest5; +copy copytest5 from stdin (format csv, header 5); +select 
count(*) from copytest5; + count +------- + 0 +(1 row) + -- test copy from with a partitioned table create table parted_copytest ( a int, @@ -224,7 +247,7 @@ alter table header_copytest add column c text; copy header_copytest to stdout with (header match); ERROR: cannot use "match" with HEADER in COPY TO copy header_copytest from stdin with (header wrong_choice); -ERROR: header requires a Boolean value or "match" +ERROR: header requires a Boolean value, a non-negative integer, or the string "match" -- works copy header_copytest from stdin with (header match); copy header_copytest (c, a, b) from stdin with (header match); diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 64ea33aeae8..caa3c44f0d0 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -132,6 +132,12 @@ COPY x from stdin with (reject_limit 1); ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE COPY x from stdin with (on_error ignore, reject_limit 0); ERROR: REJECT_LIMIT (0) must be greater than zero +COPY x from stdin with (header -1); +ERROR: a negative integer value cannot be specified for header +COPY x from stdin with (header 2.5); +ERROR: header requires a Boolean value, a non-negative integer, or the string "match" +COPY x to stdout with (header 2); +ERROR: cannot use multi-line header in COPY TO -- too many columns in column list: should fail COPY x (a, b, c, d, e, d, c) from stdin; ERROR: column "d" specified more than once diff --git a/src/test/regress/expected/create_table_like.out b/src/test/regress/expected/create_table_like.out index bf34289e984..29a779c2e90 100644 --- a/src/test/regress/expected/create_table_like.out +++ b/src/test/regress/expected/create_table_like.out @@ -332,9 +332,10 @@ COMMENT ON CONSTRAINT ctlt1_a_check ON ctlt1 IS 't1_a_check'; COMMENT ON INDEX ctlt1_pkey IS 'index pkey'; COMMENT ON INDEX ctlt1_b_key IS 'index b_key'; ALTER TABLE ctlt1 ALTER COLUMN a SET STORAGE MAIN; 
-CREATE TABLE ctlt2 (c text); +CREATE TABLE ctlt2 (c text NOT NULL); ALTER TABLE ctlt2 ALTER COLUMN c SET STORAGE EXTERNAL; COMMENT ON COLUMN ctlt2.c IS 'C'; +COMMENT ON CONSTRAINT ctlt2_c_not_null ON ctlt2 IS 't2_c_not_null'; CREATE TABLE ctlt3 (a text CHECK (length(a) < 5), c text CHECK (length(c) < 7)); ALTER TABLE ctlt3 ALTER COLUMN c SET STORAGE EXTERNAL; ALTER TABLE ctlt3 ALTER COLUMN a SET STORAGE MAIN; @@ -351,9 +352,10 @@ CREATE TABLE ctlt12_storage (LIKE ctlt1 INCLUDING STORAGE, LIKE ctlt2 INCLUDING --------+------+-----------+----------+---------+----------+--------------+------------- a | text | | not null | | main | | b | text | | | | extended | | - c | text | | | | external | | + c | text | | not null | | external | | Not-null constraints: "ctlt1_a_not_null" NOT NULL "a" + "ctlt2_c_not_null" NOT NULL "c" CREATE TABLE ctlt12_comments (LIKE ctlt1 INCLUDING COMMENTS, LIKE ctlt2 INCLUDING COMMENTS); \d+ ctlt12_comments @@ -362,9 +364,16 @@ CREATE TABLE ctlt12_comments (LIKE ctlt1 INCLUDING COMMENTS, LIKE ctlt2 INCLUDIN --------+------+-----------+----------+---------+----------+--------------+------------- a | text | | not null | | extended | | A b | text | | | | extended | | B - c | text | | | | extended | | C + c | text | | not null | | extended | | C Not-null constraints: "ctlt1_a_not_null" NOT NULL "a" + "ctlt2_c_not_null" NOT NULL "c" + +SELECT conname, description FROM pg_description, pg_constraint c WHERE classoid = 'pg_constraint'::regclass AND objoid = c.oid AND c.conrelid = 'ctlt12_comments'::regclass; + conname | description +------------------+--------------- + ctlt2_c_not_null | t2_c_not_null +(1 row) CREATE TABLE ctlt1_inh (LIKE ctlt1 INCLUDING CONSTRAINTS INCLUDING COMMENTS) INHERITS (ctlt1); NOTICE: merging column "a" with inherited definition @@ -529,7 +538,9 @@ NOTICE: drop cascades to table inhe -- LIKE must respect NO INHERIT property of constraints CREATE TABLE noinh_con_copy (a int CHECK (a > 0) NO INHERIT, b int not null, c int not 
null no inherit); -CREATE TABLE noinh_con_copy1 (LIKE noinh_con_copy INCLUDING CONSTRAINTS); +COMMENT ON CONSTRAINT noinh_con_copy_b_not_null ON noinh_con_copy IS 'not null b'; +COMMENT ON CONSTRAINT noinh_con_copy_c_not_null ON noinh_con_copy IS 'not null c no inherit'; +CREATE TABLE noinh_con_copy1 (LIKE noinh_con_copy INCLUDING CONSTRAINTS INCLUDING COMMENTS); \d+ noinh_con_copy1 Table "public.noinh_con_copy1" Column | Type | Collation | Nullable | Default | Storage | Stats target | Description @@ -543,6 +554,17 @@ Not-null constraints: "noinh_con_copy_b_not_null" NOT NULL "b" "noinh_con_copy_c_not_null" NOT NULL "c" NO INHERIT +SELECT conname, description +FROM pg_description, pg_constraint c +WHERE classoid = 'pg_constraint'::regclass +AND objoid = c.oid AND c.conrelid = 'noinh_con_copy1'::regclass +ORDER BY conname COLLATE "C"; + conname | description +---------------------------+----------------------- + noinh_con_copy_b_not_null | not null b + noinh_con_copy_c_not_null | not null c no inherit +(2 rows) + -- fail, as partitioned tables don't allow NO INHERIT constraints CREATE TABLE noinh_con_copy1_parted (LIKE noinh_con_copy INCLUDING ALL) PARTITION BY LIST (a); diff --git a/src/test/regress/expected/domain.out b/src/test/regress/expected/domain.out index ba6f05eeb7d..b5ea707df31 100644 --- a/src/test/regress/expected/domain.out +++ b/src/test/regress/expected/domain.out @@ -1019,6 +1019,11 @@ insert into domain_test values (1, 2); -- should fail alter table domain_test add column c str_domain; ERROR: domain str_domain does not allow null values +-- disallow duplicated not-null constraints +create domain int_domain1 as int constraint nn1 not null constraint nn2 not null; +ERROR: redundant NOT NULL constraint definition +LINE 1: ...domain int_domain1 as int constraint nn1 not null constraint... 
+ ^ create domain str_domain2 as text check (value <> 'foo') default 'foo'; -- should fail alter table domain_test add column d str_domain2; diff --git a/src/test/regress/expected/foreign_key.out b/src/test/regress/expected/foreign_key.out index 4f3f280a439..f9bd252444f 100644 --- a/src/test/regress/expected/foreign_key.out +++ b/src/test/regress/expected/foreign_key.out @@ -1359,7 +1359,7 @@ LINE 1: ...e ALTER CONSTRAINT fktable_fk_fkey NOT DEFERRABLE INITIALLY ... ALTER TABLE fktable ALTER CONSTRAINT fktable_fk_fkey NO INHERIT; ERROR: constraint "fktable_fk_fkey" of relation "fktable" is not a not-null constraint ALTER TABLE fktable ALTER CONSTRAINT fktable_fk_fkey NOT VALID; -ERROR: FOREIGN KEY constraints cannot be marked NOT VALID +ERROR: constraints cannot be altered to be NOT VALID LINE 1: ...ER TABLE fktable ALTER CONSTRAINT fktable_fk_fkey NOT VALID; ^ ALTER TABLE fktable ALTER CONSTRAINT fktable_fk_fkey ENFORCED NOT ENFORCED; @@ -1895,29 +1895,76 @@ WHERE conrelid::regclass::text like 'fk_partitioned_fk%' ORDER BY oid::regclass: (5 rows) DROP TABLE fk_partitioned_fk, fk_notpartitioned_pk; --- NOT VALID foreign key on a non-partitioned table referencing a partitioned table +-- NOT VALID and NOT ENFORCED foreign key on a non-partitioned table +-- referencing a partitioned table CREATE TABLE fk_partitioned_pk (a int, b int, PRIMARY KEY (a, b)) PARTITION BY RANGE (a, b); CREATE TABLE fk_partitioned_pk_1 PARTITION OF fk_partitioned_pk FOR VALUES FROM (0,0) TO (1000,1000); +CREATE TABLE fk_partitioned_pk_2 PARTITION OF fk_partitioned_pk FOR VALUES FROM (1000,1000) TO (2000,2000); CREATE TABLE fk_notpartitioned_fk (b int, a int); -ALTER TABLE fk_notpartitioned_fk ADD FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; --- Constraint will be invalid. 
-SELECT conname, convalidated FROM pg_constraint +INSERT INTO fk_partitioned_pk VALUES(100,100), (1000,1000); +INSERT INTO fk_notpartitioned_fk VALUES(100,100), (1000,1000); +ALTER TABLE fk_notpartitioned_fk ADD CONSTRAINT fk_notpartitioned_fk_a_b_fkey + FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; +ALTER TABLE fk_notpartitioned_fk ADD CONSTRAINT fk_notpartitioned_fk_a_b_fkey2 + FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT ENFORCED; +-- All constraints will be invalid, and _fkey2 constraints will not be enforced. +SELECT conname, conenforced, convalidated FROM pg_constraint WHERE conrelid = 'fk_notpartitioned_fk'::regclass ORDER BY oid::regclass::text; - conname | convalidated ----------------------------------+-------------- - fk_notpartitioned_fk_a_b_fkey | f - fk_notpartitioned_fk_a_b_fkey_1 | f -(2 rows) + conname | conenforced | convalidated +----------------------------------+-------------+-------------- + fk_notpartitioned_fk_a_b_fkey | t | f + fk_notpartitioned_fk_a_b_fkey_1 | t | f + fk_notpartitioned_fk_a_b_fkey_2 | t | f + fk_notpartitioned_fk_a_b_fkey2 | f | f + fk_notpartitioned_fk_a_b_fkey2_1 | f | f + fk_notpartitioned_fk_a_b_fkey2_2 | f | f +(6 rows) ALTER TABLE fk_notpartitioned_fk VALIDATE CONSTRAINT fk_notpartitioned_fk_a_b_fkey; --- All constraints are now valid. -SELECT conname, convalidated FROM pg_constraint +ALTER TABLE fk_notpartitioned_fk ALTER CONSTRAINT fk_notpartitioned_fk_a_b_fkey2 ENFORCED; +-- All constraints are now valid and enforced. 
+SELECT conname, conenforced, convalidated FROM pg_constraint WHERE conrelid = 'fk_notpartitioned_fk'::regclass ORDER BY oid::regclass::text; - conname | convalidated ----------------------------------+-------------- - fk_notpartitioned_fk_a_b_fkey | t - fk_notpartitioned_fk_a_b_fkey_1 | t -(2 rows) + conname | conenforced | convalidated +----------------------------------+-------------+-------------- + fk_notpartitioned_fk_a_b_fkey | t | t + fk_notpartitioned_fk_a_b_fkey_1 | t | t + fk_notpartitioned_fk_a_b_fkey_2 | t | t + fk_notpartitioned_fk_a_b_fkey2 | t | t + fk_notpartitioned_fk_a_b_fkey2_1 | t | t + fk_notpartitioned_fk_a_b_fkey2_2 | t | t +(6 rows) + +-- test a self-referential FK +ALTER TABLE fk_partitioned_pk ADD CONSTRAINT selffk FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; +CREATE TABLE fk_partitioned_pk_3 PARTITION OF fk_partitioned_pk FOR VALUES FROM (2000,2000) TO (3000,3000) + PARTITION BY RANGE (a); +CREATE TABLE fk_partitioned_pk_3_1 PARTITION OF fk_partitioned_pk_3 FOR VALUES FROM (2000) TO (2100); +SELECT conname, conenforced, convalidated FROM pg_constraint +WHERE conrelid = 'fk_partitioned_pk'::regclass AND contype = 'f' +ORDER BY oid::regclass::text; + conname | conenforced | convalidated +------------+-------------+-------------- + selffk | t | f + selffk_1 | t | f + selffk_2 | t | f + selffk_3 | t | f + selffk_3_1 | t | f +(5 rows) + +ALTER TABLE fk_partitioned_pk_2 VALIDATE CONSTRAINT selffk; +ALTER TABLE fk_partitioned_pk VALIDATE CONSTRAINT selffk; +SELECT conname, conenforced, convalidated FROM pg_constraint +WHERE conrelid = 'fk_partitioned_pk'::regclass AND contype = 'f' +ORDER BY oid::regclass::text; + conname | conenforced | convalidated +------------+-------------+-------------- + selffk | t | t + selffk_1 | t | t + selffk_2 | t | t + selffk_3 | t | t + selffk_3_1 | t | t +(5 rows) DROP TABLE fk_notpartitioned_fk, fk_partitioned_pk; -- Test some other exotic foreign key features: MATCH SIMPLE, ON UPDATE/DELETE diff 
--git a/src/test/regress/expected/generated_stored.out b/src/test/regress/expected/generated_stored.out index 16de30ab191..adac2cedfb2 100644 --- a/src/test/regress/expected/generated_stored.out +++ b/src/test/regress/expected/generated_stored.out @@ -1313,6 +1313,18 @@ CREATE TABLE gtest31_1 (a int, b text GENERATED ALWAYS AS ('hello') STORED, c te CREATE TABLE gtest31_2 (x int, y gtest31_1); ALTER TABLE gtest31_1 ALTER COLUMN b TYPE varchar; -- fails ERROR: cannot alter table "gtest31_1" because column "gtest31_2.y" uses its row type +-- bug #18970: these cases are unsupported, but make sure they fail cleanly +ALTER TABLE gtest31_2 ADD CONSTRAINT cc CHECK ((y).b IS NOT NULL); +ALTER TABLE gtest31_1 ALTER COLUMN b SET EXPRESSION AS ('hello1'); +ERROR: cannot alter table "gtest31_1" because column "gtest31_2.y" uses its row type +ALTER TABLE gtest31_2 DROP CONSTRAINT cc; +CREATE STATISTICS gtest31_2_stat ON ((y).b is not null) FROM gtest31_2; +ALTER TABLE gtest31_1 ALTER COLUMN b SET EXPRESSION AS ('hello2'); +ERROR: cannot alter table "gtest31_1" because column "gtest31_2.y" uses its row type +DROP STATISTICS gtest31_2_stat; +CREATE INDEX gtest31_2_y_idx ON gtest31_2(((y).b)); +ALTER TABLE gtest31_1 ALTER COLUMN b SET EXPRESSION AS ('hello3'); +ERROR: cannot alter table "gtest31_1" because column "gtest31_2.y" uses its row type DROP TABLE gtest31_1, gtest31_2; -- Check it for a partitioned table, too CREATE TABLE gtest31_1 (a int, b text GENERATED ALWAYS AS ('hello') STORED, c text) PARTITION BY LIST (a); diff --git a/src/test/regress/expected/generated_virtual.out b/src/test/regress/expected/generated_virtual.out index 6300e7c1d96..3b40e15a95a 100644 --- a/src/test/regress/expected/generated_virtual.out +++ b/src/test/regress/expected/generated_virtual.out @@ -553,15 +553,11 @@ CREATE TABLE gtest4 ( a int, b double_int GENERATED ALWAYS AS ((a * 2, a * 3)) VIRTUAL ); -INSERT INTO gtest4 VALUES (1), (6); -SELECT * FROM gtest4; - a | b ----+--------- - 1 | (2,3) - 6 
| (12,18) -(2 rows) - -DROP TABLE gtest4; +ERROR: virtual generated column "b" cannot have a user-defined type +DETAIL: Virtual generated columns that make use of user-defined types are not yet supported. +--INSERT INTO gtest4 VALUES (1), (6); +--SELECT * FROM gtest4; +--DROP TABLE gtest4; DROP TYPE double_int; -- using tableoid is allowed CREATE TABLE gtest_tableoid ( @@ -604,9 +600,13 @@ INSERT INTO gtest11 VALUES (1, 10), (2, 20); GRANT SELECT (a, c) ON gtest11 TO regress_user11; CREATE FUNCTION gf1(a int) RETURNS int AS $$ SELECT a * 3 $$ IMMUTABLE LANGUAGE SQL; REVOKE ALL ON FUNCTION gf1(int) FROM PUBLIC; -CREATE TABLE gtest12 (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (gf1(b)) VIRTUAL); -INSERT INTO gtest12 VALUES (1, 10), (2, 20); -GRANT SELECT (a, c), INSERT ON gtest12 TO regress_user11; +CREATE TABLE gtest12 (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (gf1(b)) VIRTUAL); -- fails, user-defined function +ERROR: generation expression uses user-defined function +LINE 1: ...nt PRIMARY KEY, b int, c int GENERATED ALWAYS AS (gf1(b)) VI... + ^ +DETAIL: Virtual generated columns that make use of user-defined functions are not yet supported. 
+--INSERT INTO gtest12 VALUES (1, 10), (2, 20); +--GRANT SELECT (a, c), INSERT ON gtest12 TO regress_user11; SET ROLE regress_user11; SELECT a, b FROM gtest11; -- not allowed ERROR: permission denied for table gtest11 @@ -619,15 +619,12 @@ SELECT a, c FROM gtest11; -- allowed SELECT gf1(10); -- not allowed ERROR: permission denied for function gf1 -INSERT INTO gtest12 VALUES (3, 30), (4, 40); -- allowed (does not actually invoke the function) -SELECT a, c FROM gtest12; -- currently not allowed because of function permissions, should arguably be allowed -ERROR: permission denied for function gf1 +--INSERT INTO gtest12 VALUES (3, 30), (4, 40); -- allowed (does not actually invoke the function) +--SELECT a, c FROM gtest12; -- currently not allowed because of function permissions, should arguably be allowed RESET ROLE; -DROP FUNCTION gf1(int); -- fail -ERROR: cannot drop function gf1(integer) because other objects depend on it -DETAIL: column c of table gtest12 depends on function gf1(integer) -HINT: Use DROP ... CASCADE to drop the dependent objects too. -DROP TABLE gtest11, gtest12; +--DROP FUNCTION gf1(int); -- fail +DROP TABLE gtest11; +--DROP TABLE gtest12; DROP FUNCTION gf1(int); DROP USER regress_user11; -- check constraints @@ -637,10 +634,10 @@ INSERT INTO gtest20 (a) VALUES (30); -- violates constraint ERROR: new row for relation "gtest20" violates check constraint "gtest20_b_check" DETAIL: Failing row contains (30, virtual). ALTER TABLE gtest20 ALTER COLUMN b SET EXPRESSION AS (a * 100); -- violates constraint (currently not supported) -ERROR: ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables with check constraints +ERROR: ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables with check constraints DETAIL: Column "b" of relation "gtest20" is a virtual generated column. 
ALTER TABLE gtest20 ALTER COLUMN b SET EXPRESSION AS (a * 3); -- ok (currently not supported) -ERROR: ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables with check constraints +ERROR: ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables with check constraints DETAIL: Column "b" of relation "gtest20" is a virtual generated column. CREATE TABLE gtest20a (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) VIRTUAL); INSERT INTO gtest20a (a) VALUES (10); @@ -800,11 +797,23 @@ CREATE TABLE gtest24r (a int PRIMARY KEY, b gtestdomain1range GENERATED ALWAYS A ERROR: virtual generated column "b" cannot have a domain type --INSERT INTO gtest24r (a) VALUES (4); -- ok --INSERT INTO gtest24r (a) VALUES (6); -- error +CREATE TABLE gtest24at (a int PRIMARY KEY); +ALTER TABLE gtest24at ADD COLUMN b gtestdomain1 GENERATED ALWAYS AS (a * 2) VIRTUAL; -- error +ERROR: virtual generated column "b" cannot have a domain type +CREATE TABLE gtest24ata (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) VIRTUAL); +ALTER TABLE gtest24ata ALTER COLUMN b TYPE gtestdomain1; -- error +ERROR: virtual generated column "b" cannot have a domain type CREATE DOMAIN gtestdomainnn AS int CHECK (VALUE IS NOT NULL); CREATE TABLE gtest24nn (a int, b gtestdomainnn GENERATED ALWAYS AS (a * 2) VIRTUAL); ERROR: virtual generated column "b" cannot have a domain type --INSERT INTO gtest24nn (a) VALUES (4); -- ok --INSERT INTO gtest24nn (a) VALUES (NULL); -- error +-- using user-defined type not yet supported +CREATE TABLE gtest24xxx (a gtestdomain1, b gtestdomain1, c int GENERATED ALWAYS AS (greatest(a, b)) VIRTUAL); -- error +ERROR: generation expression uses user-defined type +LINE 1: ...main1, b gtestdomain1, c int GENERATED ALWAYS AS (greatest(a... + ^ +DETAIL: Virtual generated columns that make use of user-defined types are not yet supported. 
-- typed tables (currently not supported) CREATE TYPE gtest_type AS (f1 integer, f2 text, f3 bigint); CREATE TABLE gtest28 OF gtest_type (f1 WITH OPTIONS GENERATED ALWAYS AS (f2 *2) VIRTUAL); @@ -1274,6 +1283,15 @@ CREATE TABLE gtest31_1 (a int, b text GENERATED ALWAYS AS ('hello') VIRTUAL, c t CREATE TABLE gtest31_2 (x int, y gtest31_1); ALTER TABLE gtest31_1 ALTER COLUMN b TYPE varchar; -- fails ERROR: cannot alter table "gtest31_1" because column "gtest31_2.y" uses its row type +-- bug #18970 +ALTER TABLE gtest31_2 ADD CONSTRAINT cc CHECK ((y).b IS NOT NULL); +ALTER TABLE gtest31_1 ALTER COLUMN b SET EXPRESSION AS ('hello1'); +ALTER TABLE gtest31_2 DROP CONSTRAINT cc; +CREATE STATISTICS gtest31_2_stat ON ((y).b is not null) FROM gtest31_2; +ALTER TABLE gtest31_1 ALTER COLUMN b SET EXPRESSION AS ('hello2'); +DROP STATISTICS gtest31_2_stat; +CREATE INDEX gtest31_2_y_idx ON gtest31_2(((y).b)); +ALTER TABLE gtest31_1 ALTER COLUMN b SET EXPRESSION AS ('hello3'); DROP TABLE gtest31_1, gtest31_2; -- Check it for a partitioned table, too CREATE TABLE gtest31_1 (a int, b text GENERATED ALWAYS AS ('hello') VIRTUAL, c text) PARTITION BY LIST (a); @@ -1470,7 +1488,8 @@ create table gtest32 ( a int primary key, b int generated always as (a * 2), c int generated always as (10 + 10), - d int generated always as (coalesce(a, 100)) + d int generated always as (coalesce(a, 100)), + e int ); insert into gtest32 values (1), (2); analyze gtest32; @@ -1554,41 +1573,44 @@ select t2.* from gtest32 t1 left join gtest32 t2 on false; QUERY PLAN ------------------------------------------------------ Nested Loop Left Join - Output: a, (a * 2), (20), (COALESCE(a, 100)) + Output: a, (a * 2), (20), (COALESCE(a, 100)), e Join Filter: false -> Seq Scan on generated_virtual_tests.gtest32 t1 - Output: t1.a, t1.b, t1.c, t1.d + Output: t1.a, t1.b, t1.c, t1.d, t1.e -> Result - Output: a, 20, COALESCE(a, 100) + Output: a, e, 20, COALESCE(a, 100) One-Time Filter: false (8 rows) select t2.* from gtest32 
t1 left join gtest32 t2 on false; - a | b | c | d ----+---+---+--- - | | | - | | | + a | b | c | d | e +---+---+---+---+--- + | | | | + | | | | (2 rows) explain (verbose, costs off) -select * from gtest32 t group by grouping sets (a, b, c, d) having c = 20; +select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20; QUERY PLAN ----------------------------------------------------- HashAggregate - Output: a, ((a * 2)), (20), (COALESCE(a, 100)) + Output: a, ((a * 2)), (20), (COALESCE(a, 100)), e Hash Key: t.a Hash Key: (t.a * 2) Hash Key: 20 Hash Key: COALESCE(t.a, 100) + Hash Key: t.e Filter: ((20) = 20) -> Seq Scan on generated_virtual_tests.gtest32 t - Output: a, (a * 2), 20, COALESCE(a, 100) -(9 rows) + Output: a, (a * 2), 20, COALESCE(a, 100), e +(10 rows) -select * from gtest32 t group by grouping sets (a, b, c, d) having c = 20; - a | b | c | d ----+---+----+--- - | | 20 | +select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20; + a | b | c | d | e +---+---+----+---+--- + | | 20 | | (1 row) +-- Ensure that the virtual generated columns in ALTER COLUMN TYPE USING expression are expanded +alter table gtest32 alter column e type bigint using b; drop table gtest32; diff --git a/src/test/regress/expected/horology.out b/src/test/regress/expected/horology.out index b90bfcd794f..5ae93d8e8a5 100644 --- a/src/test/regress/expected/horology.out +++ b/src/test/regress/expected/horology.out @@ -467,6 +467,15 @@ SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08'; ERROR: invalid input syntax for type timestamp with time zone: "Y2001M12D27H04MM05S06.789-08" LINE 1: SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-0... 
^ +-- More examples we used to accept and should not +SELECT timestamp with time zone 'J2452271 T X03456-08'; +ERROR: invalid input syntax for type timestamp with time zone: "J2452271 T X03456-08" +LINE 1: SELECT timestamp with time zone 'J2452271 T X03456-08'; + ^ +SELECT timestamp with time zone 'J2452271 T X03456.001e6-08'; +ERROR: invalid input syntax for type timestamp with time zone: "J2452271 T X03456.001e6-08" +LINE 1: SELECT timestamp with time zone 'J2452271 T X03456.001e6-08'... + ^ -- conflicting fields should throw errors SELECT date '1995-08-06 epoch'; ERROR: invalid input syntax for type date: "1995-08-06 epoch" diff --git a/src/test/regress/expected/incremental_sort.out b/src/test/regress/expected/incremental_sort.out index b00219643b9..5a1dd9fc022 100644 --- a/src/test/regress/expected/incremental_sort.out +++ b/src/test/regress/expected/incremental_sort.out @@ -1722,3 +1722,43 @@ order by t1.four, t1.two limit 1; -> Seq Scan on tenk1 t2 (12 rows) +-- +-- Test incremental sort for Append/MergeAppend +-- +create table prt_tbl (a int, b int) partition by range (a); +create table prt_tbl_1 partition of prt_tbl for values from (0) to (100); +create table prt_tbl_2 partition of prt_tbl for values from (100) to (200); +insert into prt_tbl select i%200, i from generate_series(1,1000)i; +create index on prt_tbl_1(a); +create index on prt_tbl_2(a, b); +analyze prt_tbl; +set enable_seqscan to off; +set enable_bitmapscan to off; +-- Ensure we get an incremental sort for the subpath of Append +explain (costs off) select * from prt_tbl order by a, b; + QUERY PLAN +------------------------------------------------------------ + Append + -> Incremental Sort + Sort Key: prt_tbl_1.a, prt_tbl_1.b + Presorted Key: prt_tbl_1.a + -> Index Scan using prt_tbl_1_a_idx on prt_tbl_1 + -> Index Only Scan using prt_tbl_2_a_b_idx on prt_tbl_2 +(6 rows) + +-- Ensure we get an incremental sort for the subpath of MergeAppend +explain (costs off) select * from prt_tbl_1 union all 
select * from prt_tbl_2 order by a, b; + QUERY PLAN +------------------------------------------------------------ + Merge Append + Sort Key: prt_tbl_1.a, prt_tbl_1.b + -> Incremental Sort + Sort Key: prt_tbl_1.a, prt_tbl_1.b + Presorted Key: prt_tbl_1.a + -> Index Scan using prt_tbl_1_a_idx on prt_tbl_1 + -> Index Only Scan using prt_tbl_2_a_b_idx on prt_tbl_2 +(7 rows) + +reset enable_bitmapscan; +reset enable_seqscan; +drop table prt_tbl; diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out index f9b0c415cfd..5b5055babdc 100644 --- a/src/test/regress/expected/inherit.out +++ b/src/test/regress/expected/inherit.out @@ -1898,10 +1898,11 @@ ORDER BY thousand, tenthous; Merge Append Sort Key: tenk1.thousand, tenk1.tenthous -> Index Only Scan using tenk1_thous_tenthous on tenk1 - -> Sort + -> Incremental Sort Sort Key: tenk1_1.thousand, tenk1_1.thousand + Presorted Key: tenk1_1.thousand -> Index Only Scan using tenk1_thous_tenthous on tenk1 tenk1_1 -(6 rows) +(7 rows) explain (costs off) SELECT thousand, tenthous, thousand+tenthous AS x FROM tenk1 @@ -1982,10 +1983,11 @@ ORDER BY x, y; Merge Append Sort Key: a.thousand, a.tenthous -> Index Only Scan using tenk1_thous_tenthous on tenk1 a - -> Sort + -> Incremental Sort Sort Key: b.unique2, b.unique2 + Presorted Key: b.unique2 -> Index Only Scan using tenk1_unique2 on tenk1 b -(6 rows) +(7 rows) -- exercise rescan code path via a repeatedly-evaluated subquery explain (costs off) @@ -2281,7 +2283,7 @@ Inherits: pp1, create table cc3 (a2 int not null no inherit) inherits (cc1); NOTICE: moving and merging column "a2" with inherited definition DETAIL: User-specified column moved to the position of the inherited column. -ERROR: cannot define not-null constraint on column "a2" with NO INHERIT +ERROR: cannot define not-null constraint with NO INHERIT on column "a2" DETAIL: The column has an inherited not-null constraint. 
-- change NO INHERIT status of inherited constraint: no dice, it's inherited alter table cc2 add not null a2 no inherit; @@ -2530,7 +2532,7 @@ ERROR: conflicting NO INHERIT declaration for not-null constraint on column "a" CREATE TABLE inh_nn1 (a int not null); CREATE TABLE inh_nn2 (a int not null no inherit) INHERITS (inh_nn1); NOTICE: merging column "a" with inherited definition -ERROR: cannot define not-null constraint on column "a" with NO INHERIT +ERROR: cannot define not-null constraint with NO INHERIT on column "a" DETAIL: The column has an inherited not-null constraint. CREATE TABLE inh_nn3 (a int not null, b int, not null a no inherit); ERROR: conflicting NO INHERIT declaration for not-null constraint on column "a" diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index f35a0b18c37..46ddfa844c5 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -3946,6 +3946,59 @@ where t1.unique2 < 42 and t1.stringu1 > t2.stringu2; (1 row) -- variant that isn't quite a star-schema case +explain (verbose, costs off) +select ss1.d1 from + tenk1 as t1 + inner join tenk1 as t2 + on t1.tenthous = t2.ten + inner join + int8_tbl as i8 + left join int4_tbl as i4 + inner join (select 64::information_schema.cardinal_number as d1 + from tenk1 t3, + lateral (select abs(t3.unique1) + random()) ss0(x) + where t3.fivethous < 0) as ss1 + on i4.f1 = ss1.d1 + on i8.q1 = i4.f1 + on t1.tenthous = ss1.d1 +where t1.unique1 < i4.f1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Nested Loop + Output: (64)::information_schema.cardinal_number + Join Filter: (t1.tenthous = ((64)::information_schema.cardinal_number)::integer) + -> Seq Scan on public.tenk1 t3 + Output: t3.unique1, t3.unique2, t3.two, t3.four, t3.ten, t3.twenty, 
t3.hundred, t3.thousand, t3.twothousand, t3.fivethous, t3.tenthous, t3.odd, t3.even, t3.stringu1, t3.stringu2, t3.string4 + Filter: (t3.fivethous < 0) + -> Nested Loop + Output: t1.tenthous, t2.ten + -> Nested Loop + Output: t1.tenthous, t2.ten, i4.f1 + Join Filter: (t1.unique1 < i4.f1) + -> Hash Join + Output: t1.tenthous, t1.unique1, t2.ten + Hash Cond: (t2.ten = t1.tenthous) + -> Seq Scan on public.tenk1 t2 + Output: t2.unique1, t2.unique2, t2.two, t2.four, t2.ten, t2.twenty, t2.hundred, t2.thousand, t2.twothousand, t2.fivethous, t2.tenthous, t2.odd, t2.even, t2.stringu1, t2.stringu2, t2.string4 + -> Hash + Output: t1.tenthous, t1.unique1 + -> Nested Loop + Output: t1.tenthous, t1.unique1 + -> Subquery Scan on ss0 + Output: ss0.x, (64)::information_schema.cardinal_number + -> Result + Output: ((abs(t3.unique1))::double precision + random()) + -> Index Scan using tenk1_thous_tenthous on public.tenk1 t1 + Output: t1.unique1, t1.unique2, t1.two, t1.four, t1.ten, t1.twenty, t1.hundred, t1.thousand, t1.twothousand, t1.fivethous, t1.tenthous, t1.odd, t1.even, t1.stringu1, t1.stringu2, t1.string4 + Index Cond: (t1.tenthous = (((64)::information_schema.cardinal_number))::integer) + -> Seq Scan on public.int4_tbl i4 + Output: i4.f1 + Filter: (i4.f1 = ((64)::information_schema.cardinal_number)::integer) + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 + Filter: (i8.q1 = ((64)::information_schema.cardinal_number)::integer) +(33 rows) + select ss1.d1 from tenk1 as t1 inner join tenk1 as t2 @@ -4035,6 +4088,195 @@ select * from 1 | 2 | 2 (1 row) +-- This example demonstrates the folly of our old "have_dangerous_phv" logic +begin; +set local from_collapse_limit to 2; +explain (verbose, costs off) +select * from int8_tbl t1 + left join + (select coalesce(t2.q1 + x, 0) from int8_tbl t2, + lateral (select t3.q1 as x from int8_tbl t3, + lateral (select t2.q1, t3.q1 offset 0) s)) + on true; + QUERY PLAN +------------------------------------------------------------------ 
+ Nested Loop Left Join + Output: t1.q1, t1.q2, (COALESCE((t2.q1 + t3.q1), '0'::bigint)) + -> Seq Scan on public.int8_tbl t1 + Output: t1.q1, t1.q2 + -> Materialize + Output: (COALESCE((t2.q1 + t3.q1), '0'::bigint)) + -> Nested Loop + Output: COALESCE((t2.q1 + t3.q1), '0'::bigint) + -> Seq Scan on public.int8_tbl t2 + Output: t2.q1, t2.q2 + -> Nested Loop + Output: t3.q1 + -> Seq Scan on public.int8_tbl t3 + Output: t3.q1, t3.q2 + -> Result + Output: NULL::bigint, NULL::bigint +(16 rows) + +rollback; +-- ... not that the initial replacement didn't have some bugs too +begin; +create temp table t(i int primary key); +explain (verbose, costs off) +select * from t t1 + left join (select 1 as x, * from t t2(i2)) t2ss on t1.i = t2ss.i2 + left join t t3(i3) on false + left join t t4(i4) on t4.i4 > t2ss.x; + QUERY PLAN +---------------------------------------------------------- + Nested Loop Left Join + Output: t1.i, (1), t2.i2, i3, t4.i4 + -> Nested Loop Left Join + Output: t1.i, t2.i2, (1), i3 + Join Filter: false + -> Hash Left Join + Output: t1.i, t2.i2, (1) + Inner Unique: true + Hash Cond: (t1.i = t2.i2) + -> Seq Scan on pg_temp.t t1 + Output: t1.i + -> Hash + Output: t2.i2, (1) + -> Seq Scan on pg_temp.t t2 + Output: t2.i2, 1 + -> Result + Output: i3 + One-Time Filter: false + -> Memoize + Output: t4.i4 + Cache Key: (1) + Cache Mode: binary + -> Index Only Scan using t_pkey on pg_temp.t t4 + Output: t4.i4 + Index Cond: (t4.i4 > (1)) +(25 rows) + +explain (verbose, costs off) +select * from + (select k from + (select i, coalesce(i, j) as k from + (select i from t union all select 0) + join (select 1 as j limit 1) on i = j) + right join (select 2 as x) on true + join (select 3 as y) on i is not null + ), + lateral (select k as kl limit 1); + QUERY PLAN +------------------------------------------------------------------- + Nested Loop + Output: COALESCE(t.i, (1)), ((COALESCE(t.i, (1)))) + -> Limit + Output: 1 + -> Result + Output: 1 + -> Nested Loop + Output: t.i, 
((COALESCE(t.i, (1)))) + -> Result + Output: t.i, COALESCE(t.i, (1)) + -> Append + -> Index Only Scan using t_pkey on pg_temp.t + Output: t.i + Index Cond: (t.i = (1)) + -> Result + Output: 0 + One-Time Filter: ((1) = 0) + -> Limit + Output: ((COALESCE(t.i, (1)))) + -> Result + Output: (COALESCE(t.i, (1))) +(21 rows) + +rollback; +-- PHVs containing SubLinks are quite tricky to get right +explain (verbose, costs off) +select * +from int8_tbl i8 + inner join + (select (select true) as x + from int4_tbl i4, lateral (select i4.f1 as y limit 1) ss1 + where i4.f1 = 0) ss2 on true + right join (select false as z) ss3 on true, + lateral (select i8.q2 as q2l where x limit 1) ss4 +where i8.q2 = 123; + QUERY PLAN +---------------------------------------------------------------- + Nested Loop + Output: i8.q1, i8.q2, (InitPlan 1).col1, false, (i8.q2) + InitPlan 1 + -> Result + Output: true + InitPlan 2 + -> Result + Output: true + -> Seq Scan on public.int4_tbl i4 + Output: i4.f1 + Filter: (i4.f1 = 0) + -> Nested Loop + Output: i8.q1, i8.q2, (i8.q2) + -> Subquery Scan on ss1 + Output: ss1.y, (InitPlan 1).col1 + -> Limit + Output: NULL::integer + -> Result + Output: NULL::integer + -> Nested Loop + Output: i8.q1, i8.q2, (i8.q2) + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 + Filter: (i8.q2 = 123) + -> Limit + Output: (i8.q2) + -> Result + Output: i8.q2 + One-Time Filter: ((InitPlan 1).col1) +(29 rows) + +explain (verbose, costs off) +select * +from int8_tbl i8 + inner join + (select (select true) as x + from int4_tbl i4, lateral (select 1 as y limit 1) ss1 + where i4.f1 = 0) ss2 on true + right join (select false as z) ss3 on true, + lateral (select i8.q2 as q2l where x limit 1) ss4 +where i8.q2 = 123; + QUERY PLAN +---------------------------------------------------------------- + Nested Loop + Output: i8.q1, i8.q2, (InitPlan 1).col1, false, (i8.q2) + InitPlan 1 + -> Result + Output: true + InitPlan 2 + -> Result + Output: true + -> Limit + Output: NULL::integer + 
-> Result + Output: NULL::integer + -> Nested Loop + Output: i8.q1, i8.q2, (i8.q2) + -> Seq Scan on public.int4_tbl i4 + Output: i4.f1, (InitPlan 1).col1 + Filter: (i4.f1 = 0) + -> Nested Loop + Output: i8.q1, i8.q2, (i8.q2) + -> Seq Scan on public.int8_tbl i8 + Output: i8.q1, i8.q2 + Filter: (i8.q2 = 123) + -> Limit + Output: (i8.q2) + -> Result + Output: i8.q2 + One-Time Filter: ((InitPlan 1).col1) +(27 rows) + -- Test proper handling of appendrel PHVs during useless-RTE removal explain (costs off) select * from @@ -5384,14 +5626,14 @@ select * from (select 1 as id) as xx left join (tenk1 as a1 full join (select 1 as id) as yy on (a1.unique1 = yy.id)) - on (xx.id = coalesce(yy.id)); - QUERY PLAN ---------------------------------------- + on (xx.id = coalesce(yy.id, yy.id)); + QUERY PLAN +------------------------------------------ Nested Loop Left Join -> Result -> Hash Full Join Hash Cond: (a1.unique1 = (1)) - Filter: (1 = COALESCE((1))) + Filter: (1 = COALESCE((1), (1))) -> Seq Scan on tenk1 a1 -> Hash -> Result @@ -5401,7 +5643,7 @@ select * from (select 1 as id) as xx left join (tenk1 as a1 full join (select 1 as id) as yy on (a1.unique1 = yy.id)) - on (xx.id = coalesce(yy.id)); + on (xx.id = coalesce(yy.id, yy.id)); id | unique1 | unique2 | two | four | ten | twenty | hundred | thousand | twothousand | fivethous | tenthous | odd | even | stringu1 | stringu2 | string4 | id ----+---------+---------+-----+------+-----+--------+---------+----------+-------------+-----------+----------+-----+------+----------+----------+---------+---- 1 | 1 | 2838 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 2 | 3 | BAAAAA | EFEAAA | OOOOxx | 1 @@ -8169,20 +8411,20 @@ select * from int4_tbl i left join explain (verbose, costs off) select * from int4_tbl i left join - lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true; - QUERY PLAN -------------------------------------- + lateral (select coalesce(i, i) from int2_tbl j where i.f1 = j.f1) k on true; + QUERY PLAN 
+------------------------------------------ Nested Loop Left Join - Output: i.f1, (COALESCE(i.*)) + Output: i.f1, (COALESCE(i.*, i.*)) -> Seq Scan on public.int4_tbl i Output: i.f1, i.* -> Seq Scan on public.int2_tbl j - Output: j.f1, COALESCE(i.*) + Output: j.f1, COALESCE(i.*, i.*) Filter: (i.f1 = j.f1) (7 rows) select * from int4_tbl i left join - lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true; + lateral (select coalesce(i, i) from int2_tbl j where i.f1 = j.f1) k on true; f1 | coalesce -------------+---------- 0 | (0) @@ -9351,14 +9593,14 @@ CREATE STATISTICS group_tbl_stat (ndistinct) ON a, b FROM group_tbl; ANALYZE group_tbl; EXPLAIN (COSTS OFF) SELECT 1 FROM group_tbl t1 - LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE + LEFT JOIN (SELECT a c1, COALESCE(a, a) c2 FROM group_tbl t2) s ON TRUE GROUP BY s.c1, s.c2; - QUERY PLAN --------------------------------------------- + QUERY PLAN +------------------------------------------------ Group - Group Key: t2.a, (COALESCE(t2.a)) + Group Key: t2.a, (COALESCE(t2.a, t2.a)) -> Sort - Sort Key: t2.a, (COALESCE(t2.a)) + Sort Key: t2.a, (COALESCE(t2.a, t2.a)) -> Nested Loop Left Join -> Seq Scan on group_tbl t1 -> Seq Scan on group_tbl t2 diff --git a/src/test/regress/expected/matview.out b/src/test/regress/expected/matview.out index 54939ecc6b0..c56c9fa3a25 100644 --- a/src/test/regress/expected/matview.out +++ b/src/test/regress/expected/matview.out @@ -587,7 +587,7 @@ CREATE MATERIALIZED VIEW drop_idx_matview AS NOTICE: index "mvtest_drop_idx" does not exist, skipping CREATE UNIQUE INDEX mvtest_drop_idx ON drop_idx_matview (i); REFRESH MATERIALIZED VIEW CONCURRENTLY drop_idx_matview; -ERROR: could not find suitable unique index on materialized view +ERROR: could not find suitable unique index on materialized view "drop_idx_matview" DROP MATERIALIZED VIEW drop_idx_matview; -- clean up RESET search_path; -- make sure that create WITH NO DATA works via SPI diff --git 
a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out index 38dfaf021c9..150dc1b44cf 100644 --- a/src/test/regress/expected/memoize.out +++ b/src/test/regress/expected/memoize.out @@ -25,6 +25,7 @@ begin ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N'); ln := regexp_replace(ln, 'loops=\d+', 'loops=N'); ln := regexp_replace(ln, 'Index Searches: \d+', 'Index Searches: N'); + ln := regexp_replace(ln, 'Memory: \d+kB', 'Memory: NkB'); return next ln; end loop; end; @@ -500,3 +501,62 @@ RESET max_parallel_workers_per_gather; RESET parallel_tuple_cost; RESET parallel_setup_cost; RESET min_parallel_table_scan_size; +-- Ensure memoize works for ANTI joins +CREATE TABLE tab_anti (a int, b boolean); +INSERT INTO tab_anti SELECT i%3, false FROM generate_series(1,100)i; +ANALYZE tab_anti; +-- Ensure we get a Memoize plan for ANTI join +SELECT explain_memoize(' +SELECT COUNT(*) FROM tab_anti t1 LEFT JOIN +LATERAL (SELECT DISTINCT ON (a) a, b, t1.a AS x FROM tab_anti t2) t2 +ON t1.a+1 = t2.a +WHERE t2.a IS NULL;', false); + explain_memoize +-------------------------------------------------------------------------------------------- + Aggregate (actual rows=1.00 loops=N) + -> Nested Loop Anti Join (actual rows=33.00 loops=N) + -> Seq Scan on tab_anti t1 (actual rows=100.00 loops=N) + -> Memoize (actual rows=0.67 loops=N) + Cache Key: (t1.a + 1), t1.a + Cache Mode: binary + Hits: 97 Misses: 3 Evictions: Zero Overflows: 0 Memory Usage: NkB + -> Subquery Scan on t2 (actual rows=0.67 loops=N) + Filter: ((t1.a + 1) = t2.a) + Rows Removed by Filter: 2 + -> Unique (actual rows=2.67 loops=N) + -> Sort (actual rows=67.33 loops=N) + Sort Key: t2_1.a + Sort Method: quicksort Memory: NkB + -> Seq Scan on tab_anti t2_1 (actual rows=100.00 loops=N) +(15 rows) + +-- And check we get the expected results. 
+SELECT COUNT(*) FROM tab_anti t1 LEFT JOIN +LATERAL (SELECT DISTINCT ON (a) a, b, t1.a AS x FROM tab_anti t2) t2 +ON t1.a+1 = t2.a +WHERE t2.a IS NULL; + count +------- + 33 +(1 row) + +-- Ensure we do not add memoize node for SEMI join +EXPLAIN (COSTS OFF) +SELECT * FROM tab_anti t1 WHERE t1.a IN + (SELECT a FROM tab_anti t2 WHERE t2.b IN + (SELECT t1.b FROM tab_anti t3 WHERE t2.a > 1 OFFSET 0)); + QUERY PLAN +------------------------------------------------- + Nested Loop Semi Join + -> Seq Scan on tab_anti t1 + -> Nested Loop Semi Join + Join Filter: (t1.a = t2.a) + -> Seq Scan on tab_anti t2 + -> Subquery Scan on "ANY_subquery" + Filter: (t2.b = "ANY_subquery".b) + -> Result + One-Time Filter: (t2.a > 1) + -> Seq Scan on tab_anti t3 +(10 rows) + +DROP TABLE tab_anti; diff --git a/src/test/regress/expected/merge.out b/src/test/regress/expected/merge.out index bcd29668297..cf2219df754 100644 --- a/src/test/regress/expected/merge.out +++ b/src/test/regress/expected/merge.out @@ -2702,6 +2702,76 @@ SELECT * FROM new_measurement ORDER BY city_id, logdate; 1 | 01-17-2007 | | (2 rows) +-- MERGE into inheritance root table +DROP TRIGGER insert_measurement_trigger ON measurement; +ALTER TABLE measurement ADD CONSTRAINT mcheck CHECK (city_id = 0) NO INHERIT; +EXPLAIN (COSTS OFF) +MERGE INTO measurement m + USING (VALUES (1, '01-17-2007'::date)) nm(city_id, logdate) ON + (m.city_id = nm.city_id and m.logdate=nm.logdate) +WHEN NOT MATCHED THEN INSERT + (city_id, logdate, peaktemp, unitsales) + VALUES (city_id - 1, logdate, 25, 100); + QUERY PLAN +-------------------------------------------------------------------------- + Merge on measurement m + Merge on measurement_y2007m01 m_1 + -> Nested Loop Left Join + -> Result + -> Seq Scan on measurement_y2007m01 m_1 + Filter: ((city_id = 1) AND (logdate = '01-17-2007'::date)) +(6 rows) + +BEGIN; +MERGE INTO measurement m + USING (VALUES (1, '01-17-2007'::date)) nm(city_id, logdate) ON + (m.city_id = nm.city_id and 
m.logdate=nm.logdate) +WHEN NOT MATCHED THEN INSERT + (city_id, logdate, peaktemp, unitsales) + VALUES (city_id - 1, logdate, 25, 100); +SELECT * FROM ONLY measurement ORDER BY city_id, logdate; + city_id | logdate | peaktemp | unitsales +---------+------------+----------+----------- + 0 | 07-21-2005 | 25 | 35 + 0 | 01-17-2007 | 25 | 100 +(2 rows) + +ROLLBACK; +ALTER TABLE measurement ENABLE ROW LEVEL SECURITY; +ALTER TABLE measurement FORCE ROW LEVEL SECURITY; +CREATE POLICY measurement_p ON measurement USING (peaktemp IS NOT NULL); +MERGE INTO measurement m + USING (VALUES (1, '01-17-2007'::date)) nm(city_id, logdate) ON + (m.city_id = nm.city_id and m.logdate=nm.logdate) +WHEN NOT MATCHED THEN INSERT + (city_id, logdate, peaktemp, unitsales) + VALUES (city_id - 1, logdate, NULL, 100); -- should fail +ERROR: new row violates row-level security policy for table "measurement" +MERGE INTO measurement m + USING (VALUES (1, '01-17-2007'::date)) nm(city_id, logdate) ON + (m.city_id = nm.city_id and m.logdate=nm.logdate) +WHEN NOT MATCHED THEN INSERT + (city_id, logdate, peaktemp, unitsales) + VALUES (city_id - 1, logdate, 25, 100); -- ok +SELECT * FROM ONLY measurement ORDER BY city_id, logdate; + city_id | logdate | peaktemp | unitsales +---------+------------+----------+----------- + 0 | 07-21-2005 | 25 | 35 + 0 | 01-17-2007 | 25 | 100 +(2 rows) + +MERGE INTO measurement m + USING (VALUES (1, '01-18-2007'::date)) nm(city_id, logdate) ON + (m.city_id = nm.city_id and m.logdate=nm.logdate) +WHEN NOT MATCHED THEN INSERT + (city_id, logdate, peaktemp, unitsales) + VALUES (city_id - 1, logdate, 25, 200) +RETURNING merge_action(), m.*; + merge_action | city_id | logdate | peaktemp | unitsales +--------------+---------+------------+----------+----------- + INSERT | 0 | 01-18-2007 | 25 | 200 +(1 row) + DROP TABLE measurement, new_measurement CASCADE; NOTICE: drop cascades to 3 other objects DETAIL: drop cascades to table measurement_y2006m02 diff --git 
a/src/test/regress/expected/misc_functions.out b/src/test/regress/expected/misc_functions.out index cc517ed5e90..c3b2b9d8603 100644 --- a/src/test/regress/expected/misc_functions.out +++ b/src/test/regress/expected/misc_functions.out @@ -890,17 +890,17 @@ SELECT pg_column_toast_chunk_id(a) IS NULL, DROP TABLE test_chunk_id; DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); --- test stratnum support functions -SELECT gist_stratnum_common(7); - gist_stratnum_common ----------------------- - 3 +-- test stratnum translation support functions +SELECT gist_translate_cmptype_common(7); + gist_translate_cmptype_common +------------------------------- + 3 (1 row) -SELECT gist_stratnum_common(3); - gist_stratnum_common ----------------------- - 18 +SELECT gist_translate_cmptype_common(3); + gist_translate_cmptype_common +------------------------------- + 18 (1 row) -- relpath tests diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out index 072d76ce131..c58e232a263 100644 --- a/src/test/regress/expected/numeric.out +++ b/src/test/regress/expected/numeric.out @@ -1464,9 +1464,21 @@ ERROR: count must be greater than zero SELECT width_bucket(3.5::float8, 3.0::float8, 3.0::float8, 888); ERROR: lower bound cannot equal upper bound SELECT width_bucket('NaN', 3.0, 4.0, 888); -ERROR: operand, lower bound, and upper bound cannot be NaN + width_bucket +-------------- + 889 +(1 row) + +SELECT width_bucket('NaN'::float8, 3.0::float8, 4.0::float8, 888); + width_bucket +-------------- + 889 +(1 row) + +SELECT width_bucket(0, 'NaN', 4.0, 888); +ERROR: lower and upper bounds cannot be NaN SELECT width_bucket(0::float8, 'NaN', 4.0::float8, 888); -ERROR: operand, lower bound, and upper bound cannot be NaN +ERROR: lower and upper bounds cannot be NaN SELECT width_bucket(2.0, 3.0, '-inf', 888); ERROR: lower and upper bounds must be finite SELECT width_bucket(0::float8, '-inf', 4.0::float8, 888); @@ -3860,15 +3872,15 @@ ERROR: factorial of a 
negative number is undefined -- Tests for pg_lsn() -- SELECT pg_lsn(23783416::numeric); - pg_lsn ------------ - 0/16AE7F8 + pg_lsn +------------ + 0/016AE7F8 (1 row) SELECT pg_lsn(0::numeric); - pg_lsn --------- - 0/0 + pg_lsn +------------ + 0/00000000 (1 row) SELECT pg_lsn(18446744073709551615::numeric); diff --git a/src/test/regress/expected/pg_lsn.out b/src/test/regress/expected/pg_lsn.out index b27eec7c015..8ab59b2e445 100644 --- a/src/test/regress/expected/pg_lsn.out +++ b/src/test/regress/expected/pg_lsn.out @@ -41,9 +41,9 @@ SELECT * FROM pg_input_error_info('16AE7F7', 'pg_lsn'); -- Min/Max aggregation SELECT MIN(f1), MAX(f1) FROM PG_LSN_TBL; - min | max ------+------------------- - 0/0 | FFFFFFFF/FFFFFFFF + min | max +------------+------------------- + 0/00000000 | FFFFFFFF/FFFFFFFF (1 row) DROP TABLE PG_LSN_TBL; @@ -85,21 +85,21 @@ SELECT '0/16AE7F8'::pg_lsn - '0/16AE7F7'::pg_lsn; (1 row) SELECT '0/16AE7F7'::pg_lsn + 16::numeric; - ?column? ------------ - 0/16AE807 + ?column? +------------ + 0/016AE807 (1 row) SELECT 16::numeric + '0/16AE7F7'::pg_lsn; - ?column? ------------ - 0/16AE807 + ?column? +------------ + 0/016AE807 (1 row) SELECT '0/16AE7F7'::pg_lsn - 16::numeric; - ?column? ------------ - 0/16AE7E7 + ?column? +------------ + 0/016AE7E7 (1 row) SELECT 'FFFFFFFF/FFFFFFFE'::pg_lsn + 1::numeric; @@ -111,9 +111,9 @@ SELECT 'FFFFFFFF/FFFFFFFE'::pg_lsn + 1::numeric; SELECT 'FFFFFFFF/FFFFFFFE'::pg_lsn + 2::numeric; -- out of range error ERROR: pg_lsn out of range SELECT '0/1'::pg_lsn - 1::numeric; - ?column? ----------- - 0/0 + ?column? +------------ + 0/00000000 (1 row) SELECT '0/1'::pg_lsn - 2::numeric; -- out of range error @@ -125,9 +125,9 @@ SELECT '0/0'::pg_lsn + ('FFFFFFFF/FFFFFFFF'::pg_lsn - '0/0'::pg_lsn); (1 row) SELECT 'FFFFFFFF/FFFFFFFF'::pg_lsn - ('FFFFFFFF/FFFFFFFF'::pg_lsn - '0/0'::pg_lsn); - ?column? ----------- - 0/0 + ?column? 
+------------ + 0/00000000 (1 row) SELECT '0/16AE7F7'::pg_lsn + 'NaN'::numeric; @@ -164,107 +164,107 @@ SELECT DISTINCT (i || '/' || j)::pg_lsn f generate_series(1, 5) k WHERE i <= 10 AND j > 0 AND j <= 10 ORDER BY f; - f -------- - 1/1 - 1/2 - 1/3 - 1/4 - 1/5 - 1/6 - 1/7 - 1/8 - 1/9 - 1/10 - 2/1 - 2/2 - 2/3 - 2/4 - 2/5 - 2/6 - 2/7 - 2/8 - 2/9 - 2/10 - 3/1 - 3/2 - 3/3 - 3/4 - 3/5 - 3/6 - 3/7 - 3/8 - 3/9 - 3/10 - 4/1 - 4/2 - 4/3 - 4/4 - 4/5 - 4/6 - 4/7 - 4/8 - 4/9 - 4/10 - 5/1 - 5/2 - 5/3 - 5/4 - 5/5 - 5/6 - 5/7 - 5/8 - 5/9 - 5/10 - 6/1 - 6/2 - 6/3 - 6/4 - 6/5 - 6/6 - 6/7 - 6/8 - 6/9 - 6/10 - 7/1 - 7/2 - 7/3 - 7/4 - 7/5 - 7/6 - 7/7 - 7/8 - 7/9 - 7/10 - 8/1 - 8/2 - 8/3 - 8/4 - 8/5 - 8/6 - 8/7 - 8/8 - 8/9 - 8/10 - 9/1 - 9/2 - 9/3 - 9/4 - 9/5 - 9/6 - 9/7 - 9/8 - 9/9 - 9/10 - 10/1 - 10/2 - 10/3 - 10/4 - 10/5 - 10/6 - 10/7 - 10/8 - 10/9 - 10/10 + f +------------- + 1/00000001 + 1/00000002 + 1/00000003 + 1/00000004 + 1/00000005 + 1/00000006 + 1/00000007 + 1/00000008 + 1/00000009 + 1/00000010 + 2/00000001 + 2/00000002 + 2/00000003 + 2/00000004 + 2/00000005 + 2/00000006 + 2/00000007 + 2/00000008 + 2/00000009 + 2/00000010 + 3/00000001 + 3/00000002 + 3/00000003 + 3/00000004 + 3/00000005 + 3/00000006 + 3/00000007 + 3/00000008 + 3/00000009 + 3/00000010 + 4/00000001 + 4/00000002 + 4/00000003 + 4/00000004 + 4/00000005 + 4/00000006 + 4/00000007 + 4/00000008 + 4/00000009 + 4/00000010 + 5/00000001 + 5/00000002 + 5/00000003 + 5/00000004 + 5/00000005 + 5/00000006 + 5/00000007 + 5/00000008 + 5/00000009 + 5/00000010 + 6/00000001 + 6/00000002 + 6/00000003 + 6/00000004 + 6/00000005 + 6/00000006 + 6/00000007 + 6/00000008 + 6/00000009 + 6/00000010 + 7/00000001 + 7/00000002 + 7/00000003 + 7/00000004 + 7/00000005 + 7/00000006 + 7/00000007 + 7/00000008 + 7/00000009 + 7/00000010 + 8/00000001 + 8/00000002 + 8/00000003 + 8/00000004 + 8/00000005 + 8/00000006 + 8/00000007 + 8/00000008 + 8/00000009 + 8/00000010 + 9/00000001 + 9/00000002 + 9/00000003 + 9/00000004 + 9/00000005 + 9/00000006 + 
9/00000007 + 9/00000008 + 9/00000009 + 9/00000010 + 10/00000001 + 10/00000002 + 10/00000003 + 10/00000004 + 10/00000005 + 10/00000006 + 10/00000007 + 10/00000008 + 10/00000009 + 10/00000010 (100 rows) diff --git a/src/test/regress/expected/psql.out b/src/test/regress/expected/psql.out index cf48ae6d0c2..236eba2540e 100644 --- a/src/test/regress/expected/psql.out +++ b/src/test/regress/expected/psql.out @@ -160,12 +160,12 @@ LINE 1: SELECT $1, $2 foo4 | bar4 (1 row) --- \close (extended query protocol) -\close -\close: missing required argument -\close '' -\close stmt2 -\close stmt2 +-- \close_prepared (extended query protocol) +\close_prepared +\close_prepared: missing required argument +\close_prepared '' +\close_prepared stmt2 +\close_prepared stmt2 SELECT name, statement FROM pg_prepared_statements ORDER BY name; name | statement -------+---------------- @@ -4666,7 +4666,7 @@ bar 'bar' "bar" \C arg1 \c arg1 arg2 arg3 arg4 \cd arg1 - \close stmt1 + \close_prepared stmt1 \conninfo \copy arg1 arg2 arg3 arg4 arg5 arg6 \copyright diff --git a/src/test/regress/expected/psql_pipeline.out b/src/test/regress/expected/psql_pipeline.out index a30dec088b9..a0816fb10b6 100644 --- a/src/test/regress/expected/psql_pipeline.out +++ b/src/test/regress/expected/psql_pipeline.out @@ -228,192 +228,6 @@ BEGIN \bind \sendpipeline INSERT INTO psql_pipeline VALUES ($1) \bind 1 \sendpipeline COMMIT \bind \sendpipeline \endpipeline --- COPY FROM STDIN --- with \sendpipeline and \bind -\startpipeline -SELECT $1 \bind 'val1' \sendpipeline -COPY psql_pipeline FROM STDIN \bind \sendpipeline -\endpipeline - ?column? ----------- - val1 -(1 row) - --- with semicolon -\startpipeline -SELECT 'val1'; -COPY psql_pipeline FROM STDIN; -\endpipeline - ?column? 
----------- - val1 -(1 row) - --- COPY FROM STDIN with \flushrequest + \getresults --- with \sendpipeline and \bind -\startpipeline -SELECT $1 \bind 'val1' \sendpipeline -COPY psql_pipeline FROM STDIN \bind \sendpipeline -\flushrequest -\getresults - ?column? ----------- - val1 -(1 row) - -message type 0x5a arrived from server while idle -\endpipeline --- with semicolon -\startpipeline -SELECT 'val1'; -COPY psql_pipeline FROM STDIN; -\flushrequest -\getresults - ?column? ----------- - val1 -(1 row) - -message type 0x5a arrived from server while idle -\endpipeline --- COPY FROM STDIN with \syncpipeline + \getresults --- with \bind and \sendpipeline -\startpipeline -SELECT $1 \bind 'val1' \sendpipeline -COPY psql_pipeline FROM STDIN \bind \sendpipeline -\syncpipeline -\getresults - ?column? ----------- - val1 -(1 row) - -\endpipeline --- with semicolon -\startpipeline -SELECT 'val1'; -COPY psql_pipeline FROM STDIN; -\syncpipeline -\getresults - ?column? ----------- - val1 -(1 row) - -\endpipeline --- COPY TO STDOUT --- with \bind and \sendpipeline -\startpipeline -SELECT $1 \bind 'val1' \sendpipeline -copy psql_pipeline TO STDOUT \bind \sendpipeline -\endpipeline - ?column? ----------- - val1 -(1 row) - -1 \N -2 test2 -20 test2 -3 test3 -30 test3 -4 test4 -40 test4 --- with semicolon -\startpipeline -SELECT 'val1'; -copy psql_pipeline TO STDOUT; -\endpipeline - ?column? ----------- - val1 -(1 row) - -1 \N -2 test2 -20 test2 -3 test3 -30 test3 -4 test4 -40 test4 --- COPY TO STDOUT with \flushrequest + \getresults --- with \bind and \sendpipeline -\startpipeline -SELECT $1 \bind 'val1' \sendpipeline -copy psql_pipeline TO STDOUT \bind \sendpipeline -\flushrequest -\getresults - ?column? ----------- - val1 -(1 row) - -1 \N -2 test2 -20 test2 -3 test3 -30 test3 -4 test4 -40 test4 -\endpipeline --- with semicolon -\startpipeline -SELECT 'val1'; -copy psql_pipeline TO STDOUT; -\flushrequest -\getresults - ?column? 
----------- - val1 -(1 row) - -1 \N -2 test2 -20 test2 -3 test3 -30 test3 -4 test4 -40 test4 -\endpipeline --- COPY TO STDOUT with \syncpipeline + \getresults --- with \bind and \sendpipeline -\startpipeline -SELECT $1 \bind 'val1' \sendpipeline -copy psql_pipeline TO STDOUT \bind \sendpipeline -\syncpipeline -\getresults - ?column? ----------- - val1 -(1 row) - -1 \N -2 test2 -20 test2 -3 test3 -30 test3 -4 test4 -40 test4 -\endpipeline --- with semicolon -\startpipeline -SELECT 'val1'; -copy psql_pipeline TO STDOUT; -\syncpipeline -\getresults - ?column? ----------- - val1 -(1 row) - -1 \N -2 test2 -20 test2 -3 test3 -30 test3 -4 test4 -40 test4 -\endpipeline -- Use \parse and \bind_named \startpipeline SELECT $1 \parse '' @@ -740,7 +554,7 @@ SELECT COUNT(*) FROM psql_pipeline \bind \sendpipeline count ------- - 7 + 1 (1 row) -- After an error, pipeline is aborted and requires \syncpipeline to be @@ -750,7 +564,7 @@ SELECT $1 \bind \sendpipeline SELECT $1 \bind 1 \sendpipeline SELECT $1 \parse a \bind_named a 1 \sendpipeline -\close a +\close_prepared a \flushrequest \getresults ERROR: bind message supplies 0 parameters, but prepared statement "" requires 1 @@ -758,7 +572,7 @@ ERROR: bind message supplies 0 parameters, but prepared statement "" requires 1 SELECT $1 \bind 1 \sendpipeline SELECT $1 \parse a \bind_named a 1 \sendpipeline -\close a +\close_prepared a -- Sync allows pipeline to recover. \syncpipeline \getresults @@ -766,7 +580,7 @@ Pipeline aborted, command did not run SELECT $1 \bind 1 \sendpipeline SELECT $1 \parse a \bind_named a 1 \sendpipeline -\close a +\close_prepared a \flushrequest \getresults ?column? 
diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out index 4de96c04f9d..3a2eacd793f 100644 --- a/src/test/regress/expected/publication.out +++ b/src/test/regress/expected/publication.out @@ -34,7 +34,8 @@ ERROR: conflicting or redundant options LINE 1: ...pub_xxx WITH (publish_generated_columns = stored, publish_ge... ^ CREATE PUBLICATION testpub_xxx WITH (publish_generated_columns = foo); -ERROR: publish_generated_columns requires a "none" or "stored" value +ERROR: invalid value for publication parameter "publish_generated_columns": "foo" +DETAIL: Valid values are "none" and "stored". \dRp List of publications Name | Owner | All tables | Inserts | Updates | Deletes | Truncates | Generated columns | Via root @@ -524,16 +525,22 @@ Tables from schemas: "testpub_rf_schema2" -- fail - virtual generated column uses user-defined function +-- (Actually, this already fails at CREATE TABLE rather than at CREATE +-- PUBLICATION, but let's keep the test in case the former gets +-- relaxed sometime.) CREATE TABLE testpub_rf_tbl6 (id int PRIMARY KEY, x int, y int GENERATED ALWAYS AS (x * testpub_rf_func2()) VIRTUAL); +ERROR: generation expression uses user-defined function +LINE 1: ...RIMARY KEY, x int, y int GENERATED ALWAYS AS (x * testpub_rf... + ^ +DETAIL: Virtual generated columns that make use of user-defined functions are not yet supported. CREATE PUBLICATION testpub7 FOR TABLE testpub_rf_tbl6 WHERE (y > 100); -ERROR: invalid publication WHERE expression -DETAIL: User-defined or built-in mutable functions are not allowed. 
+ERROR: relation "testpub_rf_tbl6" does not exist -- test that SET EXPRESSION is rejected, because it could affect a row filter SET client_min_messages = 'ERROR'; CREATE TABLE testpub_rf_tbl7 (id int PRIMARY KEY, x int, y int GENERATED ALWAYS AS (x * 111) VIRTUAL); CREATE PUBLICATION testpub8 FOR TABLE testpub_rf_tbl7 WHERE (y > 100); ALTER TABLE testpub_rf_tbl7 ALTER COLUMN y SET EXPRESSION AS (x * testpub_rf_func2()); -ERROR: ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns on tables that are part of a publication +ERROR: ALTER TABLE / SET EXPRESSION is not supported for virtual generated columns in tables that are part of a publication DETAIL: Column "y" of relation "testpub_rf_tbl7" is a virtual generated column. RESET client_min_messages; DROP TABLE testpub_rf_tbl1; @@ -541,7 +548,7 @@ DROP TABLE testpub_rf_tbl2; DROP TABLE testpub_rf_tbl3; DROP TABLE testpub_rf_tbl4; DROP TABLE testpub_rf_tbl5; -DROP TABLE testpub_rf_tbl6; +--DROP TABLE testpub_rf_tbl6; DROP TABLE testpub_rf_schema1.testpub_rf_tbl5; DROP TABLE testpub_rf_schema2.testpub_rf_tbl6; DROP SCHEMA testpub_rf_schema1; diff --git a/src/test/regress/expected/regproc.out b/src/test/regress/expected/regproc.out index 97b917502ca..84c84aef420 100644 --- a/src/test/regress/expected/regproc.out +++ b/src/test/regress/expected/regproc.out @@ -192,6 +192,18 @@ SELECT regnamespace('"pg_catalog"'); pg_catalog (1 row) +SELECT regdatabase('template1'); + regdatabase +------------- + template1 +(1 row) + +SELECT regdatabase('"template1"'); + regdatabase +------------- + template1 +(1 row) + SELECT to_regrole('regress_regrole_test'); to_regrole ---------------------- @@ -216,6 +228,132 @@ SELECT to_regnamespace('"pg_catalog"'); pg_catalog (1 row) +SELECT to_regdatabase('template1'); + to_regdatabase +---------------- + template1 +(1 row) + +SELECT to_regdatabase('"template1"'); + to_regdatabase +---------------- + template1 +(1 row) + +-- special "single dash" case +SELECT 
regproc('-')::oid; + regproc +--------- + 0 +(1 row) + +SELECT regprocedure('-')::oid; + regprocedure +-------------- + 0 +(1 row) + +SELECT regclass('-')::oid; + regclass +---------- + 0 +(1 row) + +SELECT regcollation('-')::oid; + regcollation +-------------- + 0 +(1 row) + +SELECT regtype('-')::oid; + regtype +--------- + 0 +(1 row) + +SELECT regconfig('-')::oid; + regconfig +----------- + 0 +(1 row) + +SELECT regdictionary('-')::oid; + regdictionary +--------------- + 0 +(1 row) + +SELECT regrole('-')::oid; + regrole +--------- + 0 +(1 row) + +SELECT regnamespace('-')::oid; + regnamespace +-------------- + 0 +(1 row) + +SELECT regdatabase('-')::oid; + regdatabase +------------- + 0 +(1 row) + +SELECT to_regproc('-')::oid; + to_regproc +------------ + 0 +(1 row) + +SELECT to_regprocedure('-')::oid; + to_regprocedure +----------------- + 0 +(1 row) + +SELECT to_regclass('-')::oid; + to_regclass +------------- + 0 +(1 row) + +SELECT to_regcollation('-')::oid; + to_regcollation +----------------- + 0 +(1 row) + +SELECT to_regtype('-')::oid; + to_regtype +------------ + 0 +(1 row) + +SELECT to_regrole('-')::oid; + to_regrole +------------ + 0 +(1 row) + +SELECT to_regnamespace('-')::oid; + to_regnamespace +----------------- + 0 +(1 row) + +SELECT to_regdatabase('-')::oid; + to_regdatabase +---------------- + 0 +(1 row) + +-- constant cannot be used here +CREATE TABLE regrole_test (rolid OID DEFAULT 'regress_regrole_test'::regrole); +ERROR: constant of the type regrole cannot be used here +CREATE TABLE regdatabase_test (datid OID DEFAULT 'template1'::regdatabase); +ERROR: constant of the type regdatabase cannot be used here /* If objects don't exist, raise errors. 
*/ DROP ROLE regress_regrole_test; -- without schemaname @@ -305,6 +443,18 @@ SELECT regnamespace('foo.bar'); ERROR: invalid name syntax LINE 1: SELECT regnamespace('foo.bar'); ^ +SELECT regdatabase('Nonexistent'); +ERROR: database "nonexistent" does not exist +LINE 1: SELECT regdatabase('Nonexistent'); + ^ +SELECT regdatabase('"Nonexistent"'); +ERROR: database "Nonexistent" does not exist +LINE 1: SELECT regdatabase('"Nonexistent"'); + ^ +SELECT regdatabase('foo.bar'); +ERROR: invalid name syntax +LINE 1: SELECT regdatabase('foo.bar'); + ^ /* If objects don't exist, return NULL with no error. */ -- without schemaname SELECT to_regoper('||//'); @@ -447,6 +597,24 @@ SELECT to_regnamespace('foo.bar'); (1 row) +SELECT to_regdatabase('Nonexistent'); + to_regdatabase +---------------- + +(1 row) + +SELECT to_regdatabase('"Nonexistent"'); + to_regdatabase +---------------- + +(1 row) + +SELECT to_regdatabase('foo.bar'); + to_regdatabase +---------------- + +(1 row) + -- Test to_regtypemod SELECT to_regtypemod('text'); to_regtypemod @@ -569,6 +737,12 @@ SELECT * FROM pg_input_error_info('no_such_type', 'regtype'); type "no_such_type" does not exist | | | 42704 (1 row) +SELECT * FROM pg_input_error_info('Nonexistent', 'regdatabase'); + message | detail | hint | sql_error_code +---------------------------------------+--------+------+---------------- + database "nonexistent" does not exist | | | 42704 +(1 row) + -- Some cases that should be soft errors, but are not yet SELECT * FROM pg_input_error_info('incorrect type name syntax', 'regtype'); ERROR: syntax error at or near "type" diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 174f0a68331..788844abd20 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -614,6 +614,73 @@ SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null; SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error; ERROR: invalid escape string HINT: Escape 
string must be empty or one character. +-- Characters that should be left alone in character classes when a +-- SIMILAR TO regexp pattern is converted to POSIX style. +-- Underscore "_" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '_[_[:alpha:]_]_'; + QUERY PLAN +------------------------------------------------ + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:.[_[:alpha:]_].)$'::text) +(2 rows) + +-- Percentage "%" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '%[%[:alnum:]%]%'; + QUERY PLAN +-------------------------------------------------- + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:.*[%[:alnum:]%].*)$'::text) +(2 rows) + +-- Dot "." +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '.[.[:alnum:].].'; + QUERY PLAN +-------------------------------------------------- + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:\.[.[:alnum:].]\.)$'::text) +(2 rows) + +-- Dollar "$" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '$[$[:alnum:]$]$'; + QUERY PLAN +-------------------------------------------------- + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:\$[$[:alnum:]$]\$)$'::text) +(2 rows) + +-- Opening parenthesis "(" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '()[([:alnum:](]()'; + QUERY PLAN +------------------------------------------------------ + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:(?:)[([:alnum:](](?:))$'::text) +(2 rows) + +-- Caret "^" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]^'; + QUERY PLAN +------------------------------------------------------------------------ + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:\^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]\^)$'::text) +(2 rows) + +-- Closing square bracket "]" at the beginning of character class +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[]%][^]%][^%]%'; + QUERY PLAN +------------------------------------------------ + Seq Scan on text_tbl + Filter: (f1 ~ 
'^(?:[]%][^]%][^%].*)$'::text) +(2 rows) + +-- Closing square bracket effective after two carets at the beginning +-- of character class. +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[^^]^'; + QUERY PLAN +--------------------------------------- + Seq Scan on text_tbl + Filter: (f1 ~ '^(?:[^^]\^)$'::text) +(2 rows) + -- Test backslash escapes in regexp_replace's replacement string SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3'); regexp_replace diff --git a/src/test/regress/expected/subscription.out b/src/test/regress/expected/subscription.out index 1443e1d9292..529b2241731 100644 --- a/src/test/regress/expected/subscription.out +++ b/src/test/regress/expected/subscription.out @@ -116,18 +116,18 @@ CREATE SUBSCRIPTION regress_testsub4 CONNECTION 'dbname=regress_doesnotexist' PU WARNING: subscription was created, but is not connected HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. \dRs+ regress_testsub4 - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN -------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub4 | regress_subscription_user | f | {testpub} | f | parallel | d | f | none | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub4 | regress_subscription_user | f | {testpub} | f | parallel | d | f | none | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub4 SET (origin = any); \dRs+ regress_testsub4 - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN -------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub4 | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub4 | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) DROP SUBSCRIPTION regress_testsub3; @@ -145,10 +145,10 @@ ALTER SUBSCRIPTION regress_testsub CONNECTION 'foobar'; ERROR: invalid connection string syntax: missing "=" after "foobar" in connection info string \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET PUBLICATION testpub2, testpub3 WITH (refresh = false); @@ -157,10 +157,10 @@ ALTER SUBSCRIPTION regress_testsub SET (slot_name = 'newname'); ALTER SUBSCRIPTION regress_testsub SET (password_required = false); ALTER SUBSCRIPTION regress_testsub SET (run_as_owner = true); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | f | t | f | off | dbname=regress_doesnotexist2 | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | f | t | f | off | dbname=regress_doesnotexist2 | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (password_required = true); @@ -176,10 +176,10 @@ ERROR: unrecognized subscription parameter: "create_slot" -- ok ALTER SUBSCRIPTION regress_testsub SKIP (lsn = '0/12345'); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist2 | 0/12345 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist2 | 0/00012345 (1 row) -- ok - with lsn = NONE @@ -188,10 +188,10 @@ ALTER SUBSCRIPTION regress_testsub SKIP (lsn = NONE); ALTER SUBSCRIPTION regress_testsub SKIP (lsn = '0/0'); ERROR: invalid WAL location (LSN): 0/0 \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist2 | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist2 | 0/00000000 (1 row) BEGIN; @@ -223,10 +223,10 @@ ALTER SUBSCRIPTION regress_testsub_foo SET (synchronous_commit = foobar); ERROR: invalid value for parameter "synchronous_commit": "foobar" HINT: Available values: local, remote_write, remote_apply, on, off. \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ----------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------------+---------- - regress_testsub_foo | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | local | dbname=regress_doesnotexist2 | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +---------------------+---------------------------+---------+---------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+------------------------------+------------ + regress_testsub_foo | regress_subscription_user | f | {testpub2,testpub3} | f | parallel | d | f | any | t | f | f | local | dbname=regress_doesnotexist2 | 0/00000000 (1 row) -- rename back to keep the rest simple @@ -255,19 +255,19 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB WARNING: subscription was created, but is not connected HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub} | t | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | t | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (binary = false); ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) DROP SUBSCRIPTION regress_testsub; @@ -279,27 +279,27 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB WARNING: subscription was created, but is not connected HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub} | f | on | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | on | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (streaming = parallel); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (streaming = false); ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub} | f | off | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | off | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) -- fail - publication already exists @@ -314,10 +314,10 @@ ALTER SUBSCRIPTION regress_testsub ADD PUBLICATION testpub1, testpub2 WITH (refr ALTER SUBSCRIPTION regress_testsub ADD PUBLICATION testpub1, testpub2 WITH (refresh = false); ERROR: publication "testpub1" is already in subscription "regress_testsub" \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-----------------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub,testpub1,testpub2} | f | off | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-----------------------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub,testpub1,testpub2} | f | off | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) -- fail - publication used more than once @@ -332,10 +332,10 @@ ERROR: publication "testpub3" is not in subscription "regress_testsub" -- ok - delete publications ALTER SUBSCRIPTION regress_testsub DROP PUBLICATION testpub1, testpub2 WITH (refresh = false); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub} | f | off | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | off | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) DROP SUBSCRIPTION regress_testsub; @@ -371,19 +371,19 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB WARNING: subscription was created, but is not connected HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | p | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | p | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) -- we can alter streaming when two_phase enabled ALTER SUBSCRIPTION regress_testsub SET (streaming = true); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub} | f | on | p | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | on | p | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); @@ -393,10 +393,10 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB WARNING: subscription was created, but is not connected HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub} | f | on | p | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | on | p | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); @@ -409,18 +409,18 @@ CREATE SUBSCRIPTION regress_testsub CONNECTION 'dbname=regress_doesnotexist' PUB WARNING: subscription was created, but is not connected HINT: To initiate replication, you must manually create the replication slot, enable the subscription, and refresh the subscription. \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | f | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (disable_on_error = true); \dRs+ - List of subscriptions - Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? | Failover | Synchronous commit | Conninfo | Skip LSN ------------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+---------- - regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | t | any | t | f | f | off | dbname=regress_doesnotexist | 0/0 + List of subscriptions + Name | Owner | Enabled | Publication | Binary | Streaming | Two-phase commit | Disable on error | Origin | Password required | Run as owner? 
| Failover | Synchronous commit | Conninfo | Skip LSN +-----------------+---------------------------+---------+-------------+--------+-----------+------------------+------------------+--------+-------------------+---------------+----------+--------------------+-----------------------------+------------ + regress_testsub | regress_subscription_user | f | {testpub} | f | parallel | d | t | any | t | f | f | off | dbname=regress_doesnotexist | 0/00000000 (1 row) ALTER SUBSCRIPTION regress_testsub SET (slot_name = NONE); diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 40d8056fcea..18fed63e738 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -2127,30 +2127,30 @@ explain (verbose, costs off) select ss2.* from int8_tbl t1 left join (int8_tbl t2 left join - (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join + (select coalesce(q1, q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) on t1.q2 = ss2.q1 order by 1, 2, 3; - QUERY PLAN ----------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------- Sort - Output: (COALESCE(t3.q1)), t4.q1, t4.q2 - Sort Key: (COALESCE(t3.q1)), t4.q1, t4.q2 + Output: (COALESCE(t3.q1, t3.q1)), t4.q1, t4.q2 + Sort Key: (COALESCE(t3.q1, t3.q1)), t4.q1, t4.q2 -> Hash Right Join - Output: (COALESCE(t3.q1)), t4.q1, t4.q2 + Output: (COALESCE(t3.q1, t3.q1)), t4.q1, t4.q2 Hash Cond: (t4.q1 = t1.q2) -> Hash Join - Output: (COALESCE(t3.q1)), t4.q1, t4.q2 + Output: (COALESCE(t3.q1, t3.q1)), t4.q1, t4.q2 Hash Cond: (t2.q2 = t4.q1) -> Hash Left Join - Output: t2.q2, (COALESCE(t3.q1)) + Output: t2.q2, (COALESCE(t3.q1, t3.q1)) Hash Cond: (t2.q1 = t3.q2) -> Seq Scan on public.int8_tbl t2 Output: t2.q1, t2.q2 -> Hash - Output: t3.q2, (COALESCE(t3.q1)) + Output: t3.q2, 
(COALESCE(t3.q1, t3.q1)) -> Seq Scan on public.int8_tbl t3 - Output: t3.q2, COALESCE(t3.q1) + Output: t3.q2, COALESCE(t3.q1, t3.q1) -> Hash Output: t4.q1, t4.q2 -> Seq Scan on public.int8_tbl t4 @@ -2164,7 +2164,7 @@ order by 1, 2, 3; select ss2.* from int8_tbl t1 left join (int8_tbl t2 left join - (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join + (select coalesce(q1, q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) on t1.q2 = ss2.q1 order by 1, 2, 3; @@ -2201,32 +2201,32 @@ explain (verbose, costs off) select ss2.* from int8_tbl t1 left join (int8_tbl t2 left join - (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join + (select coalesce(q1, q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) on t1.q2 = ss2.q1 order by 1, 2, 3; - QUERY PLAN ----------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------- Sort - Output: ((COALESCE(t3.q1))), t4.q1, t4.q2 - Sort Key: ((COALESCE(t3.q1))), t4.q1, t4.q2 + Output: ((COALESCE(t3.q1, t3.q1))), t4.q1, t4.q2 + Sort Key: ((COALESCE(t3.q1, t3.q1))), t4.q1, t4.q2 -> Hash Right Join - Output: ((COALESCE(t3.q1))), t4.q1, t4.q2 + Output: ((COALESCE(t3.q1, t3.q1))), t4.q1, t4.q2 Hash Cond: (t4.q1 = t1.q2) -> Nested Loop - Output: t4.q1, t4.q2, ((COALESCE(t3.q1))) + Output: t4.q1, t4.q2, ((COALESCE(t3.q1, t3.q1))) Join Filter: (t2.q2 = t4.q1) -> Hash Left Join - Output: t2.q2, (COALESCE(t3.q1)) + Output: t2.q2, (COALESCE(t3.q1, t3.q1)) Hash Cond: (t2.q1 = t3.q2) -> Seq Scan on public.int8_tbl t2 Output: t2.q1, t2.q2 -> Hash - Output: t3.q2, (COALESCE(t3.q1)) + Output: t3.q2, (COALESCE(t3.q1, t3.q1)) -> Seq Scan on public.int8_tbl t3 - Output: t3.q2, COALESCE(t3.q1) + Output: t3.q2, COALESCE(t3.q1, t3.q1) -> Seq Scan on 
public.int8_tbl t4 - Output: t4.q1, t4.q2, (COALESCE(t3.q1)) + Output: t4.q1, t4.q2, (COALESCE(t3.q1, t3.q1)) -> Hash Output: t1.q2 -> Seq Scan on public.int8_tbl t1 @@ -2236,7 +2236,7 @@ order by 1, 2, 3; select ss2.* from int8_tbl t1 left join (int8_tbl t2 left join - (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join + (select coalesce(q1, q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) on t1.q2 = ss2.q1 order by 1, 2, 3; diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index ae17d028ed3..83228cfca29 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -232,22 +232,3 @@ select * from pg_timezone_abbrevs where abbrev = 'LMT'; LMT | @ 7 hours 52 mins 58 secs ago | f (1 row) -DO $$ -DECLARE - bg_writer_pid int; - r RECORD; -BEGIN - SELECT pid from pg_stat_activity where backend_type='background writer' - INTO bg_writer_pid; - - select type, name, ident - from pg_get_process_memory_contexts(bg_writer_pid, false, 20) - where path = '{1}' into r; - RAISE NOTICE '%', r; - select type, name, ident - from pg_get_process_memory_contexts(pg_backend_pid(), false, 20) - where path = '{1}' into r; - RAISE NOTICE '%', r; -END $$; -NOTICE: (AllocSet,TopMemoryContext,) -NOTICE: (AllocSet,TopMemoryContext,) diff --git a/src/test/regress/expected/triggers.out b/src/test/regress/expected/triggers.out index f245d7f1549..872b9100e1a 100644 --- a/src/test/regress/expected/triggers.out +++ b/src/test/regress/expected/triggers.out @@ -2280,6 +2280,27 @@ select * from parted; drop table parted; drop function parted_trigfunc(); -- +-- Constraint triggers +-- +create constraint trigger crtr + after insert on foo not valid + for each row execute procedure foo (); +ERROR: constraint triggers cannot be marked NOT VALID +LINE 2: after insert on foo not valid + ^ +create constraint trigger crtr 
+ after insert on foo no inherit + for each row execute procedure foo (); +ERROR: constraint triggers cannot be marked NO INHERIT +LINE 2: after insert on foo no inherit + ^ +create constraint trigger crtr + after insert on foo not enforced + for each row execute procedure foo (); +ERROR: constraint triggers cannot be marked NOT ENFORCED +LINE 2: after insert on foo not enforced + ^ +-- -- Constraint triggers and partitioned tables create table parted_constr_ancestor (a int, b text) partition by range (b); @@ -2294,7 +2315,7 @@ create constraint trigger parted_trig after insert on parted_constr_ancestor deferrable for each row execute procedure trigger_notice_ab(); create constraint trigger parted_trig_two after insert on parted_constr - deferrable initially deferred + deferrable initially deferred enforced for each row when (bark(new.b) AND new.a % 2 = 1) execute procedure trigger_notice_ab(); -- The immediate constraint is fired immediately; the WHEN clause of the @@ -3535,8 +3556,8 @@ drop table parent, child; drop function f(); -- Test who runs deferred trigger functions -- setup -create role regress_groot; -create role regress_outis; +create role regress_caller; +create role regress_fn_owner; create function whoami() returns trigger language plpgsql as $$ begin @@ -3544,7 +3565,7 @@ begin return null; end; $$; -alter function whoami() owner to regress_outis; +alter function whoami() owner to regress_fn_owner; create table defer_trig (id integer); grant insert on defer_trig to public; create constraint trigger whoami after insert on defer_trig @@ -3553,23 +3574,23 @@ create constraint trigger whoami after insert on defer_trig execute function whoami(); -- deferred triggers must run as the user that queued the trigger begin; -set role regress_groot; +set role regress_caller; insert into defer_trig values (1); reset role; -set role regress_outis; +set role regress_fn_owner; insert into defer_trig values (2); reset role; commit; -NOTICE: I am regress_groot 
-NOTICE: I am regress_outis +NOTICE: I am regress_caller +NOTICE: I am regress_fn_owner -- security definer functions override the user who queued the trigger alter function whoami() security definer; begin; -set role regress_groot; +set role regress_caller; insert into defer_trig values (3); reset role; commit; -NOTICE: I am regress_outis +NOTICE: I am regress_fn_owner alter function whoami() security invoker; -- make sure the current user is restored after error create or replace function whoami() returns trigger language plpgsql @@ -3581,11 +3602,11 @@ begin end; $$; begin; -set role regress_groot; +set role regress_caller; insert into defer_trig values (4); reset role; commit; -- error expected -NOTICE: I am regress_groot +NOTICE: I am regress_caller ERROR: division by zero CONTEXT: SQL statement "SELECT 1 / 0" PL/pgSQL function whoami() line 4 at PERFORM @@ -3598,5 +3619,5 @@ select current_user = session_user; -- clean up drop table defer_trig; drop function whoami(); -drop role regress_outis; -drop role regress_groot; +drop role regress_fn_owner; +drop role regress_caller; diff --git a/src/test/regress/expected/type_sanity.out b/src/test/regress/expected/type_sanity.out index dd0c52ab08b..943e56506bf 100644 --- a/src/test/regress/expected/type_sanity.out +++ b/src/test/regress/expected/type_sanity.out @@ -711,6 +711,7 @@ CREATE TABLE tab_core_types AS SELECT 'regtype'::regtype type, 'pg_monitor'::regrole, 'pg_class'::regclass::oid, + 'template1'::regdatabase, '(1,1)'::tid, '2'::xid, '3'::cid, '10:20:10,14,15'::txid_snapshot, '10:20:10,14,15'::pg_snapshot, diff --git a/src/test/regress/expected/without_overlaps.out b/src/test/regress/expected/without_overlaps.out index ea607bed0a4..f3144bdc39c 100644 --- a/src/test/regress/expected/without_overlaps.out +++ b/src/test/regress/expected/without_overlaps.out @@ -1426,7 +1426,7 @@ CREATE TABLE temporal_fk_rng2rng ( CONSTRAINT temporal_fk_rng2rng_fk FOREIGN KEY (parent_id, valid_at) REFERENCES temporal_rng (id, 
valid_at) ); -ERROR: foreign key must use PERIOD when referencing a primary using WITHOUT OVERLAPS +ERROR: foreign key must use PERIOD when referencing a primary key using WITHOUT OVERLAPS -- (parent_id, valid_at) REFERENCES (id, PERIOD valid_at) -- FOREIGN KEY part should specify PERIOD CREATE TABLE temporal_fk_rng2rng ( @@ -1900,7 +1900,7 @@ CREATE TABLE temporal_fk_mltrng2mltrng ( CONSTRAINT temporal_fk_mltrng2mltrng_fk FOREIGN KEY (parent_id, valid_at) REFERENCES temporal_mltrng (id, valid_at) ); -ERROR: foreign key must use PERIOD when referencing a primary using WITHOUT OVERLAPS +ERROR: foreign key must use PERIOD when referencing a primary key using WITHOUT OVERLAPS -- (parent_id, valid_at) REFERENCES (id, PERIOD valid_at) -- FOREIGN KEY part should specify PERIOD CREATE TABLE temporal_fk_mltrng2mltrng ( diff --git a/src/test/regress/sql/alter_table.sql b/src/test/regress/sql/alter_table.sql index 5ce9d1e429f..fc6e36d0e78 100644 --- a/src/test/regress/sql/alter_table.sql +++ b/src/test/regress/sql/alter_table.sql @@ -3069,6 +3069,23 @@ drop table attbl, atref; /* End test case for bug #17409 */ +/* Test case for bug #18970 */ + +create table attbl(a int); +create table atref(b attbl check ((b).a is not null)); +alter table attbl alter column a type numeric; -- someday this should work +alter table atref drop constraint atref_b_check; + +create statistics atref_stat on ((b).a is not null) from atref; +alter table attbl alter column a type numeric; -- someday this should work +drop statistics atref_stat; + +create index atref_idx on atref (((b).a)); +alter table attbl alter column a type numeric; -- someday this should work +drop table attbl, atref; + +/* End test case for bug #18970 */ + -- Test that ALTER TABLE rewrite preserves a clustered index -- for normal indexes and indexes on constraints. 
create table alttype_cluster (a int); diff --git a/src/test/regress/sql/btree_index.sql b/src/test/regress/sql/btree_index.sql index 68c61dbc7d1..6aaaa386abc 100644 --- a/src/test/regress/sql/btree_index.sql +++ b/src/test/regress/sql/btree_index.sql @@ -143,38 +143,83 @@ SELECT proname, proargtypes, pronamespace ORDER BY proname DESC, proargtypes DESC, pronamespace DESC LIMIT 1; -- --- Add coverage for RowCompare quals whose rhs row has a NULL that ends scan +-- Forwards scan RowCompare qual whose row arg has a NULL that affects our +-- initial positioning strategy -- explain (costs off) SELECT proname, proargtypes, pronamespace FROM pg_proc - WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL) + WHERE (proname, proargtypes) >= ('abs', NULL) AND proname <= 'abs' ORDER BY proname, proargtypes, pronamespace; SELECT proname, proargtypes, pronamespace FROM pg_proc - WHERE proname = 'abs' AND (proname, proargtypes) < ('abs', NULL) + WHERE (proname, proargtypes) >= ('abs', NULL) AND proname <= 'abs' ORDER BY proname, proargtypes, pronamespace; -- --- Add coverage for backwards scan RowCompare quals whose rhs row has a NULL --- that ends scan +-- Forwards scan RowCompare quals whose row arg has a NULL that ends scan -- explain (costs off) SELECT proname, proargtypes, pronamespace FROM pg_proc - WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL) + WHERE proname >= 'abs' AND (proname, proargtypes) < ('abs', NULL) +ORDER BY proname, proargtypes, pronamespace; + +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname >= 'abs' AND (proname, proargtypes) < ('abs', NULL) +ORDER BY proname, proargtypes, pronamespace; + +-- +-- Backwards scan RowCompare qual whose row arg has a NULL that affects our +-- initial positioning strategy +-- +explain (costs off) +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname >= 'abs' AND (proname, proargtypes) <= ('abs', NULL) ORDER BY proname DESC, proargtypes DESC, pronamespace 
DESC; SELECT proname, proargtypes, pronamespace FROM pg_proc - WHERE proname = 'abs' AND (proname, proargtypes) > ('abs', NULL) + WHERE proname >= 'abs' AND (proname, proargtypes) <= ('abs', NULL) ORDER BY proname DESC, proargtypes DESC, pronamespace DESC; -- --- Add coverage for recheck of > key following array advancement on previous --- (left sibling) page that used a high key whose attribute value corresponding --- to the > key was -inf (due to being truncated when the high key was created). +-- Backwards scan RowCompare qual whose row arg has a NULL that ends scan +-- +explain (costs off) +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE (proname, proargtypes) > ('abs', NULL) AND proname <= 'abs' +ORDER BY proname DESC, proargtypes DESC, pronamespace DESC; + +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE (proname, proargtypes) > ('abs', NULL) AND proname <= 'abs' +ORDER BY proname DESC, proargtypes DESC, pronamespace DESC; + +-- Makes B-Tree preprocessing deal with unmarking redundant keys that were +-- initially marked required (test case relies on current row compare +-- preprocessing limitations) +explain (costs off) +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname = 'zzzzzz' AND (proname, proargtypes) > ('abs', NULL) + AND pronamespace IN (1, 2, 3) AND proargtypes IN ('26 23', '5077') +ORDER BY proname, proargtypes, pronamespace; + +SELECT proname, proargtypes, pronamespace + FROM pg_proc + WHERE proname = 'zzzzzz' AND (proname, proargtypes) > ('abs', NULL) + AND pronamespace IN (1, 2, 3) AND proargtypes IN ('26 23', '5077') +ORDER BY proname, proargtypes, pronamespace; + +-- +-- Performs a recheck of > key following array advancement on previous (left +-- sibling) page that used a high key whose attribute value corresponding to +-- the > key was -inf (due to being truncated when the high key was created). 
-- -- XXX This relies on the assumption that tenk1_thous_tenthous has a truncated -- high key "(183, -inf)" on the first page that we'll scan. The test will only diff --git a/src/test/regress/sql/constraints.sql b/src/test/regress/sql/constraints.sql index 337baab7ced..7487723ab84 100644 --- a/src/test/regress/sql/constraints.sql +++ b/src/test/regress/sql/constraints.sql @@ -537,6 +537,9 @@ CREATE TABLE UNIQUE_NOTEN_TBL(i int UNIQUE NOT ENFORCED); ALTER TABLE unique_tbl ALTER CONSTRAINT unique_tbl_i_key ENFORCED; ALTER TABLE unique_tbl ALTER CONSTRAINT unique_tbl_i_key NOT ENFORCED; +-- can't make an existing constraint NOT VALID +ALTER TABLE unique_tbl ALTER CONSTRAINT unique_tbl_i_key NOT VALID; + DROP TABLE unique_tbl; -- @@ -997,6 +1000,9 @@ create table constr_parent3 (a int not null); create table constr_child3 () inherits (constr_parent2, constr_parent3); EXECUTE get_nnconstraint_info('{constr_parent3, constr_child3}'); +COMMENT ON CONSTRAINT constr_parent2_a_not_null ON constr_parent2 IS 'this constraint is invalid'; +COMMENT ON CONSTRAINT constr_parent2_a_not_null ON constr_child2 IS 'this constraint is valid'; + DEALLOCATE get_nnconstraint_info; -- end NOT NULL NOT VALID diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index f0b88a23db8..a1316c73bac 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -94,6 +94,36 @@ this is just a line full of junk that would error out if parsed copy copytest4 to stdout (header); +-- test multi-line header line feature + +create temp table copytest5 (c1 int); + +copy copytest5 from stdin (format csv, header 2); +this is a first header line. +this is a second header line. +1 +2 +\. +copy copytest5 to stdout (header); + +truncate copytest5; +copy copytest5 from stdin (format csv, header 4); +this is a first header line. +this is a second header line. +1 +2 +\. 
+select count(*) from copytest5; + +truncate copytest5; +copy copytest5 from stdin (format csv, header 5); +this is a first header line. +this is a second header line. +1 +2 +\. +select count(*) from copytest5; + -- test copy from with a partitioned table create table parted_copytest ( a int, diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index 45273557ce0..cef45868db5 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -90,6 +90,9 @@ COPY x to stdout (format BINARY, on_error unsupported); COPY x from stdin (log_verbosity unsupported); COPY x from stdin with (reject_limit 1); COPY x from stdin with (on_error ignore, reject_limit 0); +COPY x from stdin with (header -1); +COPY x from stdin with (header 2.5); +COPY x to stdout with (header 2); -- too many columns in column list: should fail COPY x (a, b, c, d, e, d, c) from stdin; diff --git a/src/test/regress/sql/create_table_like.sql b/src/test/regress/sql/create_table_like.sql index 6e21722aaeb..bf8702116a7 100644 --- a/src/test/regress/sql/create_table_like.sql +++ b/src/test/regress/sql/create_table_like.sql @@ -143,9 +143,10 @@ COMMENT ON INDEX ctlt1_pkey IS 'index pkey'; COMMENT ON INDEX ctlt1_b_key IS 'index b_key'; ALTER TABLE ctlt1 ALTER COLUMN a SET STORAGE MAIN; -CREATE TABLE ctlt2 (c text); +CREATE TABLE ctlt2 (c text NOT NULL); ALTER TABLE ctlt2 ALTER COLUMN c SET STORAGE EXTERNAL; COMMENT ON COLUMN ctlt2.c IS 'C'; +COMMENT ON CONSTRAINT ctlt2_c_not_null ON ctlt2 IS 't2_c_not_null'; CREATE TABLE ctlt3 (a text CHECK (length(a) < 5), c text CHECK (length(c) < 7)); ALTER TABLE ctlt3 ALTER COLUMN c SET STORAGE EXTERNAL; @@ -162,6 +163,7 @@ CREATE TABLE ctlt12_storage (LIKE ctlt1 INCLUDING STORAGE, LIKE ctlt2 INCLUDING \d+ ctlt12_storage CREATE TABLE ctlt12_comments (LIKE ctlt1 INCLUDING COMMENTS, LIKE ctlt2 INCLUDING COMMENTS); \d+ ctlt12_comments +SELECT conname, description FROM pg_description, pg_constraint c WHERE classoid = 
'pg_constraint'::regclass AND objoid = c.oid AND c.conrelid = 'ctlt12_comments'::regclass; CREATE TABLE ctlt1_inh (LIKE ctlt1 INCLUDING CONSTRAINTS INCLUDING COMMENTS) INHERITS (ctlt1); \d+ ctlt1_inh SELECT description FROM pg_description, pg_constraint c WHERE classoid = 'pg_constraint'::regclass AND objoid = c.oid AND c.conrelid = 'ctlt1_inh'::regclass; @@ -197,9 +199,19 @@ DROP TABLE ctlt1, ctlt2, ctlt3, ctlt4, ctlt12_storage, ctlt12_comments, ctlt1_in -- LIKE must respect NO INHERIT property of constraints CREATE TABLE noinh_con_copy (a int CHECK (a > 0) NO INHERIT, b int not null, c int not null no inherit); -CREATE TABLE noinh_con_copy1 (LIKE noinh_con_copy INCLUDING CONSTRAINTS); + +COMMENT ON CONSTRAINT noinh_con_copy_b_not_null ON noinh_con_copy IS 'not null b'; +COMMENT ON CONSTRAINT noinh_con_copy_c_not_null ON noinh_con_copy IS 'not null c no inherit'; + +CREATE TABLE noinh_con_copy1 (LIKE noinh_con_copy INCLUDING CONSTRAINTS INCLUDING COMMENTS); \d+ noinh_con_copy1 +SELECT conname, description +FROM pg_description, pg_constraint c +WHERE classoid = 'pg_constraint'::regclass +AND objoid = c.oid AND c.conrelid = 'noinh_con_copy1'::regclass +ORDER BY conname COLLATE "C"; + -- fail, as partitioned tables don't allow NO INHERIT constraints CREATE TABLE noinh_con_copy1_parted (LIKE noinh_con_copy INCLUDING ALL) PARTITION BY LIST (a); diff --git a/src/test/regress/sql/domain.sql b/src/test/regress/sql/domain.sql index b752a63ab5f..b8f5a639712 100644 --- a/src/test/regress/sql/domain.sql +++ b/src/test/regress/sql/domain.sql @@ -602,6 +602,9 @@ insert into domain_test values (1, 2); -- should fail alter table domain_test add column c str_domain; +-- disallow duplicated not-null constraints +create domain int_domain1 as int constraint nn1 not null constraint nn2 not null; + create domain str_domain2 as text check (value <> 'foo') default 'foo'; -- should fail diff --git a/src/test/regress/sql/foreign_key.sql b/src/test/regress/sql/foreign_key.sql index 
8159e363022..cfcecb4e911 100644 --- a/src/test/regress/sql/foreign_key.sql +++ b/src/test/regress/sql/foreign_key.sql @@ -1389,22 +1389,44 @@ WHERE conrelid::regclass::text like 'fk_partitioned_fk%' ORDER BY oid::regclass: DROP TABLE fk_partitioned_fk, fk_notpartitioned_pk; --- NOT VALID foreign key on a non-partitioned table referencing a partitioned table +-- NOT VALID and NOT ENFORCED foreign key on a non-partitioned table +-- referencing a partitioned table CREATE TABLE fk_partitioned_pk (a int, b int, PRIMARY KEY (a, b)) PARTITION BY RANGE (a, b); CREATE TABLE fk_partitioned_pk_1 PARTITION OF fk_partitioned_pk FOR VALUES FROM (0,0) TO (1000,1000); +CREATE TABLE fk_partitioned_pk_2 PARTITION OF fk_partitioned_pk FOR VALUES FROM (1000,1000) TO (2000,2000); CREATE TABLE fk_notpartitioned_fk (b int, a int); -ALTER TABLE fk_notpartitioned_fk ADD FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; - --- Constraint will be invalid. -SELECT conname, convalidated FROM pg_constraint +INSERT INTO fk_partitioned_pk VALUES(100,100), (1000,1000); +INSERT INTO fk_notpartitioned_fk VALUES(100,100), (1000,1000); +ALTER TABLE fk_notpartitioned_fk ADD CONSTRAINT fk_notpartitioned_fk_a_b_fkey + FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; +ALTER TABLE fk_notpartitioned_fk ADD CONSTRAINT fk_notpartitioned_fk_a_b_fkey2 + FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT ENFORCED; + +-- All constraints will be invalid, and _fkey2 constraints will not be enforced. +SELECT conname, conenforced, convalidated FROM pg_constraint WHERE conrelid = 'fk_notpartitioned_fk'::regclass ORDER BY oid::regclass::text; ALTER TABLE fk_notpartitioned_fk VALIDATE CONSTRAINT fk_notpartitioned_fk_a_b_fkey; +ALTER TABLE fk_notpartitioned_fk ALTER CONSTRAINT fk_notpartitioned_fk_a_b_fkey2 ENFORCED; --- All constraints are now valid. -SELECT conname, convalidated FROM pg_constraint +-- All constraints are now valid and enforced. 
+SELECT conname, conenforced, convalidated FROM pg_constraint WHERE conrelid = 'fk_notpartitioned_fk'::regclass ORDER BY oid::regclass::text; +-- test a self-referential FK +ALTER TABLE fk_partitioned_pk ADD CONSTRAINT selffk FOREIGN KEY (a, b) REFERENCES fk_partitioned_pk NOT VALID; +CREATE TABLE fk_partitioned_pk_3 PARTITION OF fk_partitioned_pk FOR VALUES FROM (2000,2000) TO (3000,3000) + PARTITION BY RANGE (a); +CREATE TABLE fk_partitioned_pk_3_1 PARTITION OF fk_partitioned_pk_3 FOR VALUES FROM (2000) TO (2100); +SELECT conname, conenforced, convalidated FROM pg_constraint +WHERE conrelid = 'fk_partitioned_pk'::regclass AND contype = 'f' +ORDER BY oid::regclass::text; +ALTER TABLE fk_partitioned_pk_2 VALIDATE CONSTRAINT selffk; +ALTER TABLE fk_partitioned_pk VALIDATE CONSTRAINT selffk; +SELECT conname, conenforced, convalidated FROM pg_constraint +WHERE conrelid = 'fk_partitioned_pk'::regclass AND contype = 'f' +ORDER BY oid::regclass::text; + DROP TABLE fk_notpartitioned_fk, fk_partitioned_pk; -- Test some other exotic foreign key features: MATCH SIMPLE, ON UPDATE/DELETE diff --git a/src/test/regress/sql/generated_stored.sql b/src/test/regress/sql/generated_stored.sql index 4ec155f2da9..f56fde8d4e5 100644 --- a/src/test/regress/sql/generated_stored.sql +++ b/src/test/regress/sql/generated_stored.sql @@ -595,6 +595,19 @@ ALTER TABLE gtest30_1 ALTER COLUMN b DROP EXPRESSION; -- error CREATE TABLE gtest31_1 (a int, b text GENERATED ALWAYS AS ('hello') STORED, c text); CREATE TABLE gtest31_2 (x int, y gtest31_1); ALTER TABLE gtest31_1 ALTER COLUMN b TYPE varchar; -- fails + +-- bug #18970: these cases are unsupported, but make sure they fail cleanly +ALTER TABLE gtest31_2 ADD CONSTRAINT cc CHECK ((y).b IS NOT NULL); +ALTER TABLE gtest31_1 ALTER COLUMN b SET EXPRESSION AS ('hello1'); +ALTER TABLE gtest31_2 DROP CONSTRAINT cc; + +CREATE STATISTICS gtest31_2_stat ON ((y).b is not null) FROM gtest31_2; +ALTER TABLE gtest31_1 ALTER COLUMN b SET EXPRESSION AS 
('hello2'); +DROP STATISTICS gtest31_2_stat; + +CREATE INDEX gtest31_2_y_idx ON gtest31_2(((y).b)); +ALTER TABLE gtest31_1 ALTER COLUMN b SET EXPRESSION AS ('hello3'); + DROP TABLE gtest31_1, gtest31_2; -- Check it for a partitioned table, too diff --git a/src/test/regress/sql/generated_virtual.sql b/src/test/regress/sql/generated_virtual.sql index b4eedeee2fb..e2b31853e01 100644 --- a/src/test/regress/sql/generated_virtual.sql +++ b/src/test/regress/sql/generated_virtual.sql @@ -253,10 +253,10 @@ CREATE TABLE gtest4 ( a int, b double_int GENERATED ALWAYS AS ((a * 2, a * 3)) VIRTUAL ); -INSERT INTO gtest4 VALUES (1), (6); -SELECT * FROM gtest4; +--INSERT INTO gtest4 VALUES (1), (6); +--SELECT * FROM gtest4; -DROP TABLE gtest4; +--DROP TABLE gtest4; DROP TYPE double_int; -- using tableoid is allowed @@ -290,20 +290,21 @@ GRANT SELECT (a, c) ON gtest11 TO regress_user11; CREATE FUNCTION gf1(a int) RETURNS int AS $$ SELECT a * 3 $$ IMMUTABLE LANGUAGE SQL; REVOKE ALL ON FUNCTION gf1(int) FROM PUBLIC; -CREATE TABLE gtest12 (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (gf1(b)) VIRTUAL); -INSERT INTO gtest12 VALUES (1, 10), (2, 20); -GRANT SELECT (a, c), INSERT ON gtest12 TO regress_user11; +CREATE TABLE gtest12 (a int PRIMARY KEY, b int, c int GENERATED ALWAYS AS (gf1(b)) VIRTUAL); -- fails, user-defined function +--INSERT INTO gtest12 VALUES (1, 10), (2, 20); +--GRANT SELECT (a, c), INSERT ON gtest12 TO regress_user11; SET ROLE regress_user11; SELECT a, b FROM gtest11; -- not allowed SELECT a, c FROM gtest11; -- allowed SELECT gf1(10); -- not allowed -INSERT INTO gtest12 VALUES (3, 30), (4, 40); -- allowed (does not actually invoke the function) -SELECT a, c FROM gtest12; -- currently not allowed because of function permissions, should arguably be allowed +--INSERT INTO gtest12 VALUES (3, 30), (4, 40); -- allowed (does not actually invoke the function) +--SELECT a, c FROM gtest12; -- currently not allowed because of function permissions, should arguably be 
allowed RESET ROLE; -DROP FUNCTION gf1(int); -- fail -DROP TABLE gtest11, gtest12; +--DROP FUNCTION gf1(int); -- fail +DROP TABLE gtest11; +--DROP TABLE gtest12; DROP FUNCTION gf1(int); DROP USER regress_user11; @@ -453,11 +454,19 @@ CREATE TABLE gtest24r (a int PRIMARY KEY, b gtestdomain1range GENERATED ALWAYS A --INSERT INTO gtest24r (a) VALUES (4); -- ok --INSERT INTO gtest24r (a) VALUES (6); -- error +CREATE TABLE gtest24at (a int PRIMARY KEY); +ALTER TABLE gtest24at ADD COLUMN b gtestdomain1 GENERATED ALWAYS AS (a * 2) VIRTUAL; -- error +CREATE TABLE gtest24ata (a int PRIMARY KEY, b int GENERATED ALWAYS AS (a * 2) VIRTUAL); +ALTER TABLE gtest24ata ALTER COLUMN b TYPE gtestdomain1; -- error + CREATE DOMAIN gtestdomainnn AS int CHECK (VALUE IS NOT NULL); CREATE TABLE gtest24nn (a int, b gtestdomainnn GENERATED ALWAYS AS (a * 2) VIRTUAL); --INSERT INTO gtest24nn (a) VALUES (4); -- ok --INSERT INTO gtest24nn (a) VALUES (NULL); -- error +-- using user-defined type not yet supported +CREATE TABLE gtest24xxx (a gtestdomain1, b gtestdomain1, c int GENERATED ALWAYS AS (greatest(a, b)) VIRTUAL); -- error + -- typed tables (currently not supported) CREATE TYPE gtest_type AS (f1 integer, f2 text, f3 bigint); CREATE TABLE gtest28 OF gtest_type (f1 WITH OPTIONS GENERATED ALWAYS AS (f2 *2) VIRTUAL); @@ -637,6 +646,19 @@ ALTER TABLE gtest30_1 ALTER COLUMN b DROP EXPRESSION; -- error CREATE TABLE gtest31_1 (a int, b text GENERATED ALWAYS AS ('hello') VIRTUAL, c text); CREATE TABLE gtest31_2 (x int, y gtest31_1); ALTER TABLE gtest31_1 ALTER COLUMN b TYPE varchar; -- fails + +-- bug #18970 +ALTER TABLE gtest31_2 ADD CONSTRAINT cc CHECK ((y).b IS NOT NULL); +ALTER TABLE gtest31_1 ALTER COLUMN b SET EXPRESSION AS ('hello1'); +ALTER TABLE gtest31_2 DROP CONSTRAINT cc; + +CREATE STATISTICS gtest31_2_stat ON ((y).b is not null) FROM gtest31_2; +ALTER TABLE gtest31_1 ALTER COLUMN b SET EXPRESSION AS ('hello2'); +DROP STATISTICS gtest31_2_stat; + +CREATE INDEX gtest31_2_y_idx ON 
gtest31_2(((y).b)); +ALTER TABLE gtest31_1 ALTER COLUMN b SET EXPRESSION AS ('hello3'); + DROP TABLE gtest31_1, gtest31_2; -- Check it for a partitioned table, too @@ -788,7 +810,8 @@ create table gtest32 ( a int primary key, b int generated always as (a * 2), c int generated always as (10 + 10), - d int generated always as (coalesce(a, 100)) + d int generated always as (coalesce(a, 100)), + e int ); insert into gtest32 values (1), (2); @@ -829,7 +852,10 @@ select t2.* from gtest32 t1 left join gtest32 t2 on false; select t2.* from gtest32 t1 left join gtest32 t2 on false; explain (verbose, costs off) -select * from gtest32 t group by grouping sets (a, b, c, d) having c = 20; -select * from gtest32 t group by grouping sets (a, b, c, d) having c = 20; +select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20; +select * from gtest32 t group by grouping sets (a, b, c, d, e) having c = 20; + +-- Ensure that the virtual generated columns in ALTER COLUMN TYPE USING expression are expanded +alter table gtest32 alter column e type bigint using b; drop table gtest32; diff --git a/src/test/regress/sql/horology.sql b/src/test/regress/sql/horology.sql index 1310b432773..8978249a5dc 100644 --- a/src/test/regress/sql/horology.sql +++ b/src/test/regress/sql/horology.sql @@ -102,6 +102,10 @@ SELECT date 'J J 1520447'; SELECT timestamp with time zone 'Y2001M12D27H04M05S06.789+08'; SELECT timestamp with time zone 'Y2001M12D27H04MM05S06.789-08'; +-- More examples we used to accept and should not +SELECT timestamp with time zone 'J2452271 T X03456-08'; +SELECT timestamp with time zone 'J2452271 T X03456.001e6-08'; + -- conflicting fields should throw errors SELECT date '1995-08-06 epoch'; SELECT date '1995-08-06 infinity'; diff --git a/src/test/regress/sql/incremental_sort.sql b/src/test/regress/sql/incremental_sort.sql index f1f8fae5654..bbe658a7588 100644 --- a/src/test/regress/sql/incremental_sort.sql +++ b/src/test/regress/sql/incremental_sort.sql @@ -298,3 
+298,27 @@ explain (costs off) select * from (select * from tenk1 order by four) t1 join tenk1 t2 on t1.four = t2.four and t1.two = t2.two order by t1.four, t1.two limit 1; + +-- +-- Test incremental sort for Append/MergeAppend +-- +create table prt_tbl (a int, b int) partition by range (a); +create table prt_tbl_1 partition of prt_tbl for values from (0) to (100); +create table prt_tbl_2 partition of prt_tbl for values from (100) to (200); +insert into prt_tbl select i%200, i from generate_series(1,1000)i; +create index on prt_tbl_1(a); +create index on prt_tbl_2(a, b); +analyze prt_tbl; + +set enable_seqscan to off; +set enable_bitmapscan to off; + +-- Ensure we get an incremental sort for the subpath of Append +explain (costs off) select * from prt_tbl order by a, b; + +-- Ensure we get an incremental sort for the subpath of MergeAppend +explain (costs off) select * from prt_tbl_1 union all select * from prt_tbl_2 order by a, b; + +reset enable_bitmapscan; +reset enable_seqscan; +drop table prt_tbl; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index cc5128add4d..5f0a475894d 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -1277,6 +1277,23 @@ where t1.unique2 < 42 and t1.stringu1 > t2.stringu2; -- variant that isn't quite a star-schema case +explain (verbose, costs off) +select ss1.d1 from + tenk1 as t1 + inner join tenk1 as t2 + on t1.tenthous = t2.ten + inner join + int8_tbl as i8 + left join int4_tbl as i4 + inner join (select 64::information_schema.cardinal_number as d1 + from tenk1 t3, + lateral (select abs(t3.unique1) + random()) ss0(x) + where t3.fivethous < 0) as ss1 + on i4.f1 = ss1.d1 + on i8.q1 = i4.f1 + on t1.tenthous = ss1.d1 +where t1.unique1 < i4.f1; + select ss1.d1 from tenk1 as t1 inner join tenk1 as t2 @@ -1332,6 +1349,64 @@ select * from (select 1 as x) ss1 left join (select 2 as y) ss2 on (true), lateral (select ss2.y as z limit 1) ss3; +-- This example demonstrates the folly of 
our old "have_dangerous_phv" logic +begin; +set local from_collapse_limit to 2; +explain (verbose, costs off) +select * from int8_tbl t1 + left join + (select coalesce(t2.q1 + x, 0) from int8_tbl t2, + lateral (select t3.q1 as x from int8_tbl t3, + lateral (select t2.q1, t3.q1 offset 0) s)) + on true; +rollback; + +-- ... not that the initial replacement didn't have some bugs too +begin; +create temp table t(i int primary key); + +explain (verbose, costs off) +select * from t t1 + left join (select 1 as x, * from t t2(i2)) t2ss on t1.i = t2ss.i2 + left join t t3(i3) on false + left join t t4(i4) on t4.i4 > t2ss.x; + +explain (verbose, costs off) +select * from + (select k from + (select i, coalesce(i, j) as k from + (select i from t union all select 0) + join (select 1 as j limit 1) on i = j) + right join (select 2 as x) on true + join (select 3 as y) on i is not null + ), + lateral (select k as kl limit 1); + +rollback; + +-- PHVs containing SubLinks are quite tricky to get right +explain (verbose, costs off) +select * +from int8_tbl i8 + inner join + (select (select true) as x + from int4_tbl i4, lateral (select i4.f1 as y limit 1) ss1 + where i4.f1 = 0) ss2 on true + right join (select false as z) ss3 on true, + lateral (select i8.q2 as q2l where x limit 1) ss4 +where i8.q2 = 123; + +explain (verbose, costs off) +select * +from int8_tbl i8 + inner join + (select (select true) as x + from int4_tbl i4, lateral (select 1 as y limit 1) ss1 + where i4.f1 = 0) ss2 on true + right join (select false as z) ss3 on true, + lateral (select i8.q2 as q2l where x limit 1) ss4 +where i8.q2 = 123; + -- Test proper handling of appendrel PHVs during useless-RTE removal explain (costs off) select * from @@ -1902,13 +1977,13 @@ select * from (select 1 as id) as xx left join (tenk1 as a1 full join (select 1 as id) as yy on (a1.unique1 = yy.id)) - on (xx.id = coalesce(yy.id)); + on (xx.id = coalesce(yy.id, yy.id)); select * from (select 1 as id) as xx left join (tenk1 as a1 full join 
(select 1 as id) as yy on (a1.unique1 = yy.id)) - on (xx.id = coalesce(yy.id)); + on (xx.id = coalesce(yy.id, yy.id)); -- -- test ability to push constants through outer join clauses @@ -3094,9 +3169,9 @@ select * from int4_tbl i left join lateral (select * from int2_tbl j where i.f1 = j.f1) k on true; explain (verbose, costs off) select * from int4_tbl i left join - lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true; + lateral (select coalesce(i, i) from int2_tbl j where i.f1 = j.f1) k on true; select * from int4_tbl i left join - lateral (select coalesce(i) from int2_tbl j where i.f1 = j.f1) k on true; + lateral (select coalesce(i, i) from int2_tbl j where i.f1 = j.f1) k on true; explain (verbose, costs off) select * from int4_tbl a, lateral ( @@ -3562,7 +3637,7 @@ ANALYZE group_tbl; EXPLAIN (COSTS OFF) SELECT 1 FROM group_tbl t1 - LEFT JOIN (SELECT a c1, COALESCE(a) c2 FROM group_tbl t2) s ON TRUE + LEFT JOIN (SELECT a c1, COALESCE(a, a) c2 FROM group_tbl t2) s ON TRUE GROUP BY s.c1, s.c2; DROP TABLE group_tbl; diff --git a/src/test/regress/sql/memoize.sql b/src/test/regress/sql/memoize.sql index c0d47fa875a..8d1cdd6990c 100644 --- a/src/test/regress/sql/memoize.sql +++ b/src/test/regress/sql/memoize.sql @@ -26,6 +26,7 @@ begin ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N'); ln := regexp_replace(ln, 'loops=\d+', 'loops=N'); ln := regexp_replace(ln, 'Index Searches: \d+', 'Index Searches: N'); + ln := regexp_replace(ln, 'Memory: \d+kB', 'Memory: NkB'); return next ln; end loop; end; @@ -244,3 +245,29 @@ RESET max_parallel_workers_per_gather; RESET parallel_tuple_cost; RESET parallel_setup_cost; RESET min_parallel_table_scan_size; + +-- Ensure memoize works for ANTI joins +CREATE TABLE tab_anti (a int, b boolean); +INSERT INTO tab_anti SELECT i%3, false FROM generate_series(1,100)i; +ANALYZE tab_anti; + +-- Ensure we get a Memoize plan for ANTI join +SELECT explain_memoize(' +SELECT COUNT(*) FROM tab_anti t1 LEFT JOIN 
+LATERAL (SELECT DISTINCT ON (a) a, b, t1.a AS x FROM tab_anti t2) t2 +ON t1.a+1 = t2.a +WHERE t2.a IS NULL;', false); + +-- And check we get the expected results. +SELECT COUNT(*) FROM tab_anti t1 LEFT JOIN +LATERAL (SELECT DISTINCT ON (a) a, b, t1.a AS x FROM tab_anti t2) t2 +ON t1.a+1 = t2.a +WHERE t2.a IS NULL; + +-- Ensure we do not add memoize node for SEMI join +EXPLAIN (COSTS OFF) +SELECT * FROM tab_anti t1 WHERE t1.a IN + (SELECT a FROM tab_anti t2 WHERE t2.b IN + (SELECT t1.b FROM tab_anti t3 WHERE t2.a > 1 OFFSET 0)); + +DROP TABLE tab_anti; diff --git a/src/test/regress/sql/merge.sql b/src/test/regress/sql/merge.sql index f7a19c0e7dd..2660b19f238 100644 --- a/src/test/regress/sql/merge.sql +++ b/src/test/regress/sql/merge.sql @@ -1722,6 +1722,55 @@ WHEN MATCHED THEN DELETE; SELECT * FROM new_measurement ORDER BY city_id, logdate; +-- MERGE into inheritance root table +DROP TRIGGER insert_measurement_trigger ON measurement; +ALTER TABLE measurement ADD CONSTRAINT mcheck CHECK (city_id = 0) NO INHERIT; + +EXPLAIN (COSTS OFF) +MERGE INTO measurement m + USING (VALUES (1, '01-17-2007'::date)) nm(city_id, logdate) ON + (m.city_id = nm.city_id and m.logdate=nm.logdate) +WHEN NOT MATCHED THEN INSERT + (city_id, logdate, peaktemp, unitsales) + VALUES (city_id - 1, logdate, 25, 100); + +BEGIN; +MERGE INTO measurement m + USING (VALUES (1, '01-17-2007'::date)) nm(city_id, logdate) ON + (m.city_id = nm.city_id and m.logdate=nm.logdate) +WHEN NOT MATCHED THEN INSERT + (city_id, logdate, peaktemp, unitsales) + VALUES (city_id - 1, logdate, 25, 100); +SELECT * FROM ONLY measurement ORDER BY city_id, logdate; +ROLLBACK; + +ALTER TABLE measurement ENABLE ROW LEVEL SECURITY; +ALTER TABLE measurement FORCE ROW LEVEL SECURITY; +CREATE POLICY measurement_p ON measurement USING (peaktemp IS NOT NULL); + +MERGE INTO measurement m + USING (VALUES (1, '01-17-2007'::date)) nm(city_id, logdate) ON + (m.city_id = nm.city_id and m.logdate=nm.logdate) +WHEN NOT MATCHED THEN INSERT 
+ (city_id, logdate, peaktemp, unitsales) + VALUES (city_id - 1, logdate, NULL, 100); -- should fail + +MERGE INTO measurement m + USING (VALUES (1, '01-17-2007'::date)) nm(city_id, logdate) ON + (m.city_id = nm.city_id and m.logdate=nm.logdate) +WHEN NOT MATCHED THEN INSERT + (city_id, logdate, peaktemp, unitsales) + VALUES (city_id - 1, logdate, 25, 100); -- ok +SELECT * FROM ONLY measurement ORDER BY city_id, logdate; + +MERGE INTO measurement m + USING (VALUES (1, '01-18-2007'::date)) nm(city_id, logdate) ON + (m.city_id = nm.city_id and m.logdate=nm.logdate) +WHEN NOT MATCHED THEN INSERT + (city_id, logdate, peaktemp, unitsales) + VALUES (city_id - 1, logdate, 25, 200) +RETURNING merge_action(), m.*; + DROP TABLE measurement, new_measurement CASCADE; DROP FUNCTION measurement_insert_trigger(); diff --git a/src/test/regress/sql/misc_functions.sql b/src/test/regress/sql/misc_functions.sql index 5f9c77512d1..23792c4132a 100644 --- a/src/test/regress/sql/misc_functions.sql +++ b/src/test/regress/sql/misc_functions.sql @@ -400,9 +400,9 @@ SELECT pg_column_toast_chunk_id(a) IS NULL, DROP TABLE test_chunk_id; DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); --- test stratnum support functions -SELECT gist_stratnum_common(7); -SELECT gist_stratnum_common(3); +-- test stratnum translation support functions +SELECT gist_translate_cmptype_common(7); +SELECT gist_translate_cmptype_common(3); -- relpath tests diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql index b98ae27df56..640c6d92f4c 100644 --- a/src/test/regress/sql/numeric.sql +++ b/src/test/regress/sql/numeric.sql @@ -869,6 +869,8 @@ SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, 0); SELECT width_bucket(5.0::float8, 3.0::float8, 4.0::float8, -5); SELECT width_bucket(3.5::float8, 3.0::float8, 3.0::float8, 888); SELECT width_bucket('NaN', 3.0, 4.0, 888); +SELECT width_bucket('NaN'::float8, 3.0::float8, 4.0::float8, 888); +SELECT width_bucket(0, 'NaN', 4.0, 
888); SELECT width_bucket(0::float8, 'NaN', 4.0::float8, 888); SELECT width_bucket(2.0, 3.0, '-inf', 888); SELECT width_bucket(0::float8, '-inf', 4.0::float8, 888); diff --git a/src/test/regress/sql/psql.sql b/src/test/regress/sql/psql.sql index 1a8a83462f0..e2e31245439 100644 --- a/src/test/regress/sql/psql.sql +++ b/src/test/regress/sql/psql.sql @@ -68,11 +68,11 @@ SELECT $1, $2 \parse stmt3 -- Multiple \g calls mean multiple executions \bind_named stmt2 'foo3' \g \bind_named stmt3 'foo4' 'bar4' \g --- \close (extended query protocol) -\close -\close '' -\close stmt2 -\close stmt2 +-- \close_prepared (extended query protocol) +\close_prepared +\close_prepared '' +\close_prepared stmt2 +\close_prepared stmt2 SELECT name, statement FROM pg_prepared_statements ORDER BY name; -- \bind (extended query protocol) @@ -1035,7 +1035,7 @@ select \if false \\ (bogus \else \\ 42 \endif \\ forty_two; \C arg1 \c arg1 arg2 arg3 arg4 \cd arg1 - \close stmt1 + \close_prepared stmt1 \conninfo \copy arg1 arg2 arg3 arg4 arg5 arg6 \copyright diff --git a/src/test/regress/sql/psql_pipeline.sql b/src/test/regress/sql/psql_pipeline.sql index 16e1e1e84cd..6788dceee2e 100644 --- a/src/test/regress/sql/psql_pipeline.sql +++ b/src/test/regress/sql/psql_pipeline.sql @@ -105,106 +105,6 @@ INSERT INTO psql_pipeline VALUES ($1) \bind 1 \sendpipeline COMMIT \bind \sendpipeline \endpipeline --- COPY FROM STDIN --- with \sendpipeline and \bind -\startpipeline -SELECT $1 \bind 'val1' \sendpipeline -COPY psql_pipeline FROM STDIN \bind \sendpipeline -\endpipeline -2 test2 -\. --- with semicolon -\startpipeline -SELECT 'val1'; -COPY psql_pipeline FROM STDIN; -\endpipeline -20 test2 -\. - --- COPY FROM STDIN with \flushrequest + \getresults --- with \sendpipeline and \bind -\startpipeline -SELECT $1 \bind 'val1' \sendpipeline -COPY psql_pipeline FROM STDIN \bind \sendpipeline -\flushrequest -\getresults -3 test3 -\. 
-\endpipeline --- with semicolon -\startpipeline -SELECT 'val1'; -COPY psql_pipeline FROM STDIN; -\flushrequest -\getresults -30 test3 -\. -\endpipeline - --- COPY FROM STDIN with \syncpipeline + \getresults --- with \bind and \sendpipeline -\startpipeline -SELECT $1 \bind 'val1' \sendpipeline -COPY psql_pipeline FROM STDIN \bind \sendpipeline -\syncpipeline -\getresults -4 test4 -\. -\endpipeline --- with semicolon -\startpipeline -SELECT 'val1'; -COPY psql_pipeline FROM STDIN; -\syncpipeline -\getresults -40 test4 -\. -\endpipeline - --- COPY TO STDOUT --- with \bind and \sendpipeline -\startpipeline -SELECT $1 \bind 'val1' \sendpipeline -copy psql_pipeline TO STDOUT \bind \sendpipeline -\endpipeline --- with semicolon -\startpipeline -SELECT 'val1'; -copy psql_pipeline TO STDOUT; -\endpipeline - --- COPY TO STDOUT with \flushrequest + \getresults --- with \bind and \sendpipeline -\startpipeline -SELECT $1 \bind 'val1' \sendpipeline -copy psql_pipeline TO STDOUT \bind \sendpipeline -\flushrequest -\getresults -\endpipeline --- with semicolon -\startpipeline -SELECT 'val1'; -copy psql_pipeline TO STDOUT; -\flushrequest -\getresults -\endpipeline - --- COPY TO STDOUT with \syncpipeline + \getresults --- with \bind and \sendpipeline -\startpipeline -SELECT $1 \bind 'val1' \sendpipeline -copy psql_pipeline TO STDOUT \bind \sendpipeline -\syncpipeline -\getresults -\endpipeline --- with semicolon -\startpipeline -SELECT 'val1'; -copy psql_pipeline TO STDOUT; -\syncpipeline -\getresults -\endpipeline - -- Use \parse and \bind_named \startpipeline SELECT $1 \parse '' @@ -406,21 +306,21 @@ SELECT $1 \bind \sendpipeline SELECT $1 \bind 1 \sendpipeline SELECT $1 \parse a \bind_named a 1 \sendpipeline -\close a +\close_prepared a \flushrequest \getresults -- Pipeline is aborted. SELECT $1 \bind 1 \sendpipeline SELECT $1 \parse a \bind_named a 1 \sendpipeline -\close a +\close_prepared a -- Sync allows pipeline to recover. 
\syncpipeline \getresults SELECT $1 \bind 1 \sendpipeline SELECT $1 \parse a \bind_named a 1 \sendpipeline -\close a +\close_prepared a \flushrequest \getresults \endpipeline diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql index 68001de4000..c9e309190df 100644 --- a/src/test/regress/sql/publication.sql +++ b/src/test/regress/sql/publication.sql @@ -262,6 +262,9 @@ ALTER PUBLICATION testpub6 SET TABLES IN SCHEMA testpub_rf_schema2, TABLE testpu RESET client_min_messages; \dRp+ testpub6 -- fail - virtual generated column uses user-defined function +-- (Actually, this already fails at CREATE TABLE rather than at CREATE +-- PUBLICATION, but let's keep the test in case the former gets +-- relaxed sometime.) CREATE TABLE testpub_rf_tbl6 (id int PRIMARY KEY, x int, y int GENERATED ALWAYS AS (x * testpub_rf_func2()) VIRTUAL); CREATE PUBLICATION testpub7 FOR TABLE testpub_rf_tbl6 WHERE (y > 100); -- test that SET EXPRESSION is rejected, because it could affect a row filter @@ -276,7 +279,7 @@ DROP TABLE testpub_rf_tbl2; DROP TABLE testpub_rf_tbl3; DROP TABLE testpub_rf_tbl4; DROP TABLE testpub_rf_tbl5; -DROP TABLE testpub_rf_tbl6; +--DROP TABLE testpub_rf_tbl6; DROP TABLE testpub_rf_schema1.testpub_rf_tbl5; DROP TABLE testpub_rf_schema2.testpub_rf_tbl6; DROP SCHEMA testpub_rf_schema1; diff --git a/src/test/regress/sql/regproc.sql b/src/test/regress/sql/regproc.sql index 232289ac398..cfec8f8c754 100644 --- a/src/test/regress/sql/regproc.sql +++ b/src/test/regress/sql/regproc.sql @@ -47,11 +47,42 @@ SELECT regrole('regress_regrole_test'); SELECT regrole('"regress_regrole_test"'); SELECT regnamespace('pg_catalog'); SELECT regnamespace('"pg_catalog"'); +SELECT regdatabase('template1'); +SELECT regdatabase('"template1"'); SELECT to_regrole('regress_regrole_test'); SELECT to_regrole('"regress_regrole_test"'); SELECT to_regnamespace('pg_catalog'); SELECT to_regnamespace('"pg_catalog"'); +SELECT to_regdatabase('template1'); +SELECT 
to_regdatabase('"template1"'); + +-- special "single dash" case + +SELECT regproc('-')::oid; +SELECT regprocedure('-')::oid; +SELECT regclass('-')::oid; +SELECT regcollation('-')::oid; +SELECT regtype('-')::oid; +SELECT regconfig('-')::oid; +SELECT regdictionary('-')::oid; +SELECT regrole('-')::oid; +SELECT regnamespace('-')::oid; +SELECT regdatabase('-')::oid; + +SELECT to_regproc('-')::oid; +SELECT to_regprocedure('-')::oid; +SELECT to_regclass('-')::oid; +SELECT to_regcollation('-')::oid; +SELECT to_regtype('-')::oid; +SELECT to_regrole('-')::oid; +SELECT to_regnamespace('-')::oid; +SELECT to_regdatabase('-')::oid; + +-- constant cannot be used here + +CREATE TABLE regrole_test (rolid OID DEFAULT 'regress_regrole_test'::regrole); +CREATE TABLE regdatabase_test (datid OID DEFAULT 'template1'::regdatabase); /* If objects don't exist, raise errors. */ @@ -88,6 +119,9 @@ SELECT regrole('foo.bar'); SELECT regnamespace('Nonexistent'); SELECT regnamespace('"Nonexistent"'); SELECT regnamespace('foo.bar'); +SELECT regdatabase('Nonexistent'); +SELECT regdatabase('"Nonexistent"'); +SELECT regdatabase('foo.bar'); /* If objects don't exist, return NULL with no error. 
*/ @@ -122,6 +156,9 @@ SELECT to_regrole('foo.bar'); SELECT to_regnamespace('Nonexistent'); SELECT to_regnamespace('"Nonexistent"'); SELECT to_regnamespace('foo.bar'); +SELECT to_regdatabase('Nonexistent'); +SELECT to_regdatabase('"Nonexistent"'); +SELECT to_regdatabase('foo.bar'); -- Test to_regtypemod SELECT to_regtypemod('text'); @@ -147,6 +184,7 @@ SELECT * FROM pg_input_error_info('ng_catalog.abs(numeric)', 'regprocedure'); SELECT * FROM pg_input_error_info('ng_catalog.abs(numeric', 'regprocedure'); SELECT * FROM pg_input_error_info('regress_regrole_test', 'regrole'); SELECT * FROM pg_input_error_info('no_such_type', 'regtype'); +SELECT * FROM pg_input_error_info('Nonexistent', 'regdatabase'); -- Some cases that should be soft errors, but are not yet SELECT * FROM pg_input_error_info('incorrect type name syntax', 'regtype'); diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index f7b325baadf..2577a42987d 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -197,6 +197,26 @@ SELECT 'abcd\efg' SIMILAR TO '_bcd\%' ESCAPE '' AS true; SELECT 'abcdefg' SIMILAR TO '_bcd%' ESCAPE NULL AS null; SELECT 'abcdefg' SIMILAR TO '_bcd#%' ESCAPE '##' AS error; +-- Characters that should be left alone in character classes when a +-- SIMILAR TO regexp pattern is converted to POSIX style. +-- Underscore "_" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '_[_[:alpha:]_]_'; +-- Percentage "%" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '%[%[:alnum:]%]%'; +-- Dot "." 
+EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '.[.[:alnum:].].'; +-- Dollar "$" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '$[$[:alnum:]$]$'; +-- Opening parenthesis "(" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '()[([:alnum:](]()'; +-- Caret "^" +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '^[^[:alnum:]^[^^][[^^]][\^][[\^]]\^]^'; +-- Closing square bracket "]" at the beginning of character class +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[]%][^]%][^%]%'; +-- Closing square bracket effective after two carets at the beginning +-- of character class. +EXPLAIN (COSTS OFF) SELECT * FROM TEXT_TBL WHERE f1 SIMILAR TO '[^^]^'; + -- Test backslash escapes in regexp_replace's replacement string SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3'); SELECT regexp_replace('foobarrbazz', E'(.)\\1', E'X\\&Y', 'g'); diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index fec38ef85a6..d9a841fbc9f 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -1041,7 +1041,7 @@ explain (verbose, costs off) select ss2.* from int8_tbl t1 left join (int8_tbl t2 left join - (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join + (select coalesce(q1, q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) on t1.q2 = ss2.q1 order by 1, 2, 3; @@ -1049,7 +1049,7 @@ order by 1, 2, 3; select ss2.* from int8_tbl t1 left join (int8_tbl t2 left join - (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join + (select coalesce(q1, q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) on t1.q2 = ss2.q1 order by 1, 2, 3; @@ -1059,7 +1059,7 @@ explain (verbose, costs off) select ss2.* from int8_tbl t1 
left join (int8_tbl t2 left join - (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join + (select coalesce(q1, q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) on t1.q2 = ss2.q1 order by 1, 2, 3; @@ -1067,7 +1067,7 @@ order by 1, 2, 3; select ss2.* from int8_tbl t1 left join (int8_tbl t2 left join - (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join + (select coalesce(q1, q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) on t1.q2 = ss2.q1 order by 1, 2, 3; diff --git a/src/test/regress/sql/sysviews.sql b/src/test/regress/sql/sysviews.sql index d0917b6868e..66179f026b3 100644 --- a/src/test/regress/sql/sysviews.sql +++ b/src/test/regress/sql/sysviews.sql @@ -101,21 +101,3 @@ select count(distinct utc_offset) >= 24 as ok from pg_timezone_abbrevs; -- One specific case we can check without much fear of breakage -- is the historical local-mean-time value used for America/Los_Angeles. 
select * from pg_timezone_abbrevs where abbrev = 'LMT'; - -DO $$ -DECLARE - bg_writer_pid int; - r RECORD; -BEGIN - SELECT pid from pg_stat_activity where backend_type='background writer' - INTO bg_writer_pid; - - select type, name, ident - from pg_get_process_memory_contexts(bg_writer_pid, false, 20) - where path = '{1}' into r; - RAISE NOTICE '%', r; - select type, name, ident - from pg_get_process_memory_contexts(pg_backend_pid(), false, 20) - where path = '{1}' into r; - RAISE NOTICE '%', r; -END $$; diff --git a/src/test/regress/sql/triggers.sql b/src/test/regress/sql/triggers.sql index d3d242dd29b..d674b25c83b 100644 --- a/src/test/regress/sql/triggers.sql +++ b/src/test/regress/sql/triggers.sql @@ -1577,6 +1577,19 @@ drop table parted; drop function parted_trigfunc(); -- +-- Constraint triggers +-- +create constraint trigger crtr + after insert on foo not valid + for each row execute procedure foo (); +create constraint trigger crtr + after insert on foo no inherit + for each row execute procedure foo (); +create constraint trigger crtr + after insert on foo not enforced + for each row execute procedure foo (); + +-- -- Constraint triggers and partitioned tables create table parted_constr_ancestor (a int, b text) partition by range (b); @@ -1591,7 +1604,7 @@ create constraint trigger parted_trig after insert on parted_constr_ancestor deferrable for each row execute procedure trigger_notice_ab(); create constraint trigger parted_trig_two after insert on parted_constr - deferrable initially deferred + deferrable initially deferred enforced for each row when (bark(new.b) AND new.a % 2 = 1) execute procedure trigger_notice_ab(); @@ -2701,8 +2714,8 @@ drop function f(); -- Test who runs deferred trigger functions -- setup -create role regress_groot; -create role regress_outis; +create role regress_caller; +create role regress_fn_owner; create function whoami() returns trigger language plpgsql as $$ begin @@ -2710,7 +2723,7 @@ begin return null; end; $$; -alter 
function whoami() owner to regress_outis; +alter function whoami() owner to regress_fn_owner; create table defer_trig (id integer); grant insert on defer_trig to public; @@ -2721,10 +2734,10 @@ create constraint trigger whoami after insert on defer_trig -- deferred triggers must run as the user that queued the trigger begin; -set role regress_groot; +set role regress_caller; insert into defer_trig values (1); reset role; -set role regress_outis; +set role regress_fn_owner; insert into defer_trig values (2); reset role; commit; @@ -2732,7 +2745,7 @@ commit; -- security definer functions override the user who queued the trigger alter function whoami() security definer; begin; -set role regress_groot; +set role regress_caller; insert into defer_trig values (3); reset role; commit; @@ -2749,7 +2762,7 @@ end; $$; begin; -set role regress_groot; +set role regress_caller; insert into defer_trig values (4); reset role; commit; -- error expected @@ -2758,5 +2771,5 @@ select current_user = session_user; -- clean up drop table defer_trig; drop function whoami(); -drop role regress_outis; -drop role regress_groot; +drop role regress_fn_owner; +drop role regress_caller; diff --git a/src/test/regress/sql/type_sanity.sql b/src/test/regress/sql/type_sanity.sql index c94dd83d306..df795759bb4 100644 --- a/src/test/regress/sql/type_sanity.sql +++ b/src/test/regress/sql/type_sanity.sql @@ -539,6 +539,7 @@ CREATE TABLE tab_core_types AS SELECT 'regtype'::regtype type, 'pg_monitor'::regrole, 'pg_class'::regclass::oid, + 'template1'::regdatabase, '(1,1)'::tid, '2'::xid, '3'::cid, '10:20:10,14,15'::txid_snapshot, '10:20:10,14,15'::pg_snapshot, diff --git a/src/test/ssl/meson.build b/src/test/ssl/meson.build index cf8b2b9303a..d8e0fb518e0 100644 --- a/src/test/ssl/meson.build +++ b/src/test/ssl/meson.build @@ -7,7 +7,7 @@ tests += { 'tap': { 'env': { 'with_ssl': ssl_library, - 'OPENSSL': openssl.found() ? openssl.path() : '', + 'OPENSSL': openssl.found() ? 
openssl.full_path() : '', }, 'tests': [ 't/001_ssltests.pl', diff --git a/src/test/ssl/t/SSL/Server.pm b/src/test/ssl/t/SSL/Server.pm index 96f0f201e9c..efbd0dafaf6 100644 --- a/src/test/ssl/t/SSL/Server.pm +++ b/src/test/ssl/t/SSL/Server.pm @@ -318,7 +318,8 @@ sub switch_server_cert $node->append_conf('sslconfig.conf', "ssl=on"); $node->append_conf('sslconfig.conf', $backend->set_server_cert(\%params)); # use lists of ECDH curves and cipher suites for syntax testing - $node->append_conf('sslconfig.conf', 'ssl_groups=X25519:prime256v1:secp521r1'); + $node->append_conf('sslconfig.conf', + 'ssl_groups=X25519:prime256v1:secp521r1'); $node->append_conf('sslconfig.conf', 'ssl_tls13_ciphers=TLS_AES_256_GCM_SHA384:TLS_AES_128_GCM_SHA256'); diff --git a/src/test/subscription/t/007_ddl.pl b/src/test/subscription/t/007_ddl.pl index 7d12bcbddb6..2a45fb13739 100644 --- a/src/test/subscription/t/007_ddl.pl +++ b/src/test/subscription/t/007_ddl.pl @@ -70,7 +70,8 @@ ok( $stderr =~ ); # Cleanup -$node_publisher->safe_psql('postgres', qq[ +$node_publisher->safe_psql( + 'postgres', qq[ DROP PUBLICATION mypub; SELECT pg_drop_replication_slot('mysub'); ]); @@ -86,32 +87,38 @@ sub test_swap my ($table_name, $pubname, $appname) = @_; # Confirms tuples can be replicated - $node_publisher->safe_psql('postgres', "INSERT INTO $table_name VALUES (1);"); + $node_publisher->safe_psql('postgres', + "INSERT INTO $table_name VALUES (1);"); $node_publisher->wait_for_catchup($appname); my $result = - $node_subscriber->safe_psql('postgres', "SELECT a FROM $table_name"); - is($result, qq(1), 'check replication worked well before renaming a publication'); + $node_subscriber->safe_psql('postgres', "SELECT a FROM $table_name"); + is($result, qq(1), + 'check replication worked well before renaming a publication'); # Swap the name of publications; $pubname <-> pub_empty - $node_publisher->safe_psql('postgres', qq[ + $node_publisher->safe_psql( + 'postgres', qq[ ALTER PUBLICATION $pubname RENAME TO 
tap_pub_tmp; ALTER PUBLICATION pub_empty RENAME TO $pubname; ALTER PUBLICATION tap_pub_tmp RENAME TO pub_empty; ]); # Insert the data again - $node_publisher->safe_psql('postgres', "INSERT INTO $table_name VALUES (2);"); + $node_publisher->safe_psql('postgres', + "INSERT INTO $table_name VALUES (2);"); $node_publisher->wait_for_catchup($appname); # Confirms the second tuple won't be replicated because $pubname does not # contains relations anymore. $result = - $node_subscriber->safe_psql('postgres', "SELECT a FROM $table_name ORDER BY a"); + $node_subscriber->safe_psql('postgres', + "SELECT a FROM $table_name ORDER BY a"); is($result, qq(1), 'check the tuple inserted after the RENAME was not replicated'); # Restore the name of publications because it can be called several times - $node_publisher->safe_psql('postgres', qq[ + $node_publisher->safe_psql( + 'postgres', qq[ ALTER PUBLICATION $pubname RENAME TO tap_pub_tmp; ALTER PUBLICATION pub_empty RENAME TO $pubname; ALTER PUBLICATION tap_pub_tmp RENAME TO pub_empty; @@ -124,7 +131,8 @@ $node_publisher->safe_psql('postgres', $ddl); $node_subscriber->safe_psql('postgres', $ddl); # Create publications and a subscription -$node_publisher->safe_psql('postgres', qq[ +$node_publisher->safe_psql( + 'postgres', qq[ CREATE PUBLICATION pub_empty; CREATE PUBLICATION pub_for_tab FOR TABLE test1; CREATE PUBLICATION pub_for_all_tables FOR ALL TABLES; @@ -139,19 +147,20 @@ test_swap('test1', 'pub_for_tab', 'tap_sub'); # Switches a publication which includes all tables $node_subscriber->safe_psql('postgres', - "ALTER SUBSCRIPTION tap_sub SET PUBLICATION pub_for_all_tables;" -); + "ALTER SUBSCRIPTION tap_sub SET PUBLICATION pub_for_all_tables;"); $node_subscriber->wait_for_subscription_sync($node_publisher, 'tap_sub'); # Confirms RENAME command works well for ALL TABLES publication test_swap('test2', 'pub_for_all_tables', 'tap_sub'); # Cleanup -$node_publisher->safe_psql('postgres', qq[ +$node_publisher->safe_psql( + 'postgres', qq[ 
DROP PUBLICATION pub_empty, pub_for_tab, pub_for_all_tables; DROP TABLE test1, test2; ]); -$node_subscriber->safe_psql('postgres', qq[ +$node_subscriber->safe_psql( + 'postgres', qq[ DROP SUBSCRIPTION tap_sub; DROP TABLE test1, test2; ]); diff --git a/src/test/subscription/t/013_partition.pl b/src/test/subscription/t/013_partition.pl index 61b0cb4aa1a..4f78dd48815 100644 --- a/src/test/subscription/t/013_partition.pl +++ b/src/test/subscription/t/013_partition.pl @@ -51,8 +51,7 @@ $node_subscriber1->safe_psql('postgres', ); # make a BRIN index to test aminsertcleanup logic in subscriber $node_subscriber1->safe_psql('postgres', - "CREATE INDEX tab1_c_brin_idx ON tab1 USING brin (c)" -); + "CREATE INDEX tab1_c_brin_idx ON tab1 USING brin (c)"); $node_subscriber1->safe_psql('postgres', "CREATE TABLE tab1_1 (b text, c text DEFAULT 'sub1_tab1', a int NOT NULL)" ); diff --git a/src/test/subscription/t/021_twophase.pl b/src/test/subscription/t/021_twophase.pl index 61c427aed21..b8e4242d1f1 100644 --- a/src/test/subscription/t/021_twophase.pl +++ b/src/test/subscription/t/021_twophase.pl @@ -373,7 +373,14 @@ $result = $node_publisher->safe_psql('postgres', "SELECT count(*) FROM tab_copy;"); is($result, qq(6), 'publisher inserted data'); +# Wait for both subscribers to catchup $node_publisher->wait_for_catchup($appname_copy); +$node_publisher->wait_for_catchup($appname); + +# Make sure there are no prepared transactions on the subscriber +$result = $node_subscriber->safe_psql('postgres', + "SELECT count(*) FROM pg_prepared_xacts;"); +is($result, qq(0), 'should be no prepared transactions on subscriber'); $result = $node_subscriber->safe_psql('postgres', "SELECT count(*) FROM tab_copy;"); diff --git a/src/test/subscription/t/024_add_drop_pub.pl b/src/test/subscription/t/024_add_drop_pub.pl index e995d8b3839..b396abe5599 100644 --- a/src/test/subscription/t/024_add_drop_pub.pl +++ b/src/test/subscription/t/024_add_drop_pub.pl @@ -108,11 +108,12 @@ 
$node_publisher->poll_query_until('postgres', my $offset = -s $node_publisher->logfile; -$node_publisher->safe_psql('postgres',"INSERT INTO tab_3 values(1)"); +$node_publisher->safe_psql('postgres', "INSERT INTO tab_3 values(1)"); # Verify that a warning is logged. $node_publisher->wait_for_log( - qr/WARNING: ( [A-Z0-9]+:)? skipped loading publication: tap_pub_3/, $offset); + qr/WARNING: ( [A-Z0-9]+:)? skipped loading publication "tap_pub_3"/, + $offset); $node_publisher->safe_psql('postgres', "CREATE PUBLICATION tap_pub_3 FOR TABLE tab_3"); @@ -128,10 +129,11 @@ $node_publisher->wait_for_catchup('tap_sub'); # Verify that the insert operation gets replicated to subscriber after # publication is created. -$result = $node_subscriber->safe_psql('postgres', - "SELECT * FROM tab_3"); -is($result, qq(1 -2), 'check that the incremental data is replicated after the publication is created'); +$result = $node_subscriber->safe_psql('postgres', "SELECT * FROM tab_3"); +is( $result, qq(1 +2), + 'check that the incremental data is replicated after the publication is created' +); # shutdown $node_subscriber->stop('fast'); diff --git a/src/test/subscription/t/035_conflicts.pl b/src/test/subscription/t/035_conflicts.pl index 2a7a8239a29..d78a6bac16a 100644 --- a/src/test/subscription/t/035_conflicts.pl +++ b/src/test/subscription/t/035_conflicts.pl @@ -26,7 +26,8 @@ $node_publisher->safe_psql('postgres', "CREATE TABLE conf_tab (a int PRIMARY KEY, b int UNIQUE, c int UNIQUE);"); $node_publisher->safe_psql('postgres', - "CREATE TABLE conf_tab_2 (a int PRIMARY KEY, b int UNIQUE, c int UNIQUE);"); + "CREATE TABLE conf_tab_2 (a int PRIMARY KEY, b int UNIQUE, c int UNIQUE);" +); # Create same table on subscriber $node_subscriber->safe_psql('postgres', diff --git a/src/tools/ci/pg_ci_base.conf b/src/tools/ci/pg_ci_base.conf index 9cec5c2910d..695e0a0d6ec 100644 --- a/src/tools/ci/pg_ci_base.conf +++ b/src/tools/ci/pg_ci_base.conf @@ -10,5 +10,5 @@ log_autovacuum_min_duration = 0 
log_checkpoints = true log_connections = all log_disconnections = true -log_line_prefix = '%m [%p][%b] %q[%a][%v:%x] ' +log_line_prefix = '%m %b[%p] %q%a ' log_lock_waits = true diff --git a/src/tools/git_changelog b/src/tools/git_changelog index b8bd874f208..c25e399a87f 100755 --- a/src/tools/git_changelog +++ b/src/tools/git_changelog @@ -59,6 +59,7 @@ require IPC::Open2; # (We could get this from "git branches", but not worth the trouble.) # NB: master must be first! my @BRANCHES = qw(master + REL_18_STABLE REL_17_STABLE REL_16_STABLE REL_15_STABLE REL_14_STABLE REL_13_STABLE REL_12_STABLE REL_11_STABLE REL_10_STABLE REL9_6_STABLE REL9_5_STABLE REL9_4_STABLE REL9_3_STABLE REL9_2_STABLE REL9_1_STABLE REL9_0_STABLE diff --git a/src/tools/pgflex b/src/tools/pgflex index 3986b06874e..b8d9aa0086f 100755 --- a/src/tools/pgflex +++ b/src/tools/pgflex @@ -48,7 +48,7 @@ os.chdir(args.privatedir) # contents. Set FLEX_TMP_DIR to the target private directory to avoid # that. That environment variable isn't consulted on other platforms, so we # don't even need to make this conditional. -env = {'FLEX_TMP_DIR': args.privatedir} +os.environ['FLEX_TMP_DIR'] = args.privatedir # build flex invocation command = [args.flex, '-o', args.output_file] @@ -58,7 +58,7 @@ command += args.flex_flags command += [args.input_file] # create .c file from .l file -sp = subprocess.run(command, env=env) +sp = subprocess.run(command) if sp.returncode != 0: sys.exit(sp.returncode) diff --git a/src/tools/pgindent/pgindent b/src/tools/pgindent/pgindent index 54e138b598d..b7d71808924 100755 --- a/src/tools/pgindent/pgindent +++ b/src/tools/pgindent/pgindent @@ -73,11 +73,14 @@ if ($sourcedir) # might make them so. For the moment we just hardwire a list of names # to add and a list of names to exclude; eventually this may need to be # easier to configure. Note that the typedefs need trailing newlines. 
-my @additional = ("bool\n"); +my @additional = map { "$_\n" } qw( + bool regex_t regmatch_t regoff +); my %excluded = map { +"$_\n" => 1 } qw( - ANY FD_SET U abs allocfunc boolean date digit ilist interval iterator other - pointer printfunc reference string timestamp type wrap + FD_SET LookupSet boolean date duration + element_type inquiry iterator other + pointer reference rep string timestamp type wrap ); # globals diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 9ea573fae21..83192038571 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -6,6 +6,7 @@ ASN1_INTEGER ASN1_OBJECT ASN1_OCTET_STRING ASN1_STRING +ATAlterConstraint AV A_ArrayExpr A_Const @@ -47,7 +48,6 @@ AggSplit AggState AggStatePerAgg AggStatePerGroup -AggStatePerGroupData AggStatePerHash AggStatePerPhase AggStatePerTrans @@ -74,6 +74,7 @@ AlterDatabaseSetStmt AlterDatabaseStmt AlterDefaultPrivilegesStmt AlterDomainStmt +AlterDomainType AlterEnumStmt AlterEventTrigStmt AlterExtensionContentsStmt @@ -161,7 +162,6 @@ ArrayType AsyncQueueControl AsyncQueueEntry AsyncRequest -ATAlterConstraint AttInMetadata AttStatsSlot AttoptCacheEntry @@ -174,8 +174,8 @@ AttrNumber AttributeOpts AuthRequest AuthToken -AutoPrewarmSharedState AutoPrewarmReadStreamData +AutoPrewarmSharedState AutoVacOpts AutoVacuumShmemStruct AutoVacuumWorkItem @@ -222,7 +222,6 @@ BTScanInsertData BTScanKeyPreproc BTScanOpaque BTScanOpaqueData -BTScanPos BTScanPosData BTScanPosItem BTShared @@ -270,8 +269,8 @@ BitmapAndPath BitmapAndState BitmapHeapPath BitmapHeapScan -BitmapHeapScanInstrumentation BitmapHeapScanDesc +BitmapHeapScanInstrumentation BitmapHeapScanState BitmapIndexScan BitmapIndexScanState @@ -341,8 +340,8 @@ BufFile Buffer BufferAccessStrategy BufferAccessStrategyType -BufferCacheNumaRec BufferCacheNumaContext +BufferCacheNumaRec BufferCachePagesContext BufferCachePagesRec BufferDesc @@ -382,6 +381,9 @@ CTEMaterialize CTESearchClause CURL CURLM 
+CURLMcode +CURLMsg +CURLcode CURLoption CV CachedExpression @@ -520,7 +522,6 @@ CopyFormatOptions CopyFromRoutine CopyFromState CopyFromStateData -CopyHeaderChoice CopyInsertMethod CopyLogVerbosityChoice CopyMethod @@ -600,6 +601,7 @@ DR_intorel DR_printtup DR_sqlfunction DR_transientrel +DSMREntryType DSMRegistryCtxStruct DSMRegistryEntry DWORD @@ -628,6 +630,7 @@ DefElem DefElemAction DefaultACLInfo DefineStmt +DefnDumperPtr DeleteStmt DependencyGenerator DependencyGeneratorData @@ -677,9 +680,8 @@ DumpableObjectType DumpableObjectWithAcl DynamicFileList DynamicZoneAbbrev -EC_KEY -ECDerivesKey ECDerivesEntry +ECDerivesKey EDGE ENGINE EOM_flatten_into_method @@ -761,10 +763,12 @@ ExpandedRange ExpandedRecordFieldInfo ExpandedRecordHeader ExplainDirectModify_function +ExplainExtensionOption ExplainForeignModify_function ExplainForeignScan_function ExplainFormat ExplainOneQuery_hook_type +ExplainOptionHandler ExplainSerializeOption ExplainState ExplainStmt @@ -792,6 +796,7 @@ FDWCollateState FD_SET FILE FILETIME +FPI FSMAddress FSMPage FSMPageData @@ -801,12 +806,12 @@ FastPathStrongRelationLockData FdwInfo FdwRoutine FetchDirection +FetchDirectionKeywords FetchStmt FieldSelect FieldStore File FileBackupMethod -FileCopyMethod FileFdwExecutionState FileFdwPlanState FileNameMap @@ -1190,6 +1195,7 @@ HeapCheckContext HeapCheckReadStreamData HeapPageFreeze HeapScanDesc +HeapScanDescData HeapTuple HeapTupleData HeapTupleFields @@ -1249,6 +1255,7 @@ IndexClause IndexClauseSet IndexDeleteCounts IndexDeletePrefetchState +IndexDoCheckCallback IndexElem IndexFetchHeapData IndexFetchTableData @@ -1279,13 +1286,15 @@ InheritableSocket InitSampleScan_function InitializeDSMForeignScan_function InitializeWorkerForeignScan_function +InjIoErrorState InjectionPointCacheEntry InjectionPointCallback InjectionPointCondition InjectionPointConditionType +InjectionPointData InjectionPointEntry -InjectionPointsCtl InjectionPointSharedState +InjectionPointsCtl InlineCodeBlock InsertStmt 
Instrumentation @@ -1302,7 +1311,6 @@ IntoClause InvalMessageArray InvalidationInfo InvalidationMsgsGroup -IoMethod IoMethodOps IpcMemoryId IpcMemoryKey @@ -1492,8 +1500,7 @@ LLVMOrcResourceTrackerRef LLVMOrcSymbolStringPoolRef LLVMOrcThreadSafeContextRef LLVMOrcThreadSafeModuleRef -LLVMPassManagerBuilderRef -LLVMPassManagerRef +LLVMPassBuilderOptionsRef LLVMTargetMachineRef LLVMTargetRef LLVMTypeRef @@ -1563,6 +1570,7 @@ LoadStmt LocalBufferLookupEnt LocalPgBackendStatus LocalTransactionId +Location LocationIndex LocationLen LockAcquireResult @@ -1582,7 +1590,6 @@ LockTupleMode LockViewRecurse_context LockWaitPolicy LockingClause -LogConnectionOption LogOpts LogStmtLevel LogicalDecodeBeginCB @@ -1633,6 +1640,7 @@ LogicalSlotInfo LogicalSlotInfoArr LogicalTape LogicalTapeSet +LookupSet LsnReadQueue LsnReadQueueNextFun LsnReadQueueNextStatus @@ -1657,8 +1665,8 @@ ManyTestResourceKind Material MaterialPath MaterialState -MdfdVec MdPathStr +MdfdVec Memoize MemoizeEntry MemoizeInstrumentation @@ -1672,12 +1680,9 @@ MemoryContextCallback MemoryContextCallbackFunction MemoryContextCounters MemoryContextData +MemoryContextId MemoryContextMethodID MemoryContextMethods -MemoryStatsBackendState -MemoryStatsContextId -MemoryStatsCtl -MemoryStatsEntry MemoryStatsPrintFunc MergeAction MergeActionState @@ -1734,6 +1739,9 @@ Name NameData NameHashEntry NamedArgExpr +NamedDSAState +NamedDSHState +NamedDSMState NamedLWLockTranche NamedLWLockTrancheRequest NamedTuplestoreScan @@ -1769,6 +1777,7 @@ NumericSortSupport NumericSumAccum NumericVar OAuthValidatorCallbacks +OAuthValidatorModuleInit OM_uint32 OP OSAPerGroupState @@ -1838,7 +1847,6 @@ PGCALL2 PGCRYPTO_SHA_t PGChecksummablePage PGContextVisibility -PGErrorVerbosity PGEvent PGEventConnDestroy PGEventConnReset @@ -1876,7 +1884,6 @@ PGTargetServerType PGTernaryBool PGTransactionStatusType PGVerbosity -PG_Locale_Strategy PG_Lock_Status PG_init_t PGauthData @@ -1908,7 +1915,6 @@ PLpgSQL_exception PLpgSQL_exception_block 
PLpgSQL_execstate PLpgSQL_expr -PLpgSQL_func_hashkey PLpgSQL_function PLpgSQL_getdiag_kind PLpgSQL_if_elsif @@ -2159,10 +2165,10 @@ PermutationStepBlockerType PgAioBackend PgAioCtl PgAioHandle -PgAioHandleCallbackID -PgAioHandleCallbackStage PgAioHandleCallbackComplete +PgAioHandleCallbackID PgAioHandleCallbackReport +PgAioHandleCallbackStage PgAioHandleCallbacks PgAioHandleCallbacksEntry PgAioHandleFlags @@ -2175,6 +2181,7 @@ PgAioReturn PgAioTargetData PgAioTargetID PgAioTargetInfo +PgAioUringCaps PgAioUringContext PgAioWaitRef PgArchData @@ -2207,9 +2214,9 @@ PgStatShared_Common PgStatShared_Database PgStatShared_Function PgStatShared_HashEntry +PgStatShared_IO PgStatShared_InjectionPoint PgStatShared_InjectionPointFixed -PgStatShared_IO PgStatShared_Relation PgStatShared_ReplSlot PgStatShared_SLRU @@ -2230,7 +2237,6 @@ PgStat_FunctionCallUsage PgStat_FunctionCounts PgStat_HashKey PgStat_IO -PgStat_Kind PgStat_KindInfo PgStat_LocalState PgStat_PendingDroppedStatsItem @@ -2358,12 +2364,12 @@ PushFilter PushFilterOps PushFunction PyCFunction -PyMappingMethods PyMethodDef PyModuleDef PyObject -PySequenceMethods PyTypeObject +PyType_Slot +PyType_Spec Py_ssize_t QPRS_STATE QTN2QTState @@ -2477,6 +2483,7 @@ RelOptInfo RelOptKind RelPathStr RelStatsInfo +RelSyncCallbackFunction RelToCheck RelToCluster RelabelType @@ -2629,7 +2636,6 @@ SQLDropObject SQLFunctionCache SQLFunctionCachePtr SQLFunctionHashEntry -SQLFunctionLink SQLFunctionParseInfo SQLFunctionParseInfoPtr SQLValueFunction @@ -2641,6 +2647,7 @@ STARTUPINFO STRLEN SV SYNCHRONIZATION_BARRIER +SYSTEM_INFO SampleScan SampleScanGetSampleSize_function SampleScanState @@ -2728,6 +2735,7 @@ SharedIncrementalSortInfo SharedIndexScanInstrumentation SharedInvalCatalogMsg SharedInvalCatcacheMsg +SharedInvalRelSyncMsg SharedInvalRelcacheMsg SharedInvalRelmapMsg SharedInvalSmgrMsg @@ -2767,7 +2775,7 @@ SingleBoundSortItem Size SkipPages SkipSupport -SkipSupportData +SkipSupportIncDec SlabBlock SlabContext SlabSlot @@ 
-2993,6 +3001,7 @@ TarMethodData TarMethodFile TargetEntry TclExceptionNameMap +Tcl_CmdInfo Tcl_DString Tcl_FileProc Tcl_HashEntry @@ -3000,8 +3009,10 @@ Tcl_HashTable Tcl_Interp Tcl_NotifierProcs Tcl_Obj +Tcl_Size Tcl_Time TempNamespaceStatus +TestDSMRegistryHashEntry TestDSMRegistryStruct TestDecodingData TestDecodingTxnData @@ -3145,6 +3156,7 @@ UnicodeNormalizationQC Unique UniquePath UniquePathMethod +UniqueRelInfo UniqueState UnlistenStmt UnresolvedTup @@ -3175,8 +3187,11 @@ VacuumRelation VacuumStmt ValidIOData ValidateIndexState -ValidatorModuleState ValidatorModuleResult +ValidatorModuleState +ValidatorShutdownCB +ValidatorStartupCB +ValidatorValidateCB ValuesScan ValuesScanState Var @@ -3381,10 +3396,9 @@ _resultmap _stringlist access_vector_t acquireLocksOnSubLinks_context -add_nulling_relids_context addFkConstraintSides +add_nulling_relids_context adjust_appendrel_attrs_context -allocfunc amadjustmembers_function ambeginscan_function ambuild_function @@ -3396,6 +3410,7 @@ amcostestimate_function amendscan_function amestimateparallelscan_function amgetbitmap_function +amgettreeheight_function amgettuple_function aminitparallelscan_function aminsert_function @@ -3406,13 +3421,27 @@ amparallelrescan_function amproperty_function amrescan_function amrestrpos_function -amtranslate_strategy_function amtranslatestrategy; -amtranslate_cmptype_function amtranslatecmptype; +amtranslate_cmptype_function +amtranslate_strategy_function amvacuumcleanup_function amvalidate_function array_iter array_unnest_fctx assign_collations_context +astreamer +astreamer_archive_context +astreamer_extractor +astreamer_gzip_decompressor +astreamer_gzip_writer +astreamer_lz4_frame +astreamer_member +astreamer_ops +astreamer_plain_writer +astreamer_recovery_injector +astreamer_tar_archiver +astreamer_tar_parser +astreamer_verify +astreamer_zstd_frame auth_password_hook_typ autovac_table av_relation @@ -3439,20 +3468,6 @@ bbsink_shell bbsink_state bbsink_throttle bbsink_zstd -astreamer 
-astreamer_archive_context -astreamer_extractor -astreamer_gzip_decompressor -astreamer_gzip_writer -astreamer_lz4_frame -astreamer_member -astreamer_ops -astreamer_plain_writer -astreamer_recovery_injector -astreamer_tar_archiver -astreamer_tar_parser -astreamer_verify -astreamer_zstd_frame bgworker_main_type bh_node_type binaryheap @@ -3467,6 +3482,8 @@ bloom_filter boolKEY brin_column_state brin_serialize_callback_type +btree_gin_convert_function +btree_gin_leftmost_function bytea cached_re_str canonicalize_state @@ -3492,6 +3509,13 @@ colormaprange compare_context config_handle config_var_value +conn_errorMessage_func +conn_oauth_client_id_func +conn_oauth_client_secret_func +conn_oauth_discovery_uri_func +conn_oauth_issuer_id_func +conn_oauth_scope_func +conn_sasl_state_func contain_aggs_of_level_context contain_placeholder_references_context convert_testexpr_context @@ -3508,6 +3532,9 @@ create_upper_paths_hook_type createdb_failure_params crosstab_HashEnt crosstab_cat_desc +curl_infotype +curl_socket_t +curl_version_info_data datapagemap_iterator_t datapagemap_t dateKEY @@ -3519,9 +3546,8 @@ deparse_columns deparse_context deparse_expr_cxt deparse_namespace -destructor +derives_hash dev_t -digit disassembledLeaf dlist_head dlist_iter @@ -3559,18 +3585,23 @@ dsm_handle dsm_op dsm_segment dsm_segment_detach_callback +duration eLogType ean13 eary ec_matches_callback_type ec_member_foreign_arg ec_member_matches_arg +element_type emit_log_hook_type eval_const_expressions_context exec_thread_arg execution_state exit_function explain_get_index_name_hook_type +explain_per_node_hook_type +explain_per_plan_hook_type +explain_validate_options_hook_type f_smgr fasthash_state fd_set @@ -3653,7 +3684,6 @@ gss_key_value_set_desc gss_name_t gtrgm_consistent_cache gzFile -hashfunc hbaPort heap_page_items_state help_handler @@ -3675,17 +3705,21 @@ init_function inline_cte_walker_context inline_error_callback_arg ino_t +inquiry instr_time int128 int16 int16KEY +int16_t 
int2vector int32 int32KEY int32_t int64 int64KEY +int64_t int8 +int8_t int8x16_t internalPQconninfoOption intptr_t @@ -3717,6 +3751,7 @@ lclContext lclTocEntry leafSegmentInfo leaf_item +libpq_gettext_func libpq_source line_t lineno_t @@ -3773,6 +3808,7 @@ mxact mxtruncinfo needs_fmgr_hook_type network_sortsupport_state +nl_item nodeitem normal_rand_fctx nsphash_hash @@ -3790,6 +3826,7 @@ openssl_tls_init_hook_typ ossl_EVP_cipher_func other output_type +overexplain_options pagetable_hash pagetable_iterator pairingheap @@ -3809,7 +3846,6 @@ pg_atomic_flag pg_atomic_uint32 pg_atomic_uint64 pg_be_sasl_mech -pg_case_map pg_category_range pg_checksum_context pg_checksum_raw_context @@ -3833,7 +3869,6 @@ pg_funcptr_t pg_gssinfo pg_hmac_ctx pg_hmac_errno -pg_int64 pg_local_to_utf_combined pg_locale_t pg_mb_radix_tree @@ -3902,7 +3937,8 @@ plperl_query_entry plpgsql_CastExprHashEntry plpgsql_CastHashEntry plpgsql_CastHashKey -plpgsql_HashEnt +plpgsql_expr_walker_callback +plpgsql_stmt_walker_callback pltcl_call_state pltcl_interp_desc pltcl_proc_desc @@ -3925,7 +3961,6 @@ printTextLineFormat printTextLineWrap printTextRule printXheaderWidthType -printfunc priv_map process_file_callback_t process_sublinks_context @@ -3965,12 +4000,9 @@ reduce_outer_joins_pass1_state reduce_outer_joins_pass2_state reference regex_arc_t -regex_t regexp regexp_matches_ctx registered_buffer -regmatch_t -regoff_t regproc relopt_bool relopt_enum @@ -3989,6 +4021,7 @@ remoteConnHashEnt remoteDep remove_nulling_relids_context rendezvousHashEntry +rep replace_rte_variables_callback replace_rte_variables_context report_error_fn @@ -4007,6 +4040,7 @@ rt_node_class_test_elem rt_radix_tree saophash_hash save_buffer +save_locale_t scram_state scram_state_enum script_error_callback_arg @@ -4014,6 +4048,8 @@ security_class_t sem_t sepgsql_context_info_t sequence_magic +set_conn_altsock_func +set_conn_oauth_token_func set_join_pathlist_hook_type set_rel_pathlist_hook_type shared_ts_iter @@ -4134,6 +4170,7 
@@ uint32_t uint32x4_t uint64 uint64_t +uint64x2_t uint8 uint8_t uint8x16_t @@ -4143,7 +4180,6 @@ unicodeStyleColumnFormat unicodeStyleFormat unicodeStyleRowFormat unicode_linestyle -UniqueRelInfo unit_conversion unlogged_relation_entry utf_local_conversion_func @@ -4286,6 +4322,7 @@ xmlGenericErrorFunc xmlNodePtr xmlNodeSetPtr xmlParserCtxtPtr +xmlParserErrors xmlParserInputPtr xmlSaveCtxt xmlSaveCtxtPtr @@ -4306,6 +4343,3 @@ yyscan_t z_stream z_streamp zic_t -ExplainExtensionOption -ExplainOptionHandler -overexplain_options diff --git a/src/tools/valgrind.supp b/src/tools/valgrind.supp index 7ea464c8094..2ad5b81526d 100644 --- a/src/tools/valgrind.supp +++ b/src/tools/valgrind.supp @@ -180,3 +180,17 @@ Memcheck:Cond fun:PyObject_Realloc } + +# NUMA introspection requires touching memory first, and some of it may +# be marked as noaccess (e.g. unpinned buffers). So just ignore that. +{ + pg_numa_touch_mem_if_required + Memcheck:Addr4 + fun:pg_numa_touch_mem_if_required +} + +{ + pg_numa_touch_mem_if_required + Memcheck:Addr8 + fun:pg_numa_touch_mem_if_required +} diff --git a/src/tools/version_stamp.pl b/src/tools/version_stamp.pl index c3509474d83..a9d2d0910f3 100755 --- a/src/tools/version_stamp.pl +++ b/src/tools/version_stamp.pl @@ -25,7 +25,7 @@ use warnings FATAL => 'all'; # Major version is hard-wired into the script. We update it when we branch # a new development version. -my $majorversion = 18; +my $majorversion = 19; # Validate argument and compute derived variables my $minor = shift; |