diff options
Diffstat (limited to 'src/backend')
50 files changed, 3427 insertions, 2268 deletions
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index 75979530897..14036c27e87 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -431,7 +431,7 @@ static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis); static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, bool sharelock, Buffer vmbuffer); -static void lazy_scan_prune(LVRelState *vacrel, Buffer buf, +static int lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, Buffer vmbuffer, bool all_visible_according_to_vm, bool *has_lpdead_items, bool *vm_page_frozen); @@ -1245,6 +1245,7 @@ lazy_scan_heap(LVRelState *vacrel) Buffer buf; Page page; uint8 blk_info = 0; + int ndeleted = 0; bool has_lpdead_items; void *per_buffer_data = NULL; bool vm_page_frozen = false; @@ -1387,10 +1388,10 @@ lazy_scan_heap(LVRelState *vacrel) * line pointers previously marked LP_DEAD. */ if (got_cleanup_lock) - lazy_scan_prune(vacrel, buf, blkno, page, - vmbuffer, - blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM, - &has_lpdead_items, &vm_page_frozen); + ndeleted = lazy_scan_prune(vacrel, buf, blkno, page, + vmbuffer, + blk_info & VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM, + &has_lpdead_items, &vm_page_frozen); /* * Count an eagerly scanned page as a failure or a success. @@ -1481,7 +1482,7 @@ lazy_scan_heap(LVRelState *vacrel) * table has indexes. There will only be newly-freed space if we * held the cleanup lock and lazy_scan_prune() was called. */ - if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items && + if (got_cleanup_lock && vacrel->nindexes == 0 && ndeleted > 0 && blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES) { FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, @@ -1936,8 +1937,10 @@ cmpOffsetNumbers(const void *a, const void *b) * *vm_page_frozen is set to true if the page is newly set all-frozen in the * VM. The caller currently only uses this for determining whether an eagerly * scanned page was successfully set all-frozen. + * + * Returns the number of tuples deleted from the page during HOT pruning. */ -static void +static int lazy_scan_prune(LVRelState *vacrel, Buffer buf, BlockNumber blkno, @@ -2208,6 +2211,8 @@ lazy_scan_prune(LVRelState *vacrel, *vm_page_frozen = true; } } + + return presult.ndeleted; } /* diff --git a/src/backend/access/nbtree/nbtpreprocesskeys.c b/src/backend/access/nbtree/nbtpreprocesskeys.c index a136e4bbfdf..21c519cd108 100644 --- a/src/backend/access/nbtree/nbtpreprocesskeys.c +++ b/src/backend/access/nbtree/nbtpreprocesskeys.c @@ -16,6 +16,7 @@ #include "postgres.h" #include "access/nbtree.h" +#include "common/int.h" #include "lib/qunique.h" #include "utils/array.h" #include "utils/lsyscache.h" @@ -56,6 +57,8 @@ static void _bt_skiparray_strat_decrement(IndexScanDesc scan, ScanKey arraysk, BTArrayKeyInfo *array); static void _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk, BTArrayKeyInfo *array); +static void _bt_unmark_keys(IndexScanDesc scan, int *keyDataMap); +static int _bt_reorder_array_cmp(const void *a, const void *b); static ScanKey _bt_preprocess_array_keys(IndexScanDesc scan, int *new_numberOfKeys); static void _bt_preprocess_array_keys_final(IndexScanDesc scan, int *keyDataMap); static int _bt_num_array_keys(IndexScanDesc scan, Oid *skip_eq_ops_out, @@ -96,7 +99,7 @@ static int _bt_compare_array_elements(const void *a, const void *b, void *arg); * incomplete sets of cross-type operators, we may fail to detect redundant * or contradictory keys, but we can survive that.) * - * The output keys must be sorted by index attribute. Presently we expect + * Required output keys are sorted by index attribute. Presently we expect * (but verify) that the input keys are already so sorted --- this is done * by match_clauses_to_index() in indxpath.c. Some reordering of the keys * within each attribute may be done as a byproduct of the processing here. @@ -127,29 +130,36 @@ static int _bt_compare_array_elements(const void *a, const void *b, void *arg); * This has the potential to be much more efficient than a full index scan * (though it behaves like a full scan when there's many distinct "x" values). * - * If possible, redundant keys are eliminated: we keep only the tightest + * Typically, redundant keys are eliminated: we keep only the tightest * >/>= bound and the tightest </<= bound, and if there's an = key then * that's the only one returned. (So, we return either a single = key, * or one or two boundary-condition keys for each attr.) However, if we * cannot compare two keys for lack of a suitable cross-type operator, - * we cannot eliminate either. If there are two such keys of the same - * operator strategy, the second one is just pushed into the output array - * without further processing here. We may also emit both >/>= or both - * </<= keys if we can't compare them. The logic about required keys still - * works if we don't eliminate redundant keys. - * - * Note that one reason we need direction-sensitive required-key flags is - * precisely that we may not be able to eliminate redundant keys. Suppose - * we have "x > 4::int AND x > 10::bigint", and we are unable to determine - * which key is more restrictive for lack of a suitable cross-type operator. - * _bt_first will arbitrarily pick one of the keys to do the initial - * positioning with. If it picks x > 4, then the x > 10 condition will fail - * until we reach index entries > 10; but we can't stop the scan just because - * x > 10 is failing. On the other hand, if we are scanning backwards, then - * failure of either key is indeed enough to stop the scan. (In general, when - * inequality keys are present, the initial-positioning code only promises to - * position before the first possible match, not exactly at the first match, - * for a forward scan; or after the last match for a backward scan.) + * we cannot eliminate either key. + * + * When all redundant keys could not be eliminated, we'll output a key array + * that can more or less be treated as if it had no redundant keys. Suppose + * we have "x > 4::int AND x > 10::bigint AND x < 70", and we are unable to + * determine which > key is more restrictive for lack of a suitable cross-type + * operator. We'll arbitrarily pick one of the > keys; the other > key won't + * be marked required. Obviously, the scan will be less efficient if we + * choose x > 4 over x > 10 -- but it can still largely proceed as if there + * was only a single > condition. "x > 10" will be placed at the end of the + * so->keyData[] output array. It'll always be evaluated last, after the keys + * that could be marked required in the usual way (after "x > 4 AND x < 70"). + * This can sometimes result in so->keyData[] keys that aren't even in index + * attribute order (if the qual involves multiple attributes). The scan's + * required keys will still be in attribute order, though, so it can't matter. + * + * This scheme ensures that _bt_first always uses the same set of keys at the + * start of a forwards scan as those _bt_checkkeys uses to determine when to + * end a similar backwards scan (and vice-versa). _bt_advance_array_keys + * depends on this: it expects to be able to reliably predict what the next + * _bt_first call will do by testing whether _bt_checkkeys' routines report + * that the final tuple on the page is past the end of matches for the scan's + * keys with the scan direction flipped. If it is (if continuescan=false), + * then it follows that calling _bt_first will, at a minimum, relocate the + * scan to the very next leaf page (in the current scan direction). * * As a byproduct of this work, we can detect contradictory quals such * as "x = 1 AND x > 2". If we see that, we return so->qual_ok = false, @@ -188,7 +198,8 @@ _bt_preprocess_keys(IndexScanDesc scan) int numberOfEqualCols; ScanKey inkeys; BTScanKeyPreproc xform[BTMaxStrategyNumber]; - bool test_result; + bool test_result, + redundant_key_kept = false; AttrNumber attno; ScanKey arrayKeyData; int *keyDataMap = NULL; @@ -388,7 +399,8 @@ _bt_preprocess_keys(IndexScanDesc scan) xform[j].inkey = NULL; xform[j].inkeyi = -1; } - /* else, cannot determine redundancy, keep both keys */ + else + redundant_key_kept = true; } /* track number of attrs for which we have "=" keys */ numberOfEqualCols++; @@ -409,6 +421,8 @@ _bt_preprocess_keys(IndexScanDesc scan) else xform[BTLessStrategyNumber - 1].inkey = NULL; } + else + redundant_key_kept = true; } /* try to keep only one of >, >= */ @@ -426,6 +440,8 @@ _bt_preprocess_keys(IndexScanDesc scan) else xform[BTGreaterStrategyNumber - 1].inkey = NULL; } + else + redundant_key_kept = true; } /* @@ -466,25 +482,6 @@ _bt_preprocess_keys(IndexScanDesc scan) /* check strategy this key's operator corresponds to */ j = inkey->sk_strategy - 1; - /* if row comparison, push it directly to the output array */ - if (inkey->sk_flags & SK_ROW_HEADER) - { - ScanKey outkey = &so->keyData[new_numberOfKeys++]; - - memcpy(outkey, inkey, sizeof(ScanKeyData)); - if (arrayKeyData) - keyDataMap[new_numberOfKeys - 1] = i; - if (numberOfEqualCols == attno - 1) - _bt_mark_scankey_required(outkey); - - /* - * We don't support RowCompare using equality; such a qual would - * mess up the numberOfEqualCols tracking. - */ - Assert(j != (BTEqualStrategyNumber - 1)); - continue; - } - if (inkey->sk_strategy == BTEqualStrategyNumber && (inkey->sk_flags & SK_SEARCHARRAY)) { @@ -593,9 +590,8 @@ _bt_preprocess_keys(IndexScanDesc scan) * the new scan key. * * Note: We do things this way around so that our arrays are - * always in the same order as their corresponding scan keys, - * even with incomplete opfamilies. _bt_advance_array_keys - * depends on this. + * always in the same order as their corresponding scan keys. + * _bt_preprocess_array_keys_final expects this. */ ScanKey outkey = &so->keyData[new_numberOfKeys++]; @@ -607,6 +603,7 @@ _bt_preprocess_keys(IndexScanDesc scan) xform[j].inkey = inkey; xform[j].inkeyi = i; xform[j].arrayidx = arrayidx; + redundant_key_kept = true; } } } @@ -622,6 +619,15 @@ _bt_preprocess_keys(IndexScanDesc scan) if (arrayKeyData) _bt_preprocess_array_keys_final(scan, keyDataMap); + /* + * If there are remaining redundant inequality keys, we must make sure + * that each index attribute has no more than one required >/>= key, and + * no more than one required </<= key. Attributes that have one or more + * required = keys now must keep only one required key (the first = key). + */ + if (unlikely(redundant_key_kept) && so->qual_ok) + _bt_unmark_keys(scan, keyDataMap); + /* Could pfree arrayKeyData/keyDataMap now, but not worth the cycles */ } @@ -746,9 +752,12 @@ _bt_fix_scankey_strategy(ScanKey skey, int16 *indoption) * * Depending on the operator type, the key may be required for both scan * directions or just one. Also, if the key is a row comparison header, - * we have to mark its first subsidiary ScanKey as required. (Subsequent - * subsidiary ScanKeys are normally for lower-order columns, and thus - * cannot be required, since they're after the first non-equality scankey.) + * we have to mark the appropriate subsidiary ScanKeys as required. In such + * cases, the first subsidiary key is required, but subsequent ones are + * required only as long as they correspond to successive index columns and + * match the leading column as to sort direction. Otherwise the row + * comparison ordering is different from the index ordering and so we can't + * stop the scan on the basis of those lower-order columns. * * Note: when we set required-key flag bits in a subsidiary scankey, we are * scribbling on a data structure belonging to the index AM's caller, not on @@ -786,12 +795,25 @@ _bt_mark_scankey_required(ScanKey skey) if (skey->sk_flags & SK_ROW_HEADER) { ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument); + AttrNumber attno = skey->sk_attno; /* First subkey should be same column/operator as the header */ - Assert(subkey->sk_flags & SK_ROW_MEMBER); - Assert(subkey->sk_attno == skey->sk_attno); + Assert(subkey->sk_attno == attno); Assert(subkey->sk_strategy == skey->sk_strategy); - subkey->sk_flags |= addflags; + + for (;;) + { + Assert(subkey->sk_flags & SK_ROW_MEMBER); + if (subkey->sk_attno != attno) + break; /* non-adjacent key, so not required */ + if (subkey->sk_strategy != skey->sk_strategy) + break; /* wrong direction, so not required */ + subkey->sk_flags |= addflags; + if (subkey->sk_flags & SK_ROW_END) + break; + subkey++; + attno++; + } } } @@ -847,8 +869,7 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op, cmp_op; StrategyNumber strat; - Assert(!((leftarg->sk_flags | rightarg->sk_flags) & - (SK_ROW_HEADER | SK_ROW_MEMBER))); + Assert(!((leftarg->sk_flags | rightarg->sk_flags) & SK_ROW_MEMBER)); /* * First, deal with cases where one or both args are NULL. This should @@ -925,6 +946,16 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op, } /* + * We don't yet know how to determine redundancy when it involves a row + * compare key (barring simple cases involving IS NULL/IS NOT NULL) + */ + if ((leftarg->sk_flags | rightarg->sk_flags) & SK_ROW_HEADER) + { + Assert(!((leftarg->sk_flags | rightarg->sk_flags) & SK_BT_SKIP)); + return false; + } + + /* * If either leftarg or rightarg are equality-type array scankeys, we need * specialized handling (since by now we know that IS NULL wasn't used) */ @@ -1468,6 +1499,283 @@ _bt_skiparray_strat_increment(IndexScanDesc scan, ScanKey arraysk, } /* + * _bt_unmark_keys() -- make superfluous required keys nonrequired after all + * + * When _bt_preprocess_keys fails to eliminate one or more redundant keys, it + * calls here to make sure that no index attribute has more than one > or >= + * key marked required, and no more than one required < or <= key. Attributes + * with = keys will always get one = key as their required key. All other + * keys that were initially marked required get "unmarked" here. That way, + * _bt_first and _bt_checkkeys will reliably agree on which keys to use to + * start and/or to end the scan. + * + * We also relocate keys that become/started out nonrequired to the end of + * so->keyData[]. That way, _bt_first and _bt_checkkeys cannot fail to reach + * a required key due to some earlier nonrequired key getting in the way. + * + * Only call here when _bt_compare_scankey_args returned false at least once + * (otherwise, calling here will just waste cycles). + */ +static void +_bt_unmark_keys(IndexScanDesc scan, int *keyDataMap) +{ + BTScanOpaque so = (BTScanOpaque) scan->opaque; + AttrNumber attno; + bool *unmarkikey; + int nunmark, + nunmarked, + nkept, + firsti; + ScanKey keepKeys, + unmarkKeys; + FmgrInfo *keepOrderProcs = NULL, + *unmarkOrderProcs = NULL; + bool haveReqEquals, + haveReqForward, + haveReqBackward; + + /* + * Do an initial pass over so->keyData[] that determines which keys to + * keep as required. We expect so->keyData[] to still be in attribute + * order when we're called (though we don't expect any particular order + * among each attribute's keys). + * + * When both equality and inequality keys remain on a single attribute, we + * *must* make sure that exactly one of the equalities remains required. + * Any requiredness markings that we might leave on later keys/attributes + * are predicated on there being required = keys on all prior columns. + */ + unmarkikey = palloc0(so->numberOfKeys * sizeof(bool)); + nunmark = 0; + + /* Set things up for first key's attribute */ + attno = so->keyData[0].sk_attno; + firsti = 0; + haveReqEquals = false; + haveReqForward = false; + haveReqBackward = false; + for (int i = 0; i < so->numberOfKeys; i++) + { + ScanKey origkey = &so->keyData[i]; + + if (origkey->sk_attno != attno) + { + /* Reset for next attribute */ + attno = origkey->sk_attno; + firsti = i; + + haveReqEquals = false; + haveReqForward = false; + haveReqBackward = false; + } + + /* Equalities get priority over inequalities */ + if (haveReqEquals) + { + /* + * We already found the first "=" key for this attribute. We've + * already decided that all its other keys will be unmarked. + */ + Assert(!(origkey->sk_flags & SK_SEARCHNULL)); + unmarkikey[i] = true; + nunmark++; + continue; + } + else if ((origkey->sk_flags & SK_BT_REQFWD) && + (origkey->sk_flags & SK_BT_REQBKWD)) + { + /* + * Found the first "=" key for attno. All other attno keys will + * be unmarked. + */ + Assert(origkey->sk_strategy == BTEqualStrategyNumber); + + haveReqEquals = true; + for (int j = firsti; j < i; j++) + { + /* Unmark any prior inequality keys on attno after all */ + if (!unmarkikey[j]) + { + unmarkikey[j] = true; + nunmark++; + } + } + continue; + } + + /* Deal with inequalities next */ + if ((origkey->sk_flags & SK_BT_REQFWD) && !haveReqForward) + { + haveReqForward = true; + continue; + } + else if ((origkey->sk_flags & SK_BT_REQBKWD) && !haveReqBackward) + { + haveReqBackward = true; + continue; + } + + /* + * We have either a redundant inequality key that will be unmarked, or + * we have a key that wasn't marked required in the first place + */ + unmarkikey[i] = true; + nunmark++; + } + + /* Should only be called when _bt_compare_scankey_args reported failure */ + Assert(nunmark > 0); + + /* + * Next, allocate temp arrays: one for required keys that'll remain + * required, the other for all remaining keys + */ + unmarkKeys = palloc(nunmark * sizeof(ScanKeyData)); + keepKeys = palloc((so->numberOfKeys - nunmark) * sizeof(ScanKeyData)); + nunmarked = 0; + nkept = 0; + if (so->numArrayKeys) + { + unmarkOrderProcs = palloc(nunmark * sizeof(FmgrInfo)); + keepOrderProcs = palloc((so->numberOfKeys - nunmark) * sizeof(FmgrInfo)); + } + + /* + * Next, copy the contents of so->keyData[] into the appropriate temp + * array. + * + * Scans with = array keys need us to maintain invariants around the order + * of so->orderProcs[] and so->arrayKeys[] relative to so->keyData[]. See + * _bt_preprocess_array_keys_final for a full explanation. + */ + for (int i = 0; i < so->numberOfKeys; i++) + { + ScanKey origkey = &so->keyData[i]; + ScanKey unmark; + + if (!unmarkikey[i]) + { + /* + * Key gets to keep its original requiredness markings. + * + * Key will stay in its original position, unless we're going to + * unmark an earlier key (in which case this key gets moved back). + */ + memcpy(keepKeys + nkept, origkey, sizeof(ScanKeyData)); + + if (so->numArrayKeys) + { + keyDataMap[i] = nkept; + memcpy(keepOrderProcs + nkept, &so->orderProcs[i], + sizeof(FmgrInfo)); + } + + nkept++; + continue; + } + + /* + * Key will be unmarked as needed, and moved to the end of the array, + * next to other keys that will become (or always were) nonrequired + */ + unmark = unmarkKeys + nunmarked; + memcpy(unmark, origkey, sizeof(ScanKeyData)); + + if (so->numArrayKeys) + { + keyDataMap[i] = (so->numberOfKeys - nunmark) + nunmarked; + memcpy(&unmarkOrderProcs[nunmarked], &so->orderProcs[i], + sizeof(FmgrInfo)); + } + + /* + * Preprocessing only generates skip arrays when it knows that they'll + * be the only required = key on the attr. We'll never unmark them. + */ + Assert(!(unmark->sk_flags & SK_BT_SKIP)); + + /* + * Also shouldn't have to unmark an IS NULL or an IS NOT NULL key. + * They aren't cross-type, so an incomplete opfamily can't matter. + */ + Assert(!(unmark->sk_flags & SK_ISNULL) || + !(unmark->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))); + + /* Clear requiredness flags on redundant key (and on any subkeys) */ + unmark->sk_flags &= ~(SK_BT_REQFWD | SK_BT_REQBKWD); + if (unmark->sk_flags & SK_ROW_HEADER) + { + ScanKey subkey = (ScanKey) DatumGetPointer(unmark->sk_argument); + + Assert(subkey->sk_strategy == unmark->sk_strategy); + for (;;) + { + Assert(subkey->sk_flags & SK_ROW_MEMBER); + subkey->sk_flags &= ~(SK_BT_REQFWD | SK_BT_REQBKWD); + if (subkey->sk_flags & SK_ROW_END) + break; + subkey++; + } + } + + nunmarked++; + } + + /* Copy both temp arrays back into so->keyData[] to reorder */ + Assert(nkept == so->numberOfKeys - nunmark); + Assert(nunmarked == nunmark); + memcpy(so->keyData, keepKeys, sizeof(ScanKeyData) * nkept); + memcpy(so->keyData + nkept, unmarkKeys, sizeof(ScanKeyData) * nunmarked); + + /* Done with temp arrays */ + pfree(unmarkikey); + pfree(keepKeys); + pfree(unmarkKeys); + + /* + * Now copy so->orderProcs[] temp entries needed by scans with = array + * keys back (just like with the so->keyData[] temp arrays) + */ + if (so->numArrayKeys) + { + memcpy(so->orderProcs, keepOrderProcs, sizeof(FmgrInfo) * nkept); + memcpy(so->orderProcs + nkept, unmarkOrderProcs, + sizeof(FmgrInfo) * nunmarked); + + /* Also fix-up array->scan_key references */ + for (int arridx = 0; arridx < so->numArrayKeys; arridx++) + { + BTArrayKeyInfo *array = &so->arrayKeys[arridx]; + + array->scan_key = keyDataMap[array->scan_key]; + } + + /* + * Sort so->arrayKeys[] based on its new BTArrayKeyInfo.scan_key + * offsets, so that its order matches so->keyData[] order as expected + */ + qsort(so->arrayKeys, so->numArrayKeys, sizeof(BTArrayKeyInfo), + _bt_reorder_array_cmp); + + /* Done with temp arrays */ + pfree(unmarkOrderProcs); + pfree(keepOrderProcs); + } +} + +/* + * qsort comparator for reordering so->arrayKeys[] BTArrayKeyInfo entries + */ +static int +_bt_reorder_array_cmp(const void *a, const void *b) +{ + BTArrayKeyInfo *arraya = (BTArrayKeyInfo *) a; + BTArrayKeyInfo *arrayb = (BTArrayKeyInfo *) b; + + return pg_cmp_s32(arraya->scan_key, arrayb->scan_key); +} + +/* * _bt_preprocess_array_keys() -- Preprocess SK_SEARCHARRAY scan keys * * If there are any SK_SEARCHARRAY scan keys, deconstruct the array(s) and diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 36544ecfd58..4af1ff1e9e5 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -960,46 +960,51 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) /*---------- * Examine the scan keys to discover where we need to start the scan. + * The selected scan keys (at most one per index column) are remembered by + * storing their addresses into the local startKeys[] array. The final + * startKeys[] entry's strategy is set in strat_total. (Actually, there + * are a couple of cases where we force a less/more restrictive strategy.) * - * We want to identify the keys that can be used as starting boundaries; - * these are =, >, or >= keys for a forward scan or =, <, <= keys for - * a backwards scan. We can use keys for multiple attributes so long as - * the prior attributes had only =, >= (resp. =, <=) keys. Once we accept - * a > or < boundary or find an attribute with no boundary (which can be - * thought of as the same as "> -infinity"), we can't use keys for any - * attributes to its right, because it would break our simplistic notion - * of what initial positioning strategy to use. + * We must use the key that was marked required (in the direction opposite + * our own scan's) during preprocessing. Each index attribute can only + * have one such required key. In general, the keys that we use to find + * an initial position when scanning forwards are the same keys that end + * the scan on the leaf level when scanning backwards (and vice-versa). * * When the scan keys include cross-type operators, _bt_preprocess_keys - * may not be able to eliminate redundant keys; in such cases we will - * arbitrarily pick a usable one for each attribute. This is correct - * but possibly not optimal behavior. (For example, with keys like - * "x >= 4 AND x >= 5" we would elect to scan starting at x=4 when - * x=5 would be more efficient.) Since the situation only arises given - * a poorly-worded query plus an incomplete opfamily, live with it. + * may not be able to eliminate redundant keys; in such cases it will + * arbitrarily pick a usable key for each attribute (and scan direction), + * ensuring that there is no more than one key required in each direction. + * We stop considering further keys once we reach the first nonrequired + * key (which must come after all required keys), so this can't affect us. + * + * The required keys that we use as starting boundaries have to be =, >, + * or >= keys for a forward scan or =, <, <= keys for a backwards scan. + * We can use keys for multiple attributes so long as the prior attributes + * had only =, >= (resp. =, <=) keys. These rules are very similar to the + * rules that preprocessing used to determine which keys to mark required. + * We cannot always use every required key as a positioning key, though. + * Skip arrays necessitate independently applying our own rules here. + * Skip arrays are always generally considered = array keys, but we'll + * nevertheless treat them as inequalities at certain points of the scan. + * When that happens, it _might_ have implications for the number of + * required keys that we can safely use for initial positioning purposes. * - * When both equality and inequality keys appear for a single attribute - * (again, only possible when cross-type operators appear), we *must* - * select one of the equality keys for the starting point, because - * _bt_checkkeys() will stop the scan as soon as an equality qual fails. - * For example, if we have keys like "x >= 4 AND x = 10" and we elect to - * start at x=4, we will fail and stop before reaching x=10. If multiple - * equality quals survive preprocessing, however, it doesn't matter which - * one we use --- by definition, they are either redundant or - * contradictory. + * For example, a forward scan with a skip array on its leading attribute + * (with no low_compare/high_compare) will have at least two required scan + * keys, but we won't use any of them as boundary keys during the scan's + * initial call here. Our positioning key during the first call here can + * be thought of as representing "> -infinity". Similarly, if such a skip + * array's low_compare is "a > 'foo'", then we position using "a > 'foo'" + * during the scan's initial call here; a lower-order key such as "b = 42" + * can't be used until the "a" array advances beyond MINVAL/low_compare. * - * In practice we rarely see any "attribute boundary key gaps" here. - * Preprocessing can usually backfill skip array keys for any attributes - * that were omitted from the original scan->keyData[] input keys. All - * array keys are always considered = keys, but we'll sometimes need to - * treat the current key value as if we were using an inequality strategy. - * This happens with range skip arrays, which store inequality keys in the - * array's low_compare/high_compare fields (used to find the first/last - * set of matches, when = key will lack a usable sk_argument value). - * These are always preferred over any redundant "standard" inequality - * keys on the same column (per the usual rule about preferring = keys). - * Note also that any column with an = skip array key can never have an - * additional, contradictory = key. + * On the other hand, if such a skip array's low_compare was "a >= 'foo'", + * then we _can_ use "a >= 'foo' AND b = 42" during the initial call here. + * A subsequent call here might have us use "a = 'fop' AND b = 42". Note + * that we treat = and >= as equivalent when scanning forwards (just as we + * treat = and <= as equivalent when scanning backwards). We effectively + * do the same thing (though with a distinct "a" element/value) each time. * * All keys (with the exception of SK_SEARCHNULL keys and SK_BT_SKIP * array keys whose array is "null_elem=true") imply a NOT NULL qualifier. @@ -1011,21 +1016,20 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * traversing a lot of null entries at the start of the scan. * * In this loop, row-comparison keys are treated the same as keys on their - * first (leftmost) columns. We'll add on lower-order columns of the row - * comparison below, if possible. + * first (leftmost) columns. We'll add all lower-order columns of the row + * comparison that were marked required during preprocessing below. * - * The selected scan keys (at most one per index column) are remembered by - * storing their addresses into the local startKeys[] array. - * - * _bt_checkkeys/_bt_advance_array_keys decide whether and when to start - * the next primitive index scan (for scans with array keys) based in part - * on an understanding of how it'll enable us to reposition the scan. - * They're directly aware of how we'll sometimes cons up an explicit - * SK_SEARCHNOTNULL key. They'll even end primitive scans by applying a - * symmetric "deduce NOT NULL" rule of their own. This allows top-level - * scans to skip large groups of NULLs through repeated deductions about - * key strictness (for a required inequality key) and whether NULLs in the - * key's index column are stored last or first (relative to non-NULLs). + * _bt_advance_array_keys needs to know exactly how we'll reposition the + * scan (should it opt to schedule another primitive index scan). It is + * critical that primscans only be scheduled when they'll definitely make + * some useful progress. _bt_advance_array_keys does this by calling + * _bt_checkkeys routines that report whether a tuple is past the end of + * matches for the scan's keys (given the scan's current array elements). + * If the page's final tuple is "after the end of matches" for a scan that + * uses the *opposite* scan direction, then it must follow that it's also + * "before the start of matches" for the actual current scan direction. + * It is therefore essential that all of our initial positioning rules are + * symmetric with _bt_checkkeys's corresponding continuescan=false rule. * If you update anything here, _bt_checkkeys/_bt_advance_array_keys might * need to be kept in sync. *---------- @@ -1034,18 +1038,17 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (so->numberOfKeys > 0) { AttrNumber curattr; - ScanKey chosen; + ScanKey bkey; ScanKey impliesNN; ScanKey cur; /* - * chosen is the so-far-chosen key for the current attribute, if any. - * We don't cast the decision in stone until we reach keys for the - * next attribute. + * bkey will be set to the key that preprocessing left behind as the + * boundary key for this attribute, in this scan direction (if any) */ cur = so->keyData; curattr = 1; - chosen = NULL; + bkey = NULL; /* Also remember any scankey that implies a NOT NULL constraint */ impliesNN = NULL; @@ -1058,23 +1061,29 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) { if (i >= so->numberOfKeys || cur->sk_attno != curattr) { + /* Done looking for the curattr boundary key */ + Assert(bkey == NULL || + (bkey->sk_attno == curattr && + (bkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))); + Assert(impliesNN == NULL || + (impliesNN->sk_attno == curattr && + (impliesNN->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)))); + /* - * Done looking at keys for curattr. - * * If this is a scan key for a skip array whose current * element is MINVAL, choose low_compare (when scanning * backwards it'll be MAXVAL, and we'll choose high_compare). * - * Note: if the array's low_compare key makes 'chosen' NULL, + * Note: if the array's low_compare key makes 'bkey' NULL, * then we behave as if the array's first element is -inf, * except when !array->null_elem implies a usable NOT NULL * constraint. */ - if (chosen != NULL && - (chosen->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))) + if (bkey != NULL && + (bkey->sk_flags & (SK_BT_MINVAL | SK_BT_MAXVAL))) { - int ikey = chosen - so->keyData; - ScanKey skipequalitykey = chosen; + int ikey = bkey - so->keyData; + ScanKey skipequalitykey = bkey; BTArrayKeyInfo *array = NULL; for (int arridx = 0; arridx < so->numArrayKeys; arridx++) @@ -1087,35 +1096,35 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (ScanDirectionIsForward(dir)) { Assert(!(skipequalitykey->sk_flags & SK_BT_MAXVAL)); - chosen = array->low_compare; + bkey = array->low_compare; } else { Assert(!(skipequalitykey->sk_flags & SK_BT_MINVAL)); - chosen = array->high_compare; + bkey = array->high_compare; } - Assert(chosen == NULL || - chosen->sk_attno == skipequalitykey->sk_attno); + Assert(bkey == NULL || + bkey->sk_attno == skipequalitykey->sk_attno); if (!array->null_elem) impliesNN = skipequalitykey; else - Assert(chosen == NULL && impliesNN == NULL); + Assert(bkey == NULL && impliesNN == NULL); } /* * If we didn't find a usable boundary key, see if we can * deduce a NOT NULL key */ - if (chosen == NULL && impliesNN != NULL && + if (bkey == NULL && impliesNN != NULL && ((impliesNN->sk_flags & SK_BT_NULLS_FIRST) ? ScanDirectionIsForward(dir) : ScanDirectionIsBackward(dir))) { /* Yes, so build the key in notnullkeys[keysz] */ - chosen = ¬nullkeys[keysz]; - ScanKeyEntryInitialize(chosen, + bkey = ¬nullkeys[keysz]; + ScanKeyEntryInitialize(bkey, (SK_SEARCHNOTNULL | SK_ISNULL | (impliesNN->sk_flags & (SK_BT_DESC | SK_BT_NULLS_FIRST))), @@ -1130,12 +1139,12 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) } /* - * If we still didn't find a usable boundary key, quit; else - * save the boundary key pointer in startKeys. + * If preprocessing didn't leave a usable boundary key, quit; + * else save the boundary key pointer in startKeys[] */ - if (chosen == NULL) + if (bkey == NULL) break; - startKeys[keysz++] = chosen; + startKeys[keysz++] = bkey; /* * We can only consider adding more boundary keys when the one @@ -1143,7 +1152,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * (during backwards scans we can only do so when the key that * we just added to startKeys[] uses the = or <= strategy) */ - strat_total = chosen->sk_strategy; + strat_total = bkey->sk_strategy; if (strat_total == BTGreaterStrategyNumber || strat_total == BTLessStrategyNumber) break; @@ -1154,19 +1163,19 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * make strat_total > or < (and stop adding boundary keys). * This can only happen with opclasses that lack skip support. */ - if (chosen->sk_flags & (SK_BT_NEXT | SK_BT_PRIOR)) + if (bkey->sk_flags & (SK_BT_NEXT | SK_BT_PRIOR)) { - Assert(chosen->sk_flags & SK_BT_SKIP); + Assert(bkey->sk_flags & SK_BT_SKIP); Assert(strat_total == BTEqualStrategyNumber); if (ScanDirectionIsForward(dir)) { - Assert(!(chosen->sk_flags & SK_BT_PRIOR)); + Assert(!(bkey->sk_flags & SK_BT_PRIOR)); strat_total = BTGreaterStrategyNumber; } else { - Assert(!(chosen->sk_flags & SK_BT_NEXT)); + Assert(!(bkey->sk_flags & SK_BT_NEXT)); strat_total = BTLessStrategyNumber; } @@ -1180,24 +1189,30 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) /* * Done if that was the last scan key output by preprocessing. - * Also done if there is a gap index attribute that lacks a - * usable key (only possible when preprocessing was unable to - * generate a skip array key to "fill in the gap"). + * Also done if we've now examined all keys marked required. */ if (i >= so->numberOfKeys || - cur->sk_attno != curattr + 1) + !(cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))) break; /* * Reset for next attr. */ + Assert(cur->sk_attno == curattr + 1); curattr = cur->sk_attno; - chosen = NULL; + bkey = NULL; impliesNN = NULL; } /* - * Can we use this key as a starting boundary for this attr? + * If we've located the starting boundary key for curattr, we have + * no interest in curattr's other required key + */ + if (bkey != NULL) + continue; + + /* + * Is this key the starting boundary key for curattr? * * If not, does it imply a NOT NULL constraint? (Because * SK_SEARCHNULL keys are always assigned BTEqualStrategyNumber, @@ -1207,27 +1222,20 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) { case BTLessStrategyNumber: case BTLessEqualStrategyNumber: - if (chosen == NULL) - { - if (ScanDirectionIsBackward(dir)) - chosen = cur; - else - impliesNN = cur; - } + if (ScanDirectionIsBackward(dir)) + bkey = cur; + else if (impliesNN == NULL) + impliesNN = cur; break; case BTEqualStrategyNumber: - /* override any non-equality choice */ - chosen = cur; + bkey = cur; break; case BTGreaterEqualStrategyNumber: case BTGreaterStrategyNumber: - if (chosen == NULL) - { - if (ScanDirectionIsForward(dir)) - chosen = cur; - else - impliesNN = cur; - } + if (ScanDirectionIsForward(dir)) + bkey = cur; + else if (impliesNN == NULL) + impliesNN = cur; break; } } @@ -1253,16 +1261,18 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) Assert(keysz <= INDEX_MAX_KEYS); for (int i = 0; i < keysz; i++) { - ScanKey cur = startKeys[i]; + ScanKey bkey = startKeys[i]; - Assert(cur->sk_attno == i + 1); + Assert(bkey->sk_attno == i + 1); - if (cur->sk_flags & SK_ROW_HEADER) + if (bkey->sk_flags & SK_ROW_HEADER) { /* * Row comparison header: look to the first row member instead */ - ScanKey subkey = (ScanKey) DatumGetPointer(cur->sk_argument); + ScanKey subkey = (ScanKey) DatumGetPointer(bkey->sk_argument); + bool loosen_strat = false, + tighten_strat = false; /* * Cannot be a NULL in the first row member: _bt_preprocess_keys @@ -1270,122 +1280,160 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * ever getting this far */ Assert(subkey->sk_flags & SK_ROW_MEMBER); - Assert(subkey->sk_attno == cur->sk_attno); + Assert(subkey->sk_attno == bkey->sk_attno); Assert(!(subkey->sk_flags & SK_ISNULL)); /* + * This is either a > or >= key (during backwards scans it is + * either < or <=) that was marked required during preprocessing. + * Later so->keyData[] keys can't have been marked required, so + * our row compare header key must be the final startKeys[] entry. + */ + Assert(subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)); + Assert(i == keysz - 1); + + /* * The member scankeys are already in insertion format (ie, they * have sk_func = 3-way-comparison function) */ memcpy(inskey.scankeys + i, subkey, sizeof(ScanKeyData)); /* - * If the row comparison is the last positioning key we accepted, - * try to add additional keys from the lower-order row members. - * (If we accepted independent conditions on additional index - * columns, we use those instead --- doesn't seem worth trying to - * determine which is more restrictive.) Note that this is OK - * even if the row comparison is of ">" or "<" type, because the - * condition applied to all but the last row member is effectively - * ">=" or "<=", and so the extra keys don't break the positioning - * scheme. But, by the same token, if we aren't able to use all - * the row members, then the part of the row comparison that we - * did use has to be treated as just a ">=" or "<=" condition, and - * so we'd better adjust strat_total accordingly. + * Now look to later row compare members. + * + * If there's an "index attribute gap" between two row compare + * members, the second member won't have been marked required, and + * so can't be used as a starting boundary key here. The part of + * the row comparison that we do still use has to be treated as a + * ">=" or "<=" condition. For example, a qual "(a, c) > (1, 42)" + * with an omitted intervening index attribute "b" will use an + * insertion scan key "a >= 1". Even the first "a = 1" tuple on + * the leaf level might satisfy the row compare qual. + * + * We're able to use a _more_ restrictive strategy when we reach a + * NULL row compare member, since they're always unsatisfiable. + * For example, a qual "(a, b, c) >= (1, NULL, 77)" will use an + * insertion scan key "a > 1". All tuples where "a = 1" cannot + * possibly satisfy the row compare qual, so this is safe. */ - if (i == keysz - 1) + Assert(!(subkey->sk_flags & SK_ROW_END)); + for (;;) { - bool used_all_subkeys = false; + subkey++; + Assert(subkey->sk_flags & SK_ROW_MEMBER); - Assert(!(subkey->sk_flags & SK_ROW_END)); - for (;;) + if (subkey->sk_flags & SK_ISNULL) { - subkey++; - Assert(subkey->sk_flags & SK_ROW_MEMBER); - if (subkey->sk_attno != keysz + 1) - break; /* out-of-sequence, can't use it */ - if (subkey->sk_strategy != cur->sk_strategy) - break; /* wrong direction, can't use it */ - if (subkey->sk_flags & SK_ISNULL) - break; /* can't use null keys */ - Assert(keysz < INDEX_MAX_KEYS); - memcpy(inskey.scankeys + keysz, subkey, - sizeof(ScanKeyData)); - keysz++; - if (subkey->sk_flags & SK_ROW_END) - { - used_all_subkeys = true; - break; - } + /* + * NULL member key, can only use earlier keys. + * + * We deliberately avoid checking if this key is marked + * required. All earlier keys are required, and this key + * is unsatisfiable either way, so we can't miss anything. + */ + tighten_strat = true; + break; } - if (!used_all_subkeys) + + if (!(subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))) { - switch (strat_total) - { - case BTLessStrategyNumber: - strat_total = BTLessEqualStrategyNumber; - break; - case BTGreaterStrategyNumber: - strat_total = BTGreaterEqualStrategyNumber; - break; - } + /* nonrequired member key, can only use earlier keys */ + loosen_strat = true; + break; } - break; /* done with outer loop */ + + Assert(subkey->sk_attno == keysz + 1); + Assert(subkey->sk_strategy == bkey->sk_strategy); + Assert(keysz < INDEX_MAX_KEYS); + + memcpy(inskey.scankeys + keysz, subkey, + sizeof(ScanKeyData)); + keysz++; + if (subkey->sk_flags & SK_ROW_END) + break; } - } - else - { - /* - * Ordinary comparison key. Transform the search-style scan key - * to an insertion scan key by replacing the sk_func with the - * appropriate btree comparison function. - * - * If scankey operator is not a cross-type comparison, we can use - * the cached comparison function; otherwise gotta look it up in - * the catalogs. (That can't lead to infinite recursion, since no - * indexscan initiated by syscache lookup will use cross-data-type - * operators.) - * - * We support the convention that sk_subtype == InvalidOid means - * the opclass input type; this is a hack to simplify life for - * ScanKeyInit(). - */ - if (cur->sk_subtype == rel->rd_opcintype[i] || - cur->sk_subtype == InvalidOid) + Assert(!(loosen_strat && tighten_strat)); + if (loosen_strat) { - FmgrInfo *procinfo; - - procinfo = index_getprocinfo(rel, cur->sk_attno, BTORDER_PROC); - ScanKeyEntryInitializeWithInfo(inskey.scankeys + i, - cur->sk_flags, - cur->sk_attno, - InvalidStrategy, - cur->sk_subtype, - cur->sk_collation, - procinfo, - cur->sk_argument); + /* Use less restrictive strategy (and fewer member keys) */ + switch (strat_total) + { + case BTLessStrategyNumber: + strat_total = BTLessEqualStrategyNumber; + break; + case BTGreaterStrategyNumber: + strat_total = BTGreaterEqualStrategyNumber; + break; + } } - else + if (tighten_strat) { - RegProcedure cmp_proc; - - cmp_proc = get_opfamily_proc(rel->rd_opfamily[i], - rel->rd_opcintype[i], - cur->sk_subtype, - BTORDER_PROC); - if (!RegProcedureIsValid(cmp_proc)) - elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"", - BTORDER_PROC, rel->rd_opcintype[i], cur->sk_subtype, - cur->sk_attno, RelationGetRelationName(rel)); - ScanKeyEntryInitialize(inskey.scankeys + i, - cur->sk_flags, - cur->sk_attno, - InvalidStrategy, - cur->sk_subtype, - cur->sk_collation, - cmp_proc, - cur->sk_argument); + /* Use more restrictive strategy (and fewer member keys) */ + switch (strat_total) + { + case BTLessEqualStrategyNumber: + strat_total = BTLessStrategyNumber; + break; + case BTGreaterEqualStrategyNumber: + strat_total = BTGreaterStrategyNumber; + break; + } } + + /* done adding to inskey (row comparison keys always come last) */ + break; + } + + /* + * Ordinary comparison key/search-style key. + * + * Transform the search-style scan key to an insertion scan key by + * replacing the sk_func with the appropriate btree 3-way-comparison + * function. + * + * If scankey operator is not a cross-type comparison, we can use the + * cached comparison function; otherwise gotta look it up in the + * catalogs. (That can't lead to infinite recursion, since no + * indexscan initiated by syscache lookup will use cross-data-type + * operators.) + * + * We support the convention that sk_subtype == InvalidOid means the + * opclass input type; this hack simplifies life for ScanKeyInit(). + */ + if (bkey->sk_subtype == rel->rd_opcintype[i] || + bkey->sk_subtype == InvalidOid) + { + FmgrInfo *procinfo; + + procinfo = index_getprocinfo(rel, bkey->sk_attno, BTORDER_PROC); + ScanKeyEntryInitializeWithInfo(inskey.scankeys + i, + bkey->sk_flags, + bkey->sk_attno, + InvalidStrategy, + bkey->sk_subtype, + bkey->sk_collation, + procinfo, + bkey->sk_argument); + } + else + { + RegProcedure cmp_proc; + + cmp_proc = get_opfamily_proc(rel->rd_opfamily[i], + rel->rd_opcintype[i], + bkey->sk_subtype, BTORDER_PROC); + if (!RegProcedureIsValid(cmp_proc)) + elog(ERROR, "missing support function %d(%u,%u) for attribute %d of index \"%s\"", + BTORDER_PROC, rel->rd_opcintype[i], bkey->sk_subtype, + bkey->sk_attno, RelationGetRelationName(rel)); + ScanKeyEntryInitialize(inskey.scankeys + i, + bkey->sk_flags, + bkey->sk_attno, + InvalidStrategy, + bkey->sk_subtype, + bkey->sk_collation, + cmp_proc, + bkey->sk_argument); } } @@ -1474,6 +1522,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (!BufferIsValid(so->currPos.buf)) { + Assert(!so->needPrimScan); + /* * We only get here if the index is completely empty. Lock relation * because nothing finer to lock exists. Without a buffer lock, it's @@ -1492,7 +1542,6 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) if (!BufferIsValid(so->currPos.buf)) { - Assert(!so->needPrimScan); _bt_parallel_done(scan); return false; } diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index c71d1b6f2e1..9aed207995f 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -44,7 +44,6 @@ static bool _bt_array_decrement(Relation rel, ScanKey skey, BTArrayKeyInfo *arra static bool _bt_array_increment(Relation rel, ScanKey skey, BTArrayKeyInfo *array); static bool _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir, bool *skip_array_set); -static void _bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir); static bool _bt_tuple_before_array_skeys(IndexScanDesc scan, ScanDirection dir, IndexTuple tuple, TupleDesc tupdesc, int tupnatts, bool readpagetup, int sktrig, bool *scanBehind); @@ -52,7 +51,6 @@ static bool _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, IndexTuple tuple, int tupnatts, TupleDesc tupdesc, int sktrig, bool sktrig_required); #ifdef USE_ASSERT_CHECKING -static bool _bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir); static bool _bt_verify_keys_with_arraykeys(IndexScanDesc scan); #endif static bool _bt_oppodir_checkkeys(IndexScanDesc scan, ScanDirection dir, @@ -1035,73 +1033,6 @@ _bt_advance_array_keys_increment(IndexScanDesc scan, ScanDirection dir, } /* - * _bt_rewind_nonrequired_arrays() -- Rewind SAOP arrays not marked required - * - * Called when _bt_advance_array_keys decides to start a new primitive index - * scan on the basis of the current scan position being before the position - * that _bt_first is capable of repositioning the scan to by applying an - * inequality operator required in the opposite-to-scan direction only. - * - * Although equality strategy scan keys (for both arrays and non-arrays alike) - * are either marked required in both directions or in neither direction, - * there is a sense in which non-required arrays behave like required arrays. - * With a qual such as "WHERE a IN (100, 200) AND b >= 3 AND c IN (5, 6, 7)", - * the scan key on "c" is non-required, but nevertheless enables positioning - * the scan at the first tuple >= "(100, 3, 5)" on the leaf level during the - * first descent of the tree by _bt_first. Later on, there could also be a - * second descent, that places the scan right before tuples >= "(200, 3, 5)". - * _bt_first must never be allowed to build an insertion scan key whose "c" - * entry is set to a value other than 5, the "c" array's first element/value. - * (Actually, it's the first in the current scan direction. This example uses - * a forward scan.) - * - * Calling here resets the array scan key elements for the scan's non-required - * arrays. This is strictly necessary for correctness in a subset of cases - * involving "required in opposite direction"-triggered primitive index scans. - * Not all callers are at risk of _bt_first using a non-required array like - * this, but advancement always resets the arrays when another primitive scan - * is scheduled, just to keep things simple. Array advancement even makes - * sure to reset non-required arrays during scans that have no inequalities. - * (Advancement still won't call here when there are no inequalities, though - * that's just because it's all handled indirectly instead.) - * - * Note: _bt_verify_arrays_bt_first is called by an assertion to enforce that - * everybody got this right. - * - * Note: In practice almost all SAOP arrays are marked required during - * preprocessing (if necessary by generating skip arrays). It is hardly ever - * truly necessary to call here, but consistently doing so is simpler. - */ -static void -_bt_rewind_nonrequired_arrays(IndexScanDesc scan, ScanDirection dir) -{ - Relation rel = scan->indexRelation; - BTScanOpaque so = (BTScanOpaque) scan->opaque; - int arrayidx = 0; - - for (int ikey = 0; ikey < so->numberOfKeys; ikey++) - { - ScanKey cur = so->keyData + ikey; - BTArrayKeyInfo *array = NULL; - - if (!(cur->sk_flags & SK_SEARCHARRAY) || - cur->sk_strategy != BTEqualStrategyNumber) - continue; - - array = &so->arrayKeys[arrayidx++]; - Assert(array->scan_key == ikey); - - if ((cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))) - continue; - - Assert(array->num_elems != -1); /* No non-required skip arrays */ - - _bt_array_set_low_or_high(rel, cur, array, - ScanDirectionIsForward(dir)); - } -} - -/* * _bt_tuple_before_array_skeys() -- too early to advance required arrays? * * We always compare the tuple using the current array keys (which we assume @@ -1380,8 +1311,6 @@ _bt_start_prim_scan(IndexScanDesc scan, ScanDirection dir) */ if (so->needPrimScan) { - Assert(_bt_verify_arrays_bt_first(scan, dir)); - /* * Flag was set -- must call _bt_first again, which will reset the * scan's needPrimScan flag @@ -2007,14 +1936,7 @@ _bt_advance_array_keys(IndexScanDesc scan, BTReadPageState *pstate, */ else if (has_required_opposite_direction_only && pstate->finaltup && unlikely(!_bt_oppodir_checkkeys(scan, dir, pstate->finaltup))) - { - /* - * Make sure that any SAOP arrays that were not marked required by - * preprocessing are reset to their first element for this direction - */ - _bt_rewind_nonrequired_arrays(scan, dir); goto new_prim_scan; - } continue_scan: @@ -2045,8 +1967,6 @@ continue_scan: */ so->oppositeDirCheck = has_required_opposite_direction_only; - _bt_rewind_nonrequired_arrays(scan, dir); - /* * skip by setting "look ahead" mechanism's offnum for forwards scans * (backwards scans check scanBehind flag directly instead) @@ -2143,48 +2063,6 @@ end_toplevel_scan: #ifdef USE_ASSERT_CHECKING /* - * Verify that the scan's qual state matches what we expect at the point that - * _bt_start_prim_scan is about to start a just-scheduled new primitive scan. - * - * We enforce a rule against non-required array scan keys: they must start out - * with whatever element is the first for the scan's current scan direction. - * See _bt_rewind_nonrequired_arrays comments for an explanation. - */ -static bool -_bt_verify_arrays_bt_first(IndexScanDesc scan, ScanDirection dir) -{ - BTScanOpaque so = (BTScanOpaque) scan->opaque; - int arrayidx = 0; - - for (int ikey = 0; ikey < so->numberOfKeys; ikey++) - { - ScanKey cur = so->keyData + ikey; - BTArrayKeyInfo *array = NULL; - int first_elem_dir; - - if (!(cur->sk_flags & SK_SEARCHARRAY) || - cur->sk_strategy != BTEqualStrategyNumber) - continue; - - array = &so->arrayKeys[arrayidx++]; - - if (((cur->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir)) || - ((cur->sk_flags & SK_BT_REQBKWD) && ScanDirectionIsBackward(dir))) - continue; - - if (ScanDirectionIsForward(dir)) - first_elem_dir = 0; - else - first_elem_dir = array->num_elems - 1; - - if (array->cur_elem != first_elem_dir) - return false; - } - - return _bt_verify_keys_with_arraykeys(scan); -} - -/* * Verify that the scan's "so->keyData[]" scan keys are in agreement with * its array key state */ @@ -2194,6 +2072,7 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan) BTScanOpaque so = (BTScanOpaque) scan->opaque; int last_sk_attno = InvalidAttrNumber, arrayidx = 0; + bool nonrequiredseen = false; if (!so->qual_ok) return false; @@ -2217,8 +2096,16 @@ _bt_verify_keys_with_arraykeys(IndexScanDesc scan) if (array->num_elems != -1 && cur->sk_argument != array->elem_values[array->cur_elem]) return false; - if (last_sk_attno > cur->sk_attno) - return false; + if (cur->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) + { + if (last_sk_attno > cur->sk_attno) + return false; + if (nonrequiredseen) + return false; + } + else + nonrequiredseen = true; + last_sk_attno = cur->sk_attno; } @@ -2551,37 +2438,12 @@ _bt_set_startikey(IndexScanDesc scan, BTReadPageState *pstate) if (!(key->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD))) { /* Scan key isn't marked required (corner case) */ - Assert(!(key->sk_flags & SK_ROW_HEADER)); break; /* unsafe */ } if (key->sk_flags & SK_ROW_HEADER) { - /* - * RowCompare inequality. - * - * Only the first subkey from a RowCompare can ever be marked - * required (that happens when the row header is marked required). - * There is no simple, general way for us to transitively deduce - * whether or not every tuple on the page satisfies a RowCompare - * key based only on firsttup and lasttup -- so we just give up. - */ - if (!start_past_saop_eq && !so->skipScan) - break; /* unsafe to go further */ - - /* - * We have to be even more careful with RowCompares that come - * after an array: we assume it's unsafe to even bypass the array. - * Calling _bt_start_array_keys to recover the scan's arrays - * following use of forcenonrequired mode isn't compatible with - * _bt_check_rowcompare's continuescan=false behavior with NULL - * row compare members. _bt_advance_array_keys must not make a - * decision on the basis of a key not being satisfied in the - * opposite-to-scan direction until the scan reaches a leaf page - * where the same key begins to be satisfied in scan direction. - * The _bt_first !used_all_subkeys behavior makes this limitation - * hard to work around some other way. - */ - return; /* completely unsafe to set pstate.startikey */ + /* RowCompare inequalities currently aren't supported */ + break; /* "unsafe" */ } if (key->sk_strategy != BTEqualStrategyNumber) { @@ -3078,6 +2940,31 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, Assert(subkey->sk_flags & SK_ROW_MEMBER); + /* When a NULL row member is compared, the row never matches */ + if (subkey->sk_flags & SK_ISNULL) + { + /* + * Unlike the simple-scankey case, this isn't a disallowed case + * (except when it's the first row element that has the NULL arg). + * But it can never match. If all the earlier row comparison + * columns are required for the scan direction, we can stop the + * scan, because there can't be another tuple that will succeed. + */ + Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument)); + subkey--; + if (forcenonrequired) + { + /* treating scan's keys as non-required */ + } + else if ((subkey->sk_flags & SK_BT_REQFWD) && + ScanDirectionIsForward(dir)) + *continuescan = false; + else if ((subkey->sk_flags & SK_BT_REQBKWD) && + ScanDirectionIsBackward(dir)) + *continuescan = false; + return false; + } + if (subkey->sk_attno > tupnatts) { /* @@ -3087,11 +2974,7 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, * attribute passes the qual. */ Assert(BTreeTupleIsPivot(tuple)); - cmpresult = 0; - if (subkey->sk_flags & SK_ROW_END) - break; - subkey++; - continue; + return true; } datum = index_getattr(tuple, @@ -3101,6 +2984,8 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, if (isNull) { + int reqflags; + if (forcenonrequired) { /* treating scan's keys as non-required */ @@ -3111,15 +2996,35 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, * Since NULLs are sorted before non-NULLs, we know we have * reached the lower limit of the range of values for this * index attr. On a backward scan, we can stop if this qual - * is one of the "must match" subset. We can stop regardless - * of whether the qual is > or <, so long as it's required, - * because it's not possible for any future tuples to pass. On - * a forward scan, however, we must keep going, because we may - * have initially positioned to the start of the index. - * (_bt_advance_array_keys also relies on this behavior during - * forward scans.) + * is one of the "must match" subset. However, on a forwards + * scan, we must keep going, because we may have initially + * positioned to the start of the index. + * + * All required NULLS FIRST > row members can use NULL tuple + * values to end backwards scans, just like with other values. + * A qual "WHERE (a, b, c) > (9, 42, 'foo')" can terminate a + * backwards scan upon reaching the index's rightmost "a = 9" + * tuple whose "b" column contains a NULL (if not sooner). + * Since "b" is NULLS FIRST, we can treat its NULLs as "<" 42. + */ + reqflags = SK_BT_REQBKWD; + + /* + * When a most significant required NULLS FIRST < row compare + * member sees NULL tuple values during a backwards scan, it + * signals the end of matches for the whole row compare/scan. + * A qual "WHERE (a, b, c) < (9, 42, 'foo')" will terminate a + * backwards scan upon reaching the rightmost tuple whose "a" + * column has a NULL. The "a" NULL value is "<" 9, and yet + * our < row compare will still end the scan. (This isn't + * safe with later/lower-order row members. Notice that it + * can only happen with an "a" NULL some time after the scan + * completely stops needing to use its "b" and "c" members.) */ - if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) && + if (subkey == (ScanKey) DatumGetPointer(skey->sk_argument)) + reqflags |= SK_BT_REQFWD; /* safe, first row member */ + + if ((subkey->sk_flags & reqflags) && ScanDirectionIsBackward(dir)) *continuescan = false; } @@ -3129,15 +3034,35 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, * Since NULLs are sorted after non-NULLs, we know we have * reached the upper limit of the range of values for this * index attr. On a forward scan, we can stop if this qual is - * one of the "must match" subset. We can stop regardless of - * whether the qual is > or <, so long as it's required, - * because it's not possible for any future tuples to pass. On - * a backward scan, however, we must keep going, because we - * may have initially positioned to the end of the index. - * (_bt_advance_array_keys also relies on this behavior during - * backward scans.) + * one of the "must match" subset. However, on a backward + * scan, we must keep going, because we may have initially + * positioned to the end of the index. + * + * All required NULLS LAST < row members can use NULL tuple + * values to end forwards scans, just like with other values. + * A qual "WHERE (a, b, c) < (9, 42, 'foo')" can terminate a + * forwards scan upon reaching the index's leftmost "a = 9" + * tuple whose "b" column contains a NULL (if not sooner). + * Since "b" is NULLS LAST, we can treat its NULLs as ">" 42. */ - if ((subkey->sk_flags & (SK_BT_REQFWD | SK_BT_REQBKWD)) && + reqflags = SK_BT_REQFWD; + + /* + * When a most significant required NULLS LAST > row compare + * member sees NULL tuple values during a forwards scan, it + * signals the end of matches for the whole row compare/scan. + * A qual "WHERE (a, b, c) > (9, 42, 'foo')" will terminate a + * forwards scan upon reaching the leftmost tuple whose "a" + * column has a NULL. The "a" NULL value is ">" 9, and yet + * our > row compare will end the scan. (This isn't safe with + * later/lower-order row members. Notice that it can only + * happen with an "a" NULL some time after the scan completely + * stops needing to use its "b" and "c" members.) + */ + if (subkey == (ScanKey) DatumGetPointer(skey->sk_argument)) + reqflags |= SK_BT_REQBKWD; /* safe, first row member */ + + if ((subkey->sk_flags & reqflags) && ScanDirectionIsForward(dir)) *continuescan = false; } @@ -3148,30 +3073,6 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, int tupnatts, return false; } - if (subkey->sk_flags & SK_ISNULL) - { - /* - * Unlike the simple-scankey case, this isn't a disallowed case - * (except when it's the first row element that has the NULL arg). - * But it can never match. If all the earlier row comparison - * columns are required for the scan direction, we can stop the - * scan, because there can't be another tuple that will succeed. - */ - Assert(subkey != (ScanKey) DatumGetPointer(skey->sk_argument)); - subkey--; - if (forcenonrequired) - { - /* treating scan's keys as non-required */ - } - else if ((subkey->sk_flags & SK_BT_REQFWD) && - ScanDirectionIsForward(dir)) - *continuescan = false; - else if ((subkey->sk_flags & SK_BT_REQBKWD) && - ScanDirectionIsBackward(dir)) - *continuescan = false; - return false; - } - /* Perform the test --- three-way comparison not bool operator */ cmpresult = DatumGetInt32(FunctionCall2Coll(&subkey->sk_func, subkey->sk_collation, diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index 113fae1437a..225ff7ca9f2 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -707,6 +707,13 @@ ActivateCommitTs(void) TransactionId xid; int64 pageno; + /* + * During bootstrap, we should not register commit timestamps so skip the + * activation in this case. + */ + if (IsBootstrapProcessingMode()) + return; + /* If we've done this already, there's nothing to do */ LWLockAcquire(CommitTsLock, LW_EXCLUSIVE); if (commitTsShared->commitTsActive) diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c index 6ce979f2d8b..93d38914854 100644 --- a/src/backend/access/transam/xlogrecovery.c +++ b/src/backend/access/transam/xlogrecovery.c @@ -4994,13 +4994,25 @@ check_recovery_target_timeline(char **newval, void **extra, GucSource source) rttg = RECOVERY_TARGET_TIMELINE_LATEST; else { + char *endp; + uint64 timeline; + rttg = RECOVERY_TARGET_TIMELINE_NUMERIC; errno = 0; - strtoul(*newval, NULL, 0); - if (errno == EINVAL || errno == ERANGE) + timeline = strtou64(*newval, &endp, 0); + + if (*endp != '\0' || errno == EINVAL || errno == ERANGE) + { + GUC_check_errdetail("\"%s\" is not a valid number.", + "recovery_target_timeline"); + return false; + } + + if (timeline < 1 || timeline > PG_UINT32_MAX) { - GUC_check_errdetail("\"recovery_target_timeline\" is not a valid number."); + GUC_check_errdetail("\"%s\" must be between %u and %u.", + "recovery_target_timeline", 1, UINT_MAX); return false; } } diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c index 6db864892d0..fc8638c1b61 100644 --- a/src/backend/bootstrap/bootstrap.c +++ b/src/backend/bootstrap/bootstrap.c @@ -109,6 +109,8 @@ static const struct typinfo TypInfo[] = { F_REGROLEIN, F_REGROLEOUT}, {"regnamespace", REGNAMESPACEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, F_REGNAMESPACEIN, F_REGNAMESPACEOUT}, + {"regdatabase", REGDATABASEOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, + F_REGDATABASEIN, F_REGDATABASEOUT}, {"text", TEXTOID, 0, -1, false, TYPALIGN_INT, TYPSTORAGE_EXTENDED, DEFAULT_COLLATION_OID, F_TEXTIN, F_TEXTOUT}, {"oid", OIDOID, 0, 4, true, TYPALIGN_INT, TYPSTORAGE_PLAIN, InvalidOid, diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 18316a3968b..7dded634eb8 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -1850,6 +1850,17 @@ find_expr_references_walker(Node *node, errmsg("constant of the type %s cannot be used here", "regrole"))); break; + + /* + * Dependencies for regdatabase should be shared among all + * databases, so explicitly inhibit to have dependencies. + */ + case REGDATABASEOID: + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("constant of the type %s cannot be used here", + "regdatabase"))); + break; } } return false; diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 08f780a2e63..e5dbbe61b81 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -895,7 +895,7 @@ CREATE VIEW pg_stat_activity AS S.wait_event, S.state, S.backend_xid, - s.backend_xmin, + S.backend_xmin, S.query_id, S.query, S.backend_type diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index 74ae42b19a7..fae9c41db65 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt, } /* - * Extract a CopyHeaderChoice value from a DefElem. This is like - * defGetBoolean() but also accepts the special value "match". + * Extract the CopyFormatOptions.header_line value from a DefElem. + * + * Parses the HEADER option for COPY, which can be a boolean, a non-negative + * integer (number of lines to skip), or the special value "match". */ -static CopyHeaderChoice -defGetCopyHeaderChoice(DefElem *def, bool is_from) +static int +defGetCopyHeaderOption(DefElem *def, bool is_from) { /* * If no parameter value given, assume "true" is meant. @@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from) return COPY_HEADER_TRUE; /* - * Allow 0, 1, "true", "false", "on", "off", or "match". + * Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or + * "match". */ switch (nodeTag(def->arg)) { case T_Integer: - switch (intVal(def->arg)) { - case 0: - return COPY_HEADER_FALSE; - case 1: - return COPY_HEADER_TRUE; - default: - /* otherwise, error out below */ - break; + int ival = intVal(def->arg); + + if (ival < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("a negative integer value cannot be " + "specified for %s", def->defname))); + + if (!is_from && ival > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot use multi-line header in COPY TO"))); + + return ival; } break; default: @@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from) } ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("%s requires a Boolean value or \"match\"", + errmsg("%s requires a Boolean value, a non-negative integer, " + "or the string \"match\"", def->defname))); return COPY_HEADER_FALSE; /* keep compiler quiet */ } @@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate, if (header_specified) errorConflictingDefElem(defel, pstate); header_specified = true; - opts_out->header_line = defGetCopyHeaderChoice(defel, is_from); + opts_out->header_line = defGetCopyHeaderOption(defel, is_from); } else if (strcmp(defel->defname, "quote") == 0) { @@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate, errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim))); /* Check header */ - if (opts_out->binary && opts_out->header_line) + if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), /*- translator: %s is the name of a COPY option, e.g. ON_ERROR */ diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index f5fc346e201..b1ae97b833d 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -771,21 +771,30 @@ static pg_attribute_always_inline bool NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv) { int fldct; - bool done; + bool done = false; /* only available for text or csv input */ Assert(!cstate->opts.binary); /* on input check that the header line is correct if needed */ - if (cstate->cur_lineno == 0 && cstate->opts.header_line) + if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE) { ListCell *cur; TupleDesc tupDesc; + int lines_to_skip = cstate->opts.header_line; + + /* If set to "match", one header line is skipped */ + if (cstate->opts.header_line == COPY_HEADER_MATCH) + lines_to_skip = 1; tupDesc = RelationGetDescr(cstate->rel); - cstate->cur_lineno++; - done = CopyReadLine(cstate, is_csv); + for (int i = 0; i < lines_to_skip; i++) + { + cstate->cur_lineno++; + if ((done = CopyReadLine(cstate, is_csv))) + break; + } if (cstate->opts.header_line == COPY_HEADER_MATCH) { @@ -1538,7 +1547,7 @@ GetDecimalFromHex(char hex) if (isdigit((unsigned char) hex)) return hex - '0'; else - return tolower((unsigned char) hex) - 'a' + 10; + return pg_ascii_tolower((unsigned char) hex) - 'a' + 10; } /* diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index ea6f18f2c80..67b94b91cae 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc) cstate->file_encoding); /* if a header has been requested send the line */ - if (cstate->opts.header_line) + if (cstate->opts.header_line == COPY_HEADER_TRUE) { ListCell *cur; bool hdr_delim = false; diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index b8837f26cb4..cb811520c29 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -2711,8 +2711,7 @@ MergeAttributes(List *columns, const List *supers, char relpersistence, RelationGetRelationName(relation)))); /* If existing rel is temp, it must belong to this session */ - if (relation->rd_rel->relpersistence == RELPERSISTENCE_TEMP && - !relation->rd_islocaltemp) + if (RELATION_IS_OTHER_TEMP(relation)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg(!is_partition @@ -15488,6 +15487,14 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode) Oid relid; relid = IndexGetRelation(oldId, false); + + /* + * As above, make sure we have lock on the index's table if it's not + * the same table. + */ + if (relid != tab->relid) + LockRelationOid(relid, AccessExclusiveLock); + ATPostAlterTypeParse(oldId, relid, InvalidOid, (char *) lfirst(def_item), wqueue, lockmode, tab->rewrite); @@ -15504,6 +15511,20 @@ ATPostAlterTypeCleanup(List **wqueue, AlteredTableInfo *tab, LOCKMODE lockmode) Oid relid; relid = StatisticsGetRelation(oldId, false); + + /* + * As above, make sure we have lock on the statistics object's table + * if it's not the same table. However, we take + * ShareUpdateExclusiveLock here, aligning with the lock level used in + * CreateStatistics and RemoveStatisticsById. + * + * CAUTION: this should be done after all cases that grab + * AccessExclusiveLock, else we risk causing deadlock due to needing + * to promote our table lock. + */ + if (relid != tab->relid) + LockRelationOid(relid, ShareUpdateExclusiveLock); + ATPostAlterTypeParse(oldId, relid, InvalidOid, (char *) lfirst(def_item), wqueue, lockmode, tab->rewrite); @@ -15727,7 +15748,7 @@ ATPostAlterTypeParse(Oid oldId, Oid oldRelId, Oid refRelId, char *cmd, { AlterDomainStmt *stmt = (AlterDomainStmt *) stm; - if (stmt->subtype == 'C') /* ADD CONSTRAINT */ + if (stmt->subtype == AD_AddConstraint) { Constraint *con = castNode(Constraint, stmt->def); AlterTableCmd *cmd = makeNode(AlterTableCmd); @@ -17230,15 +17251,13 @@ ATExecAddInherit(Relation child_rel, RangeVar *parent, LOCKMODE lockmode) RelationGetRelationName(parent_rel)))); /* If parent rel is temp, it must belong to this session */ - if (parent_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && - !parent_rel->rd_islocaltemp) + if (RELATION_IS_OTHER_TEMP(parent_rel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot inherit from temporary relation of another session"))); /* Ditto for the child */ - if (child_rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && - !child_rel->rd_islocaltemp) + if (RELATION_IS_OTHER_TEMP(child_rel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot inherit to temporary relation of another session"))); @@ -20309,15 +20328,13 @@ ATExecAttachPartition(List **wqueue, Relation rel, PartitionCmd *cmd, RelationGetRelationName(rel)))); /* If the parent is temp, it must belong to this session */ - if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && - !rel->rd_islocaltemp) + if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot attach as partition of temporary relation of another session"))); /* Ditto for the partition */ - if (attachrel->rd_rel->relpersistence == RELPERSISTENCE_TEMP && - !attachrel->rd_islocaltemp) + if (RELATION_IS_OTHER_TEMP(attachrel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot attach temporary relation of another session as partition"))); diff --git a/src/backend/commands/typecmds.c b/src/backend/commands/typecmds.c index 45ae7472ab5..26d985193ae 100644 --- a/src/backend/commands/typecmds.c +++ b/src/backend/commands/typecmds.c @@ -939,11 +939,19 @@ DefineDomain(ParseState *pstate, CreateDomainStmt *stmt) break; case CONSTR_NOTNULL: - if (nullDefined && !typNotNull) + if (nullDefined) + { + if (!typNotNull) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting NULL/NOT NULL constraints"), + parser_errposition(pstate, constr->location)); + ereport(ERROR, - errcode(ERRCODE_SYNTAX_ERROR), - errmsg("conflicting NULL/NOT NULL constraints"), + errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("redundant NOT NULL constraint definition"), parser_errposition(pstate, constr->location)); + } if (constr->is_no_inherit) ereport(ERROR, errcode(ERRCODE_INVALID_OBJECT_DEFINITION), diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c index bdf862b2406..ca33a854278 100644 --- a/src/backend/executor/execIndexing.c +++ b/src/backend/executor/execIndexing.c @@ -279,7 +279,7 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) * executor is performing an UPDATE that could not use an * optimization like heapam's HOT (in more general terms a * call to table_tuple_update() took place and set - * 'update_indexes' to TUUI_All). Receiving this hint makes + * 'update_indexes' to TU_All). Receiving this hint makes * us consider if we should pass down the 'indexUnchanged' * hint in turn. That's something that we figure out for * each index_insert() call iff 'update' is true. @@ -290,7 +290,7 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo) * HOT has been applied and any updated columns are indexed * only by summarizing indexes (or in more general terms a * call to table_tuple_update() took place and set - * 'update_indexes' to TUUI_Summarizing). We can (and must) + * 'update_indexes' to TU_Summarizing). We can (and must) * therefore only update the indexes that have * 'amsummarizing' = true. * diff --git a/src/backend/jit/llvm/meson.build b/src/backend/jit/llvm/meson.build index c8e06dfbe35..805fbd69006 100644 --- a/src/backend/jit/llvm/meson.build +++ b/src/backend/jit/llvm/meson.build @@ -53,7 +53,7 @@ llvm_irgen_args = [ if ccache.found() llvm_irgen_command = ccache - llvm_irgen_args = [clang.path()] + llvm_irgen_args + llvm_irgen_args = [clang.full_path()] + llvm_irgen_args else llvm_irgen_command = clang endif diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index 7aa8f5d799c..ebedc5574ca 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -154,13 +154,17 @@ add_paths_to_joinrel(PlannerInfo *root, /* * See if the inner relation is provably unique for this outer rel. * - * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't - * matter since the executor can make the equivalent optimization anyway; - * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we - * must be considering a semijoin whose inner side is not provably unique - * (else reduce_unique_semijoins would've simplified it), so there's no - * point in calling innerrel_is_unique. However, if the LHS covers all of - * the semijoin's min_lefthand, then it's appropriate to set inner_unique + * We have some special cases: for JOIN_SEMI, it doesn't matter since the + * executor can make the equivalent optimization anyway. It also doesn't + * help enable use of Memoize, since a semijoin with a provably unique + * inner side should have been reduced to an inner join in that case. + * Therefore, we need not expend planner cycles on proofs. (For + * JOIN_ANTI, although it doesn't help the executor for the same reason, + * it can benefit Memoize paths.) For JOIN_UNIQUE_INNER, we must be + * considering a semijoin whose inner side is not provably unique (else + * reduce_unique_semijoins would've simplified it), so there's no point in + * calling innerrel_is_unique. However, if the LHS covers all of the + * semijoin's min_lefthand, then it's appropriate to set inner_unique * because the path produced by create_unique_path will be unique relative * to the LHS. (If we have an LHS that's only part of the min_lefthand, * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid @@ -169,12 +173,6 @@ add_paths_to_joinrel(PlannerInfo *root, switch (jointype) { case JOIN_SEMI: - case JOIN_ANTI: - - /* - * XXX it may be worth proving this to allow a Memoize to be - * considered for Nested Loop Semi/Anti Joins. - */ extra.inner_unique = false; /* well, unproven */ break; case JOIN_UNIQUE_INNER: @@ -715,16 +713,21 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel, return NULL; /* - * Currently we don't do this for SEMI and ANTI joins unless they're - * marked as inner_unique. This is because nested loop SEMI/ANTI joins - * don't scan the inner node to completion, which will mean memoize cannot - * mark the cache entry as complete. - * - * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique - * = true. Should we? See add_paths_to_joinrel() + * Currently we don't do this for SEMI and ANTI joins, because nested loop + * SEMI/ANTI joins don't scan the inner node to completion, which means + * memoize cannot mark the cache entry as complete. Nor can we mark the + * cache entry as complete after fetching the first inner tuple, because + * if that tuple and the current outer tuple don't satisfy the join + * clauses, a second inner tuple that satisfies the parameters would find + * the cache entry already marked as complete. The only exception is when + * the inner relation is provably unique, as in that case, there won't be + * a second matching tuple and we can safely mark the cache entry as + * complete after fetching the first inner tuple. Note that in such + * cases, the SEMI join should have been reduced to an inner join by + * reduce_unique_semijoins. */ - if (!extra->inner_unique && (jointype == JOIN_SEMI || - jointype == JOIN_ANTI)) + if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) && + !extra->inner_unique) return NULL; /* diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 26a3e050086..f45131c34c5 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -3333,6 +3333,13 @@ eval_const_expressions_mutator(Node *node, -1, coalesceexpr->coalescecollid); + /* + * If there's exactly one surviving argument, we no longer + * need COALESCE at all: the result is that argument + */ + if (list_length(newargs) == 1) + return (Node *) linitial(newargs); + newcoalesce = makeNode(CoalesceExpr); newcoalesce->coalescetype = coalesceexpr->coalescetype; newcoalesce->coalescecollid = coalesceexpr->coalescecollid; diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 50f53159d58..70a0d832a11 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -2668,6 +2668,12 @@ alter_table_cmd: c->alterDeferrability = true; if ($4 & CAS_NO_INHERIT) c->alterInheritability = true; + /* handle unsupported case with specific error message */ + if ($4 & CAS_NOT_VALID) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("constraints cannot be altered to be NOT VALID"), + parser_errposition(@4)); processCASbits($4, @4, "FOREIGN KEY", &c->deferrable, &c->initdeferred, @@ -6035,6 +6041,26 @@ CreateTrigStmt: EXECUTE FUNCTION_or_PROCEDURE func_name '(' TriggerFuncArgs ')' { CreateTrigStmt *n = makeNode(CreateTrigStmt); + bool dummy; + + if (($11 & CAS_NOT_VALID) != 0) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("constraint triggers cannot be marked %s", + "NOT VALID"), + parser_errposition(@11)); + if (($11 & CAS_NO_INHERIT) != 0) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("constraint triggers cannot be marked %s", + "NO INHERIT"), + parser_errposition(@11)); + if (($11 & CAS_NOT_ENFORCED) != 0) + ereport(ERROR, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("constraint triggers cannot be marked %s", + "NOT ENFORCED"), + parser_errposition(@11)); n->replace = $2; if (n->replace) /* not supported, see CreateTrigger */ @@ -6054,7 +6080,7 @@ CreateTrigStmt: n->whenClause = $15; n->transitionRels = NIL; processCASbits($11, @11, "TRIGGER", - &n->deferrable, &n->initdeferred, NULL, + &n->deferrable, &n->initdeferred, &dummy, NULL, NULL, yyscanner); n->constrrel = $10; $$ = (Node *) n; @@ -7477,6 +7503,8 @@ fetch_args: cursor_name n->portalname = $1; n->direction = FETCH_FORWARD; n->howMany = 1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_NONE; $$ = (Node *) n; } | from_in cursor_name @@ -7486,6 +7514,19 @@ fetch_args: cursor_name n->portalname = $2; n->direction = FETCH_FORWARD; n->howMany = 1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_NONE; + $$ = (Node *) n; + } + | SignedIconst opt_from_in cursor_name + { + FetchStmt *n = makeNode(FetchStmt); + + n->portalname = $3; + n->direction = FETCH_FORWARD; + n->howMany = $1; + n->location = @1; + n->direction_keyword = FETCH_KEYWORD_NONE; $$ = (Node *) n; } | NEXT opt_from_in cursor_name @@ -7495,6 +7536,8 @@ fetch_args: cursor_name n->portalname = $3; n->direction = FETCH_FORWARD; n->howMany = 1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_NEXT; $$ = (Node *) n; } | PRIOR opt_from_in cursor_name @@ -7504,6 +7547,8 @@ fetch_args: cursor_name n->portalname = $3; n->direction = FETCH_BACKWARD; n->howMany = 1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_PRIOR; $$ = (Node *) n; } | FIRST_P opt_from_in cursor_name @@ -7513,6 +7558,8 @@ fetch_args: cursor_name n->portalname = $3; n->direction = FETCH_ABSOLUTE; n->howMany = 1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_FIRST; $$ = (Node *) n; } | LAST_P opt_from_in cursor_name @@ -7522,6 +7569,8 @@ fetch_args: cursor_name n->portalname = $3; n->direction = FETCH_ABSOLUTE; n->howMany = -1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_LAST; $$ = (Node *) n; } | ABSOLUTE_P SignedIconst opt_from_in cursor_name @@ -7531,6 +7580,8 @@ fetch_args: cursor_name n->portalname = $4; n->direction = FETCH_ABSOLUTE; n->howMany = $2; + n->location = @2; + n->direction_keyword = FETCH_KEYWORD_ABSOLUTE; $$ = (Node *) n; } | RELATIVE_P SignedIconst opt_from_in cursor_name @@ -7540,15 +7591,8 @@ fetch_args: cursor_name n->portalname = $4; n->direction = FETCH_RELATIVE; n->howMany = $2; - $$ = (Node *) n; - } - | SignedIconst opt_from_in cursor_name - { - FetchStmt *n = makeNode(FetchStmt); - - n->portalname = $3; - n->direction = FETCH_FORWARD; - n->howMany = $1; + n->location = @2; + n->direction_keyword = FETCH_KEYWORD_RELATIVE; $$ = (Node *) n; } | ALL opt_from_in cursor_name @@ -7558,6 +7602,8 @@ fetch_args: cursor_name n->portalname = $3; n->direction = FETCH_FORWARD; n->howMany = FETCH_ALL; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_ALL; $$ = (Node *) n; } | FORWARD opt_from_in cursor_name @@ -7567,6 +7613,8 @@ fetch_args: cursor_name n->portalname = $3; n->direction = FETCH_FORWARD; n->howMany = 1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_FORWARD; $$ = (Node *) n; } | FORWARD SignedIconst opt_from_in cursor_name @@ -7576,6 +7624,8 @@ fetch_args: cursor_name n->portalname = $4; n->direction = FETCH_FORWARD; n->howMany = $2; + n->location = @2; + n->direction_keyword = FETCH_KEYWORD_FORWARD; $$ = (Node *) n; } | FORWARD ALL opt_from_in cursor_name @@ -7585,6 +7635,8 @@ fetch_args: cursor_name n->portalname = $4; n->direction = FETCH_FORWARD; n->howMany = FETCH_ALL; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_FORWARD_ALL; $$ = (Node *) n; } | BACKWARD opt_from_in cursor_name @@ -7594,6 +7646,8 @@ fetch_args: cursor_name n->portalname = $3; n->direction = FETCH_BACKWARD; n->howMany = 1; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_BACKWARD; $$ = (Node *) n; } | BACKWARD SignedIconst opt_from_in cursor_name @@ -7603,6 +7657,8 @@ fetch_args: cursor_name n->portalname = $4; n->direction = FETCH_BACKWARD; n->howMany = $2; + n->location = @2; + n->direction_keyword = FETCH_KEYWORD_BACKWARD; $$ = (Node *) n; } | BACKWARD ALL opt_from_in cursor_name @@ -7612,6 +7668,8 @@ fetch_args: cursor_name n->portalname = $4; n->direction = FETCH_BACKWARD; n->howMany = FETCH_ALL; + n->location = -1; + n->direction_keyword = FETCH_KEYWORD_BACKWARD_ALL; $$ = (Node *) n; } ; @@ -11627,7 +11685,7 @@ AlterDomainStmt: { AlterDomainStmt *n = makeNode(AlterDomainStmt); - n->subtype = 'T'; + n->subtype = AD_AlterDefault; n->typeName = $3; n->def = $4; $$ = (Node *) n; @@ -11637,7 +11695,7 @@ AlterDomainStmt: { AlterDomainStmt *n = makeNode(AlterDomainStmt); - n->subtype = 'N'; + n->subtype = AD_DropNotNull; n->typeName = $3; $$ = (Node *) n; } @@ -11646,7 +11704,7 @@ AlterDomainStmt: { AlterDomainStmt *n = makeNode(AlterDomainStmt); - n->subtype = 'O'; + n->subtype = AD_SetNotNull; n->typeName = $3; $$ = (Node *) n; } @@ -11655,7 +11713,7 @@ AlterDomainStmt: { AlterDomainStmt *n = makeNode(AlterDomainStmt); - n->subtype = 'C'; + n->subtype = AD_AddConstraint; n->typeName = $3; n->def = $5; $$ = (Node *) n; @@ -11665,7 +11723,7 @@ AlterDomainStmt: { AlterDomainStmt *n = makeNode(AlterDomainStmt); - n->subtype = 'X'; + n->subtype = AD_DropConstraint; n->typeName = $3; n->name = $6; n->behavior = $7; @@ -11677,7 +11735,7 @@ AlterDomainStmt: { AlterDomainStmt *n = makeNode(AlterDomainStmt); - n->subtype = 'X'; + n->subtype = AD_DropConstraint; n->typeName = $3; n->name = $8; n->behavior = $9; @@ -11689,7 +11747,7 @@ AlterDomainStmt: { AlterDomainStmt *n = makeNode(AlterDomainStmt); - n->subtype = 'V'; + n->subtype = AD_ValidateConstraint; n->typeName = $3; n->name = $6; $$ = (Node *) n; diff --git a/src/backend/postmaster/pgarch.c b/src/backend/postmaster/pgarch.c index 7e622ae4bd2..78e39e5f866 100644 --- a/src/backend/postmaster/pgarch.c +++ b/src/backend/postmaster/pgarch.c @@ -718,15 +718,15 @@ pgarch_readyXlog(char *xlog) /* * Store the file in our max-heap if it has a high enough priority. */ - if (arch_files->arch_heap->bh_size < NUM_FILES_PER_DIRECTORY_SCAN) + if (binaryheap_size(arch_files->arch_heap) < NUM_FILES_PER_DIRECTORY_SCAN) { /* If the heap isn't full yet, quickly add it. */ - arch_file = arch_files->arch_filenames[arch_files->arch_heap->bh_size]; + arch_file = arch_files->arch_filenames[binaryheap_size(arch_files->arch_heap)]; strcpy(arch_file, basename); binaryheap_add_unordered(arch_files->arch_heap, CStringGetDatum(arch_file)); /* If we just filled the heap, make it a valid one. */ - if (arch_files->arch_heap->bh_size == NUM_FILES_PER_DIRECTORY_SCAN) + if (binaryheap_size(arch_files->arch_heap) == NUM_FILES_PER_DIRECTORY_SCAN) binaryheap_build(arch_files->arch_heap); } else if (ready_file_comparator(binaryheap_first(arch_files->arch_heap), @@ -744,21 +744,21 @@ pgarch_readyXlog(char *xlog) FreeDir(rldir); /* If no files were found, simply return. */ - if (arch_files->arch_heap->bh_size == 0) + if (binaryheap_empty(arch_files->arch_heap)) return false; /* * If we didn't fill the heap, we didn't make it a valid one. Do that * now. */ - if (arch_files->arch_heap->bh_size < NUM_FILES_PER_DIRECTORY_SCAN) + if (binaryheap_size(arch_files->arch_heap) < NUM_FILES_PER_DIRECTORY_SCAN) binaryheap_build(arch_files->arch_heap); /* * Fill arch_files array with the files to archive in ascending order of * priority. */ - arch_files->arch_files_size = arch_files->arch_heap->bh_size; + arch_files->arch_files_size = binaryheap_size(arch_files->arch_heap); for (int i = 0; i < arch_files->arch_files_size; i++) arch_files->arch_files[i] = DatumGetCString(binaryheap_remove_first(arch_files->arch_heap)); diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c index 78193cfb964..d9eab5357bc 100644 --- a/src/backend/regex/regc_pg_locale.c +++ b/src/backend/regex/regc_pg_locale.c @@ -20,58 +20,13 @@ #include "common/unicode_category.h" #include "utils/pg_locale.h" -/* - * For the libc provider, to provide as much functionality as possible on a - * variety of platforms without going so far as to implement everything from - * scratch, we use several implementation strategies depending on the - * situation: - * - * 1. In C/POSIX collations, we use hard-wired code. We can't depend on - * the <ctype.h> functions since those will obey LC_CTYPE. Note that these - * collations don't give a fig about multibyte characters. - * - * 2. When working in UTF8 encoding, we use the <wctype.h> functions. - * This assumes that every platform uses Unicode codepoints directly - * as the wchar_t representation of Unicode. (XXX: ICU makes this assumption - * even for non-UTF8 encodings, which may be a problem.) On some platforms - * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF. - * - * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar - * values up to 255, and punt for values above that. This is 100% correct - * only in single-byte encodings such as LATINn. However, non-Unicode - * multibyte encodings are mostly Far Eastern character sets for which the - * properties being tested here aren't very relevant for higher code values - * anyway. The difficulty with using the <wctype.h> functions with - * non-Unicode multibyte encodings is that we can have no certainty that - * the platform's wchar_t representation matches what we do in pg_wchar - * conversions. - * - * As a special case, in the "default" collation, (2) and (3) force ASCII - * letters to follow ASCII upcase/downcase rules, while in a non-default - * collation we just let the library functions do what they will. The case - * where this matters is treatment of I/i in Turkish, and the behavior is - * meant to match the upper()/lower() SQL functions. - * - * We store the active collation setting in static variables. In principle - * it could be passed down to here via the regex library's "struct vars" data - * structure; but that would require somewhat invasive changes in the regex - * library, and right now there's no real benefit to be gained from that. - * - * NB: the coding here assumes pg_wchar is an unsigned type. - */ - -typedef enum -{ - PG_REGEX_STRATEGY_C, /* C locale (encoding independent) */ - PG_REGEX_STRATEGY_BUILTIN, /* built-in Unicode semantics */ - PG_REGEX_STRATEGY_LIBC_WIDE, /* Use locale_t <wctype.h> functions */ - PG_REGEX_STRATEGY_LIBC_1BYTE, /* Use locale_t <ctype.h> functions */ - PG_REGEX_STRATEGY_ICU, /* Use ICU uchar.h functions */ -} PG_Locale_Strategy; - -static PG_Locale_Strategy pg_regex_strategy; static pg_locale_t pg_regex_locale; +static struct pg_locale_struct dummy_c_locale = { + .collate_is_c = true, + .ctype_is_c = true, +}; + /* * Hard-wired character properties for C locale */ @@ -228,7 +183,6 @@ void pg_set_regex_collation(Oid collation) { pg_locale_t locale = 0; - PG_Locale_Strategy strategy; if (!OidIsValid(collation)) { @@ -249,8 +203,7 @@ pg_set_regex_collation(Oid collation) * catalog access is available, so we can't call * pg_newlocale_from_collation(). */ - strategy = PG_REGEX_STRATEGY_C; - locale = 0; + locale = &dummy_c_locale; } else { @@ -267,113 +220,41 @@ pg_set_regex_collation(Oid collation) * C/POSIX collations use this path regardless of database * encoding */ - strategy = PG_REGEX_STRATEGY_C; - locale = 0; - } - else if (locale->provider == COLLPROVIDER_BUILTIN) - { - Assert(GetDatabaseEncoding() == PG_UTF8); - strategy = PG_REGEX_STRATEGY_BUILTIN; - } -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - { - strategy = PG_REGEX_STRATEGY_ICU; - } -#endif - else - { - Assert(locale->provider == COLLPROVIDER_LIBC); - if (GetDatabaseEncoding() == PG_UTF8) - strategy = PG_REGEX_STRATEGY_LIBC_WIDE; - else - strategy = PG_REGEX_STRATEGY_LIBC_1BYTE; + locale = &dummy_c_locale; } } - pg_regex_strategy = strategy; pg_regex_locale = locale; } static int pg_wc_isdigit(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISDIGIT)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isdigit(c, !pg_regex_locale->info.builtin.casemap_full); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswdigit_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isdigit_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isdigit(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISDIGIT)); + else + return pg_regex_locale->ctype->wc_isdigit(c, pg_regex_locale); } static int pg_wc_isalpha(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISALPHA)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isalpha(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalpha_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isalpha_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isalpha(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISALPHA)); + else + return pg_regex_locale->ctype->wc_isalpha(c, pg_regex_locale); } static int pg_wc_isalnum(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISALNUM)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isalnum(c, !pg_regex_locale->info.builtin.casemap_full); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswalnum_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isalnum_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isalnum(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISALNUM)); + else + return pg_regex_locale->ctype->wc_isalnum(c, pg_regex_locale); } static int @@ -388,231 +269,87 @@ pg_wc_isword(pg_wchar c) static int pg_wc_isupper(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISUPPER)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isupper(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswupper_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isupper_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isupper(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISUPPER)); + else + return pg_regex_locale->ctype->wc_isupper(c, pg_regex_locale); } static int pg_wc_islower(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISLOWER)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_islower(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswlower_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - islower_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_islower(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISLOWER)); + else + return pg_regex_locale->ctype->wc_islower(c, pg_regex_locale); } static int pg_wc_isgraph(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISGRAPH)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isgraph(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswgraph_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isgraph_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isgraph(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISGRAPH)); + else + return pg_regex_locale->ctype->wc_isgraph(c, pg_regex_locale); } static int pg_wc_isprint(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISPRINT)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isprint(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswprint_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isprint_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isprint(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISPRINT)); + else + return pg_regex_locale->ctype->wc_isprint(c, pg_regex_locale); } static int pg_wc_ispunct(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISPUNCT)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_ispunct(c, !pg_regex_locale->info.builtin.casemap_full); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswpunct_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - ispunct_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_ispunct(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISPUNCT)); + else + return pg_regex_locale->ctype->wc_ispunct(c, pg_regex_locale); } static int pg_wc_isspace(pg_wchar c) { - switch (pg_regex_strategy) - { - case PG_REGEX_STRATEGY_C: - return (c <= (pg_wchar) 127 && - (pg_char_properties[c] & PG_ISSPACE)); - case PG_REGEX_STRATEGY_BUILTIN: - return pg_u_isspace(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return iswspace_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - return (c <= (pg_wchar) UCHAR_MAX && - isspace_l((unsigned char) c, pg_regex_locale->info.lt)); - break; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_isspace(c); -#endif - break; - } - return 0; /* can't get here, but keep compiler quiet */ + if (pg_regex_locale->ctype_is_c) + return (c <= (pg_wchar) 127 && + (pg_char_properties[c] & PG_ISSPACE)); + else + return pg_regex_locale->ctype->wc_isspace(c, pg_regex_locale); } static pg_wchar pg_wc_toupper(pg_wchar c) { - switch (pg_regex_strategy) + if (pg_regex_locale->ctype_is_c) { - case PG_REGEX_STRATEGY_C: - if (c <= (pg_wchar) 127) - return pg_ascii_toupper((unsigned char) c); - return c; - case PG_REGEX_STRATEGY_BUILTIN: - return unicode_uppercase_simple(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - /* force C behavior for ASCII characters, per comments above */ - if (pg_regex_locale->is_default && c <= (pg_wchar) 127) - return pg_ascii_toupper((unsigned char) c); - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towupper_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - /* force C behavior for ASCII characters, per comments above */ - if (pg_regex_locale->is_default && c <= (pg_wchar) 127) - return pg_ascii_toupper((unsigned char) c); - if (c <= (pg_wchar) UCHAR_MAX) - return toupper_l((unsigned char) c, pg_regex_locale->info.lt); - return c; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_toupper(c); -#endif - break; + if (c <= (pg_wchar) 127) + return pg_ascii_toupper((unsigned char) c); + return c; } - return 0; /* can't get here, but keep compiler quiet */ + else + return pg_regex_locale->ctype->wc_toupper(c, pg_regex_locale); } static pg_wchar pg_wc_tolower(pg_wchar c) { - switch (pg_regex_strategy) + if (pg_regex_locale->ctype_is_c) { - case PG_REGEX_STRATEGY_C: - if (c <= (pg_wchar) 127) - return pg_ascii_tolower((unsigned char) c); - return c; - case PG_REGEX_STRATEGY_BUILTIN: - return unicode_lowercase_simple(c); - case PG_REGEX_STRATEGY_LIBC_WIDE: - /* force C behavior for ASCII characters, per comments above */ - if (pg_regex_locale->is_default && c <= (pg_wchar) 127) - return pg_ascii_tolower((unsigned char) c); - if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF) - return towlower_l((wint_t) c, pg_regex_locale->info.lt); - /* FALL THRU */ - case PG_REGEX_STRATEGY_LIBC_1BYTE: - /* force C behavior for ASCII characters, per comments above */ - if (pg_regex_locale->is_default && c <= (pg_wchar) 127) - return pg_ascii_tolower((unsigned char) c); - if (c <= (pg_wchar) UCHAR_MAX) - return tolower_l((unsigned char) c, pg_regex_locale->info.lt); - return c; - case PG_REGEX_STRATEGY_ICU: -#ifdef USE_ICU - return u_tolower(c); -#endif - break; + if (c <= (pg_wchar) 127) + return pg_ascii_tolower((unsigned char) c); + return c; } - return 0; /* can't get here, but keep compiler quiet */ + else + return pg_regex_locale->ctype->wc_tolower(c, pg_regex_locale); } @@ -738,37 +475,25 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode) * would always be true for production values of MAX_SIMPLE_CHR, but it's * useful to allow it to be small for testing purposes.) */ - switch (pg_regex_strategy) + if (pg_regex_locale->ctype_is_c) { - case PG_REGEX_STRATEGY_C: #if MAX_SIMPLE_CHR >= 127 - max_chr = (pg_wchar) 127; - pcc->cv.cclasscode = -1; + max_chr = (pg_wchar) 127; + pcc->cv.cclasscode = -1; #else - max_chr = (pg_wchar) MAX_SIMPLE_CHR; + max_chr = (pg_wchar) MAX_SIMPLE_CHR; #endif - break; - case PG_REGEX_STRATEGY_BUILTIN: - max_chr = (pg_wchar) MAX_SIMPLE_CHR; - break; - case PG_REGEX_STRATEGY_LIBC_WIDE: - max_chr = (pg_wchar) MAX_SIMPLE_CHR; - break; - case PG_REGEX_STRATEGY_LIBC_1BYTE: -#if MAX_SIMPLE_CHR >= UCHAR_MAX - max_chr = (pg_wchar) UCHAR_MAX; + } + else + { + if (pg_regex_locale->ctype->max_chr != 0 && + pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR) + { + max_chr = pg_regex_locale->ctype->max_chr; pcc->cv.cclasscode = -1; -#else - max_chr = (pg_wchar) MAX_SIMPLE_CHR; -#endif - break; - case PG_REGEX_STRATEGY_ICU: + } + else max_chr = (pg_wchar) MAX_SIMPLE_CHR; - break; - default: - Assert(false); - max_chr = 0; /* can't get here, but keep compiler quiet */ - break; } /* diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index c4299c76fb1..7b4e8629553 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -1415,7 +1415,7 @@ ReorderBufferIterTXNNext(ReorderBuffer *rb, ReorderBufferIterTXNState *state) int32 off; /* nothing there anymore */ - if (state->heap->bh_size == 0) + if (binaryheap_empty(state->heap)) return NULL; off = DatumGetInt32(binaryheap_first(state->heap)); diff --git a/src/backend/rewrite/rewriteHandler.c b/src/backend/rewrite/rewriteHandler.c index 2ef0e7fbf3a..adc9e7600e1 100644 --- a/src/backend/rewrite/rewriteHandler.c +++ b/src/backend/rewrite/rewriteHandler.c @@ -923,8 +923,9 @@ rewriteTargetListIU(List *targetList, apply_default = true; /* - * Can only insert DEFAULT into generated columns, regardless of - * any OVERRIDING clauses. + * Can only insert DEFAULT into generated columns. (The + * OVERRIDING clause does not apply to generated columns, so we + * don't consider it here.) */ if (att_tup->attgenerated && !apply_default) { diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 667aa0c0c78..bd68d7e0ca9 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -4550,11 +4550,9 @@ DropRelationBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum, if (RelFileLocatorBackendIsTemp(rlocator)) { if (rlocator.backend == MyProcNumber) - { - for (j = 0; j < nforks; j++) - DropRelationLocalBuffers(rlocator.locator, forkNum[j], - firstDelBlock[j]); - } + DropRelationLocalBuffers(rlocator.locator, forkNum, nforks, + firstDelBlock); + return; } diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c index ba26627f7b0..3da9c41ee1d 100644 --- a/src/backend/storage/buffer/localbuf.c +++ b/src/backend/storage/buffer/localbuf.c @@ -660,10 +660,11 @@ InvalidateLocalBuffer(BufferDesc *bufHdr, bool check_unreferenced) * See DropRelationBuffers in bufmgr.c for more notes. */ void -DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, - BlockNumber firstDelBlock) +DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber *forkNum, + int nforks, BlockNumber *firstDelBlock) { int i; + int j; for (i = 0; i < NLocBuffer; i++) { @@ -672,12 +673,18 @@ DropRelationLocalBuffers(RelFileLocator rlocator, ForkNumber forkNum, buf_state = pg_atomic_read_u32(&bufHdr->state); - if ((buf_state & BM_TAG_VALID) && - BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator) && - BufTagGetForkNum(&bufHdr->tag) == forkNum && - bufHdr->tag.blockNum >= firstDelBlock) + if (!(buf_state & BM_TAG_VALID) || + !BufTagMatchesRelFileLocator(&bufHdr->tag, &rlocator)) + continue; + + for (j = 0; j < nforks; j++) { - InvalidateLocalBuffer(bufHdr, true); + if (BufTagGetForkNum(&bufHdr->tag) == forkNum[j] && + bufHdr->tag.blockNum >= firstDelBlock[j]) + { + InvalidateLocalBuffer(bufHdr, true); + break; + } } } } diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 0e8299dd556..a4ec7959f31 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -400,25 +400,22 @@ pg_fsync(int fd) * portable, even if it runs ok on the current system. * * We assert here that a descriptor for a file was opened with write - * permissions (either O_RDWR or O_WRONLY) and for a directory without - * write permissions (O_RDONLY). + * permissions (i.e., not O_RDONLY) and for a directory without write + * permissions (O_RDONLY). Notice that the assertion check is made even + * if fsync() is disabled. * - * Ignore any fstat errors and let the follow-up fsync() do its work. - * Doing this sanity check here counts for the case where fsync() is - * disabled. + * If fstat() fails, ignore it and let the follow-up fsync() complain. */ if (fstat(fd, &st) == 0) { int desc_flags = fcntl(fd, F_GETFL); - /* - * O_RDONLY is historically 0, so just make sure that for directories - * no write flags are used. - */ + desc_flags &= O_ACCMODE; + if (S_ISDIR(st.st_mode)) - Assert((desc_flags & (O_RDWR | O_WRONLY)) == 0); + Assert(desc_flags == O_RDONLY); else - Assert((desc_flags & (O_RDWR | O_WRONLY)) != 0); + Assert(desc_flags != O_RDONLY); } errno = 0; #endif diff --git a/src/backend/storage/ipc/dsm_registry.c b/src/backend/storage/ipc/dsm_registry.c index 1d4fd31ffed..828c2ff0c7f 100644 --- a/src/backend/storage/ipc/dsm_registry.c +++ b/src/backend/storage/ipc/dsm_registry.c @@ -15,6 +15,20 @@ * current backend. This function guarantees that only one backend * initializes the segment and that all other backends just attach it. * + * A DSA can be created in or retrieved from the registry by calling + * GetNamedDSA(). As with GetNamedDSMSegment(), if a DSA with the provided + * name does not yet exist, it is created. Otherwise, GetNamedDSA() + * ensures the DSA is attached to the current backend. This function + * guarantees that only one backend initializes the DSA and that all other + * backends just attach it. + * + * A dshash table can be created in or retrieved from the registry by + * calling GetNamedDSHash(). As with GetNamedDSMSegment(), if a hash + * table with the provided name does not yet exist, it is created. + * Otherwise, GetNamedDSHash() ensures the hash table is attached to the + * current backend. This function guarantees that only one backend + * initializes the table and that all other backends just attach it. + * * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * @@ -32,6 +46,12 @@ #include "storage/shmem.h" #include "utils/memutils.h" +#define DSMR_NAME_LEN 128 + +#define DSMR_DSA_TRANCHE_SUFFIX " DSA" +#define DSMR_DSA_TRANCHE_SUFFIX_LEN (sizeof(DSMR_DSA_TRANCHE_SUFFIX) - 1) +#define DSMR_DSA_TRANCHE_NAME_LEN (DSMR_NAME_LEN + DSMR_DSA_TRANCHE_SUFFIX_LEN) + typedef struct DSMRegistryCtxStruct { dsa_handle dsah; @@ -40,15 +60,48 @@ typedef struct DSMRegistryCtxStruct static DSMRegistryCtxStruct *DSMRegistryCtx; -typedef struct DSMRegistryEntry +typedef struct NamedDSMState { - char name[64]; dsm_handle handle; size_t size; +} NamedDSMState; + +typedef struct NamedDSAState +{ + dsa_handle handle; + int tranche; + char tranche_name[DSMR_DSA_TRANCHE_NAME_LEN]; +} NamedDSAState; + +typedef struct NamedDSHState +{ + NamedDSAState dsa; + dshash_table_handle handle; + int tranche; + char tranche_name[DSMR_NAME_LEN]; +} NamedDSHState; + +typedef enum DSMREntryType +{ + DSMR_ENTRY_TYPE_DSM, + DSMR_ENTRY_TYPE_DSA, + DSMR_ENTRY_TYPE_DSH, +} DSMREntryType; + +typedef struct DSMRegistryEntry +{ + char name[DSMR_NAME_LEN]; + DSMREntryType type; + union + { + NamedDSMState dsm; + NamedDSAState dsa; + NamedDSHState dsh; + } data; } DSMRegistryEntry; static const dshash_parameters dsh_params = { - offsetof(DSMRegistryEntry, handle), + offsetof(DSMRegistryEntry, type), sizeof(DSMRegistryEntry), dshash_strcmp, dshash_strhash, @@ -141,7 +194,7 @@ GetNamedDSMSegment(const char *name, size_t size, ereport(ERROR, (errmsg("DSM segment name cannot be empty"))); - if (strlen(name) >= offsetof(DSMRegistryEntry, handle)) + if (strlen(name) >= offsetof(DSMRegistryEntry, type)) ereport(ERROR, (errmsg("DSM segment name too long"))); @@ -158,32 +211,39 @@ GetNamedDSMSegment(const char *name, size_t size, entry = dshash_find_or_insert(dsm_registry_table, name, found); if (!(*found)) { + NamedDSMState *state = &entry->data.dsm; + dsm_segment *seg; + + entry->type = DSMR_ENTRY_TYPE_DSM; + /* Initialize the segment. */ - dsm_segment *seg = dsm_create(size, 0); + seg = dsm_create(size, 0); dsm_pin_segment(seg); dsm_pin_mapping(seg); - entry->handle = dsm_segment_handle(seg); - entry->size = size; + state->handle = dsm_segment_handle(seg); + state->size = size; ret = dsm_segment_address(seg); if (init_callback) (*init_callback) (ret); } - else if (entry->size != size) - { + else if (entry->type != DSMR_ENTRY_TYPE_DSM) ereport(ERROR, - (errmsg("requested DSM segment size does not match size of " - "existing segment"))); - } + (errmsg("requested DSM segment does not match type of existing entry"))); + else if (entry->data.dsm.size != size) + ereport(ERROR, + (errmsg("requested DSM segment size does not match size of existing segment"))); else { - dsm_segment *seg = dsm_find_mapping(entry->handle); + NamedDSMState *state = &entry->data.dsm; + dsm_segment *seg; /* If the existing segment is not already attached, attach it now. */ + seg = dsm_find_mapping(state->handle); if (seg == NULL) { - seg = dsm_attach(entry->handle); + seg = dsm_attach(state->handle); if (seg == NULL) elog(ERROR, "could not map dynamic shared memory segment"); @@ -198,3 +258,180 @@ GetNamedDSMSegment(const char *name, size_t size, return ret; } + +/* + * Initialize or attach a named DSA. + * + * This routine returns a pointer to the DSA. A new LWLock tranche ID will be + * generated if needed. Note that the lock tranche will be registered with the + * provided name. Also note that this should be called at most once for a + * given DSA in each backend. + */ +dsa_area * +GetNamedDSA(const char *name, bool *found) +{ + DSMRegistryEntry *entry; + MemoryContext oldcontext; + dsa_area *ret; + + Assert(found); + + if (!name || *name == '\0') + ereport(ERROR, + (errmsg("DSA name cannot be empty"))); + + if (strlen(name) >= offsetof(DSMRegistryEntry, type)) + ereport(ERROR, + (errmsg("DSA name too long"))); + + /* Be sure any local memory allocated by DSM/DSA routines is persistent. */ + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + + /* Connect to the registry. */ + init_dsm_registry(); + + entry = dshash_find_or_insert(dsm_registry_table, name, found); + if (!(*found)) + { + NamedDSAState *state = &entry->data.dsa; + + entry->type = DSMR_ENTRY_TYPE_DSA; + + /* Initialize the LWLock tranche for the DSA. */ + state->tranche = LWLockNewTrancheId(); + strcpy(state->tranche_name, name); + LWLockRegisterTranche(state->tranche, state->tranche_name); + + /* Initialize the DSA. */ + ret = dsa_create(state->tranche); + dsa_pin(ret); + dsa_pin_mapping(ret); + + /* Store handle for other backends to use. */ + state->handle = dsa_get_handle(ret); + } + else if (entry->type != DSMR_ENTRY_TYPE_DSA) + ereport(ERROR, + (errmsg("requested DSA does not match type of existing entry"))); + else + { + NamedDSAState *state = &entry->data.dsa; + + if (dsa_is_attached(state->handle)) + ereport(ERROR, + (errmsg("requested DSA already attached to current process"))); + + /* Initialize existing LWLock tranche for the DSA. */ + LWLockRegisterTranche(state->tranche, state->tranche_name); + + /* Attach to existing DSA. */ + ret = dsa_attach(state->handle); + dsa_pin_mapping(ret); + } + + dshash_release_lock(dsm_registry_table, entry); + MemoryContextSwitchTo(oldcontext); + + return ret; +} + +/* + * Initialize or attach a named dshash table. + * + * This routine returns the address of the table. The tranche_id member of + * params is ignored; new tranche IDs will be generated if needed. Note that + * the DSA lock tranche will be registered with the provided name with " DSA" + * appended. The dshash lock tranche will be registered with the provided + * name. Also note that this should be called at most once for a given table + * in each backend. + */ +dshash_table * +GetNamedDSHash(const char *name, const dshash_parameters *params, bool *found) +{ + DSMRegistryEntry *entry; + MemoryContext oldcontext; + dshash_table *ret; + + Assert(params); + Assert(found); + + if (!name || *name == '\0') + ereport(ERROR, + (errmsg("DSHash name cannot be empty"))); + + if (strlen(name) >= offsetof(DSMRegistryEntry, type)) + ereport(ERROR, + (errmsg("DSHash name too long"))); + + /* Be sure any local memory allocated by DSM/DSA routines is persistent. */ + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + + /* Connect to the registry. */ + init_dsm_registry(); + + entry = dshash_find_or_insert(dsm_registry_table, name, found); + if (!(*found)) + { + NamedDSAState *dsa_state = &entry->data.dsh.dsa; + NamedDSHState *dsh_state = &entry->data.dsh; + dshash_parameters params_copy; + dsa_area *dsa; + + entry->type = DSMR_ENTRY_TYPE_DSH; + + /* Initialize the LWLock tranche for the DSA. */ + dsa_state->tranche = LWLockNewTrancheId(); + sprintf(dsa_state->tranche_name, "%s%s", name, DSMR_DSA_TRANCHE_SUFFIX); + LWLockRegisterTranche(dsa_state->tranche, dsa_state->tranche_name); + + /* Initialize the LWLock tranche for the dshash table. */ + dsh_state->tranche = LWLockNewTrancheId(); + strcpy(dsh_state->tranche_name, name); + LWLockRegisterTranche(dsh_state->tranche, dsh_state->tranche_name); + + /* Initialize the DSA for the hash table. */ + dsa = dsa_create(dsa_state->tranche); + dsa_pin(dsa); + dsa_pin_mapping(dsa); + + /* Initialize the dshash table. */ + memcpy(¶ms_copy, params, sizeof(dshash_parameters)); + params_copy.tranche_id = dsh_state->tranche; + ret = dshash_create(dsa, ¶ms_copy, NULL); + + /* Store handles for other backends to use. */ + dsa_state->handle = dsa_get_handle(dsa); + dsh_state->handle = dshash_get_hash_table_handle(ret); + } + else if (entry->type != DSMR_ENTRY_TYPE_DSH) + ereport(ERROR, + (errmsg("requested DSHash does not match type of existing entry"))); + else + { + NamedDSAState *dsa_state = &entry->data.dsh.dsa; + NamedDSHState *dsh_state = &entry->data.dsh; + dsa_area *dsa; + + /* XXX: Should we verify params matches what table was created with? */ + + if (dsa_is_attached(dsa_state->handle)) + ereport(ERROR, + (errmsg("requested DSHash already attached to current process"))); + + /* Initialize existing LWLock tranches for the DSA and dshash table. */ + LWLockRegisterTranche(dsa_state->tranche, dsa_state->tranche_name); + LWLockRegisterTranche(dsh_state->tranche, dsh_state->tranche_name); + + /* Attach to existing DSA for the hash table. */ + dsa = dsa_attach(dsa_state->handle); + dsa_pin_mapping(dsa); + + /* Attach to existing dshash table. */ + ret = dshash_attach(dsa, params, dsh_state->handle, NULL); + } + + dshash_release_lock(dsm_registry_table, entry); + MemoryContextSwitchTo(oldcontext); + + return ret; +} diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c index c9ae3b45b76..ca3656fc76f 100644 --- a/src/backend/storage/ipc/shmem.c +++ b/src/backend/storage/ipc/shmem.c @@ -679,12 +679,10 @@ pg_get_shmem_allocations_numa(PG_FUNCTION_ARGS) */ for (i = 0; i < shm_ent_page_count; i++) { - volatile uint64 touch pg_attribute_unused(); - page_ptrs[i] = startptr + (i * os_page_size); if (firstNumaTouch) - pg_numa_touch_mem_if_required(touch, page_ptrs[i]); + pg_numa_touch_mem_if_required(page_ptrs[i]); CHECK_FOR_INTERRUPTS(); } diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 25fe3d58016..aff8510755f 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1343,7 +1343,7 @@ ProcessUtilitySlow(ParseState *pstate, */ switch (stmt->subtype) { - case 'T': /* ALTER DOMAIN DEFAULT */ + case AD_AlterDefault: /* * Recursively alter column default for table and, @@ -1353,30 +1353,30 @@ ProcessUtilitySlow(ParseState *pstate, AlterDomainDefault(stmt->typeName, stmt->def); break; - case 'N': /* ALTER DOMAIN DROP NOT NULL */ + case AD_DropNotNull: address = AlterDomainNotNull(stmt->typeName, false); break; - case 'O': /* ALTER DOMAIN SET NOT NULL */ + case AD_SetNotNull: address = AlterDomainNotNull(stmt->typeName, true); break; - case 'C': /* ADD CONSTRAINT */ + case AD_AddConstraint: address = AlterDomainAddConstraint(stmt->typeName, stmt->def, &secondaryObject); break; - case 'X': /* DROP CONSTRAINT */ + case AD_DropConstraint: address = AlterDomainDropConstraint(stmt->typeName, stmt->name, stmt->behavior, stmt->missing_ok); break; - case 'V': /* VALIDATE CONSTRAINT */ + case AD_ValidateConstraint: address = AlterDomainValidateConstraint(stmt->typeName, stmt->name); diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 4a233b63c32..ffeacf2b819 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -23,6 +23,7 @@ OBJS = \ arrayutils.o \ ascii.o \ bool.o \ + bytea.o \ cash.o \ char.o \ cryptohashfuncs.o \ diff --git a/src/backend/utils/adt/bytea.c b/src/backend/utils/adt/bytea.c new file mode 100644 index 00000000000..2e539c2504e --- /dev/null +++ b/src/backend/utils/adt/bytea.c @@ -0,0 +1,1143 @@ +/*------------------------------------------------------------------------- + * + * bytea.c + * Functions for the bytea type. + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/bytea.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/detoast.h" +#include "catalog/pg_collation_d.h" +#include "catalog/pg_type_d.h" +#include "common/int.h" +#include "fmgr.h" +#include "libpq/pqformat.h" +#include "port/pg_bitutils.h" +#include "utils/builtins.h" +#include "utils/bytea.h" +#include "utils/fmgrprotos.h" +#include "utils/memutils.h" +#include "utils/sortsupport.h" +#include "utils/varlena.h" +#include "varatt.h" + +/* GUC variable */ +int bytea_output = BYTEA_OUTPUT_HEX; + +static bytea *bytea_catenate(bytea *t1, bytea *t2); +static bytea *bytea_substring(Datum str, int S, int L, + bool length_not_specified); +static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl); + +/* + * bytea_catenate + * Guts of byteacat(), broken out so it can be used by other functions + * + * Arguments can be in short-header form, but not compressed or out-of-line + */ +static bytea * +bytea_catenate(bytea *t1, bytea *t2) +{ + bytea *result; + int len1, + len2, + len; + char *ptr; + + len1 = VARSIZE_ANY_EXHDR(t1); + len2 = VARSIZE_ANY_EXHDR(t2); + + /* paranoia ... probably should throw error instead? */ + if (len1 < 0) + len1 = 0; + if (len2 < 0) + len2 = 0; + + len = len1 + len2 + VARHDRSZ; + result = (bytea *) palloc(len); + + /* Set size of result string... */ + SET_VARSIZE(result, len); + + /* Fill data field of result string... */ + ptr = VARDATA(result); + if (len1 > 0) + memcpy(ptr, VARDATA_ANY(t1), len1); + if (len2 > 0) + memcpy(ptr + len1, VARDATA_ANY(t2), len2); + + return result; +} + +#define PG_STR_GET_BYTEA(str_) \ + DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_))) + +static bytea * +bytea_substring(Datum str, + int S, + int L, + bool length_not_specified) +{ + int32 S1; /* adjusted start position */ + int32 L1; /* adjusted substring length */ + int32 E; /* end position */ + + /* + * The logic here should generally match text_substring(). + */ + S1 = Max(S, 1); + + if (length_not_specified) + { + /* + * Not passed a length - DatumGetByteaPSlice() grabs everything to the + * end of the string if we pass it a negative value for length. + */ + L1 = -1; + } + else if (L < 0) + { + /* SQL99 says to throw an error for E < S, i.e., negative length */ + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + L1 = -1; /* silence stupider compilers */ + } + else if (pg_add_s32_overflow(S, L, &E)) + { + /* + * L could be large enough for S + L to overflow, in which case the + * substring must run to end of string. + */ + L1 = -1; + } + else + { + /* + * A zero or negative value for the end position can happen if the + * start was negative or one. SQL99 says to return a zero-length + * string. + */ + if (E < 1) + return PG_STR_GET_BYTEA(""); + + L1 = E - S1; + } + + /* + * If the start position is past the end of the string, SQL99 says to + * return a zero-length string -- DatumGetByteaPSlice() will do that for + * us. We need only convert S1 to zero-based starting position. + */ + return DatumGetByteaPSlice(str, S1 - 1, L1); +} + +static bytea * +bytea_overlay(bytea *t1, bytea *t2, int sp, int sl) +{ + bytea *result; + bytea *s1; + bytea *s2; + int sp_pl_sl; + + /* + * Check for possible integer-overflow cases. For negative sp, throw a + * "substring length" error because that's what should be expected + * according to the spec's definition of OVERLAY(). + */ + if (sp <= 0) + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + if (pg_add_s32_overflow(sp, sl, &sp_pl_sl)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false); + s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true); + result = bytea_catenate(s1, t2); + result = bytea_catenate(result, s2); + + return result; +} + +/***************************************************************************** + * USER I/O ROUTINES * + *****************************************************************************/ + +#define VAL(CH) ((CH) - '0') +#define DIG(VAL) ((VAL) + '0') + +/* + * byteain - converts from printable representation of byte array + * + * Non-printable characters must be passed as '\nnn' (octal) and are + * converted to internal form. '\' must be passed as '\\'. + * ereport(ERROR, ...) if bad form. + * + * BUGS: + * The input is scanned twice. + * The error checking of input is minimal. + */ +Datum +byteain(PG_FUNCTION_ARGS) +{ + char *inputText = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + char *tp; + char *rp; + int bc; + bytea *result; + + /* Recognize hex input */ + if (inputText[0] == '\\' && inputText[1] == 'x') + { + size_t len = strlen(inputText); + + bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */ + result = palloc(bc); + bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result), + escontext); + SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ + + PG_RETURN_BYTEA_P(result); + } + + /* Else, it's the traditional escaped style */ + for (bc = 0, tp = inputText; *tp != '\0'; bc++) + { + if (tp[0] != '\\') + tp++; + else if ((tp[0] == '\\') && + (tp[1] >= '0' && tp[1] <= '3') && + (tp[2] >= '0' && tp[2] <= '7') && + (tp[3] >= '0' && tp[3] <= '7')) + tp += 4; + else if ((tp[0] == '\\') && + (tp[1] == '\\')) + tp += 2; + else + { + /* + * one backslash, not followed by another or ### valid octal + */ + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "bytea"))); + } + } + + bc += VARHDRSZ; + + result = (bytea *) palloc(bc); + SET_VARSIZE(result, bc); + + tp = inputText; + rp = VARDATA(result); + while (*tp != '\0') + { + if (tp[0] != '\\') + *rp++ = *tp++; + else if ((tp[0] == '\\') && + (tp[1] >= '0' && tp[1] <= '3') && + (tp[2] >= '0' && tp[2] <= '7') && + (tp[3] >= '0' && tp[3] <= '7')) + { + bc = VAL(tp[1]); + bc <<= 3; + bc += VAL(tp[2]); + bc <<= 3; + *rp++ = bc + VAL(tp[3]); + + tp += 4; + } + else if ((tp[0] == '\\') && + (tp[1] == '\\')) + { + *rp++ = '\\'; + tp += 2; + } + else + { + /* + * We should never get here. The first pass should not allow it. + */ + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "bytea"))); + } + } + + PG_RETURN_BYTEA_P(result); +} + +/* + * byteaout - converts to printable representation of byte array + * + * In the traditional escaped format, non-printable characters are + * printed as '\nnn' (octal) and '\' as '\\'. + */ +Datum +byteaout(PG_FUNCTION_ARGS) +{ + bytea *vlena = PG_GETARG_BYTEA_PP(0); + char *result; + char *rp; + + if (bytea_output == BYTEA_OUTPUT_HEX) + { + /* Print hex format */ + rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); + *rp++ = '\\'; + *rp++ = 'x'; + rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); + } + else if (bytea_output == BYTEA_OUTPUT_ESCAPE) + { + /* Print traditional escaped format */ + char *vp; + uint64 len; + int i; + + len = 1; /* empty string has 1 char */ + vp = VARDATA_ANY(vlena); + for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) + { + if (*vp == '\\') + len += 2; + else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) + len += 4; + else + len++; + } + + /* + * In principle len can't overflow uint32 if the input fit in 1GB, but + * for safety let's check rather than relying on palloc's internal + * check. + */ + if (len > MaxAllocSize) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg_internal("result of bytea output conversion is too large"))); + rp = result = (char *) palloc(len); + + vp = VARDATA_ANY(vlena); + for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) + { + if (*vp == '\\') + { + *rp++ = '\\'; + *rp++ = '\\'; + } + else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) + { + int val; /* holds unprintable chars */ + + val = *vp; + rp[0] = '\\'; + rp[3] = DIG(val & 07); + val >>= 3; + rp[2] = DIG(val & 07); + val >>= 3; + rp[1] = DIG(val & 03); + rp += 4; + } + else + *rp++ = *vp; + } + } + else + { + elog(ERROR, "unrecognized \"bytea_output\" setting: %d", + bytea_output); + rp = result = NULL; /* keep compiler quiet */ + } + *rp = '\0'; + PG_RETURN_CSTRING(result); +} + +/* + * bytearecv - converts external binary format to bytea + */ +Datum +bytearecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + bytea *result; + int nbytes; + + nbytes = buf->len - buf->cursor; + result = (bytea *) palloc(nbytes + VARHDRSZ); + SET_VARSIZE(result, nbytes + VARHDRSZ); + pq_copymsgbytes(buf, VARDATA(result), nbytes); + PG_RETURN_BYTEA_P(result); +} + +/* + * byteasend - converts bytea to binary format + * + * This is a special case: just copy the input... + */ +Datum +byteasend(PG_FUNCTION_ARGS) +{ + bytea *vlena = PG_GETARG_BYTEA_P_COPY(0); + + PG_RETURN_BYTEA_P(vlena); +} + +Datum +bytea_string_agg_transfn(PG_FUNCTION_ARGS) +{ + StringInfo state; + + state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); + + /* Append the value unless null, preceding it with the delimiter. */ + if (!PG_ARGISNULL(1)) + { + bytea *value = PG_GETARG_BYTEA_PP(1); + bool isfirst = false; + + /* + * You might think we can just throw away the first delimiter, however + * we must keep it as we may be a parallel worker doing partial + * aggregation building a state to send to the main process. We need + * to keep the delimiter of every aggregation so that the combine + * function can properly join up the strings of two separately + * partially aggregated results. The first delimiter is only stripped + * off in the final function. To know how much to strip off the front + * of the string, we store the length of the first delimiter in the + * StringInfo's cursor field, which we don't otherwise need here. + */ + if (state == NULL) + { + MemoryContext aggcontext; + MemoryContext oldcontext; + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "bytea_string_agg_transfn called in non-aggregate context"); + } + + /* + * Create state in aggregate context. It'll stay there across + * subsequent calls. + */ + oldcontext = MemoryContextSwitchTo(aggcontext); + state = makeStringInfo(); + MemoryContextSwitchTo(oldcontext); + + isfirst = true; + } + + if (!PG_ARGISNULL(2)) + { + bytea *delim = PG_GETARG_BYTEA_PP(2); + + appendBinaryStringInfo(state, VARDATA_ANY(delim), + VARSIZE_ANY_EXHDR(delim)); + if (isfirst) + state->cursor = VARSIZE_ANY_EXHDR(delim); + } + + appendBinaryStringInfo(state, VARDATA_ANY(value), + VARSIZE_ANY_EXHDR(value)); + } + + /* + * The transition type for string_agg() is declared to be "internal", + * which is a pass-by-value type the same size as a pointer. + */ + if (state) + PG_RETURN_POINTER(state); + PG_RETURN_NULL(); +} + +Datum +bytea_string_agg_finalfn(PG_FUNCTION_ARGS) +{ + StringInfo state; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); + + if (state != NULL) + { + /* As per comment in transfn, strip data before the cursor position */ + bytea *result; + int strippedlen = state->len - state->cursor; + + result = (bytea *) palloc(strippedlen + VARHDRSZ); + SET_VARSIZE(result, strippedlen + VARHDRSZ); + memcpy(VARDATA(result), &state->data[state->cursor], strippedlen); + PG_RETURN_BYTEA_P(result); + } + else + PG_RETURN_NULL(); +} + +/*------------------------------------------------------------- + * byteaoctetlen + * + * get the number of bytes contained in an instance of type 'bytea' + *------------------------------------------------------------- + */ +Datum +byteaoctetlen(PG_FUNCTION_ARGS) +{ + Datum str = PG_GETARG_DATUM(0); + + /* We need not detoast the input at all */ + PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); +} + +/* + * byteacat - + * takes two bytea* and returns a bytea* that is the concatenation of + * the two. + * + * Cloned from textcat and modified as required. + */ +Datum +byteacat(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + + PG_RETURN_BYTEA_P(bytea_catenate(t1, t2)); +} + +/* + * byteaoverlay + * Replace specified substring of first string with second + * + * The SQL standard defines OVERLAY() in terms of substring and concatenation. + * This code is a direct implementation of what the standard says. + */ +Datum +byteaoverlay(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int sp = PG_GETARG_INT32(2); /* substring start position */ + int sl = PG_GETARG_INT32(3); /* substring length */ + + PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); +} + +Datum +byteaoverlay_no_len(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int sp = PG_GETARG_INT32(2); /* substring start position */ + int sl; + + sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */ + PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); +} + +/* + * bytea_substr() + * Return a substring starting at the specified position. + * Cloned from text_substr and modified as required. + * + * Input: + * - string + * - starting position (is one-based) + * - string length (optional) + * + * If the starting position is zero or less, then return from the start of the string + * adjusting the length to be consistent with the "negative start" per SQL. + * If the length is less than zero, an ERROR is thrown. If no third argument + * (length) is provided, the length to the end of the string is assumed. + */ +Datum +bytea_substr(PG_FUNCTION_ARGS) +{ + PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), + PG_GETARG_INT32(1), + PG_GETARG_INT32(2), + false)); +} + +/* + * bytea_substr_no_len - + * Wrapper to avoid opr_sanity failure due to + * one function accepting a different number of args. + */ +Datum +bytea_substr_no_len(PG_FUNCTION_ARGS) +{ + PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), + PG_GETARG_INT32(1), + -1, + true)); +} + +/* + * bit_count + */ +Datum +bytea_bit_count(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + + PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1))); +} + +/* + * byteapos - + * Return the position of the specified substring. + * Implements the SQL POSITION() function. + * Cloned from textpos and modified as required. + */ +Datum +byteapos(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int pos; + int px, + p; + int len1, + len2; + char *p1, + *p2; + + len1 = VARSIZE_ANY_EXHDR(t1); + len2 = VARSIZE_ANY_EXHDR(t2); + + if (len2 <= 0) + PG_RETURN_INT32(1); /* result for empty pattern */ + + p1 = VARDATA_ANY(t1); + p2 = VARDATA_ANY(t2); + + pos = 0; + px = (len1 - len2); + for (p = 0; p <= px; p++) + { + if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0)) + { + pos = p + 1; + break; + }; + p1++; + }; + + PG_RETURN_INT32(pos); +} + +/*------------------------------------------------------------- + * byteaGetByte + * + * this routine treats "bytea" as an array of bytes. + * It returns the Nth byte (a number between 0 and 255). + *------------------------------------------------------------- + */ +Datum +byteaGetByte(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int32 n = PG_GETARG_INT32(1); + int len; + int byte; + + len = VARSIZE_ANY_EXHDR(v); + + if (n < 0 || n >= len) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %d out of valid range, 0..%d", + n, len - 1))); + + byte = ((unsigned char *) VARDATA_ANY(v))[n]; + + PG_RETURN_INT32(byte); +} + +/*------------------------------------------------------------- + * byteaGetBit + * + * This routine treats a "bytea" type like an array of bits. + * It returns the value of the Nth bit (0 or 1). + * + *------------------------------------------------------------- + */ +Datum +byteaGetBit(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int64 n = PG_GETARG_INT64(1); + int byteNo, + bitNo; + int len; + int byte; + + len = VARSIZE_ANY_EXHDR(v); + + if (n < 0 || n >= (int64) len * 8) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, + n, (int64) len * 8 - 1))); + + /* n/8 is now known < len, so safe to cast to int */ + byteNo = (int) (n / 8); + bitNo = (int) (n % 8); + + byte = ((unsigned char *) VARDATA_ANY(v))[byteNo]; + + if (byte & (1 << bitNo)) + PG_RETURN_INT32(1); + else + PG_RETURN_INT32(0); +} + +/*------------------------------------------------------------- + * byteaSetByte + * + * Given an instance of type 'bytea' creates a new one with + * the Nth byte set to the given value. + * + *------------------------------------------------------------- + */ +Datum +byteaSetByte(PG_FUNCTION_ARGS) +{ + bytea *res = PG_GETARG_BYTEA_P_COPY(0); + int32 n = PG_GETARG_INT32(1); + int32 newByte = PG_GETARG_INT32(2); + int len; + + len = VARSIZE(res) - VARHDRSZ; + + if (n < 0 || n >= len) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %d out of valid range, 0..%d", + n, len - 1))); + + /* + * Now set the byte. + */ + ((unsigned char *) VARDATA(res))[n] = newByte; + + PG_RETURN_BYTEA_P(res); +} + +/*------------------------------------------------------------- + * byteaSetBit + * + * Given an instance of type 'bytea' creates a new one with + * the Nth bit set to the given value. + * + *------------------------------------------------------------- + */ +Datum +byteaSetBit(PG_FUNCTION_ARGS) +{ + bytea *res = PG_GETARG_BYTEA_P_COPY(0); + int64 n = PG_GETARG_INT64(1); + int32 newBit = PG_GETARG_INT32(2); + int len; + int oldByte, + newByte; + int byteNo, + bitNo; + + len = VARSIZE(res) - VARHDRSZ; + + if (n < 0 || n >= (int64) len * 8) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, + n, (int64) len * 8 - 1))); + + /* n/8 is now known < len, so safe to cast to int */ + byteNo = (int) (n / 8); + bitNo = (int) (n % 8); + + /* + * sanity check! + */ + if (newBit != 0 && newBit != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("new bit must be 0 or 1"))); + + /* + * Update the byte. + */ + oldByte = ((unsigned char *) VARDATA(res))[byteNo]; + + if (newBit == 0) + newByte = oldByte & (~(1 << bitNo)); + else + newByte = oldByte | (1 << bitNo); + + ((unsigned char *) VARDATA(res))[byteNo] = newByte; + + PG_RETURN_BYTEA_P(res); +} + +/* + * Return reversed bytea + */ +Datum +bytea_reverse(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + const char *p = VARDATA_ANY(v); + int len = VARSIZE_ANY_EXHDR(v); + const char *endp = p + len; + bytea *result = palloc(len + VARHDRSZ); + char *dst = (char *) VARDATA(result) + len; + + SET_VARSIZE(result, len + VARHDRSZ); + + while (p < endp) + *(--dst) = *p++; + + PG_RETURN_BYTEA_P(result); +} + + +/***************************************************************************** + * Comparison Functions used for bytea + * + * Note: btree indexes need these routines not to leak memory; therefore, + * be careful to free working copies of toasted datums. Most places don't + * need to be so careful. + *****************************************************************************/ + +Datum +byteaeq(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + bool result; + Size len1, + len2; + + /* + * We can use a fast path for unequal lengths, which might save us from + * having to detoast one or both values. + */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = false; + else + { + bytea *barg1 = DatumGetByteaPP(arg1); + bytea *barg2 = DatumGetByteaPP(arg2); + + result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), + len1 - VARHDRSZ) == 0); + + PG_FREE_IF_COPY(barg1, 0); + PG_FREE_IF_COPY(barg2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +byteane(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + bool result; + Size len1, + len2; + + /* + * We can use a fast path for unequal lengths, which might save us from + * having to detoast one or both values. + */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = true; + else + { + bytea *barg1 = DatumGetByteaPP(arg1); + bytea *barg2 = DatumGetByteaPP(arg2); + + result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), + len1 - VARHDRSZ) != 0); + + PG_FREE_IF_COPY(barg1, 0); + PG_FREE_IF_COPY(barg2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +bytealt(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2))); +} + +Datum +byteale(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2))); +} + +Datum +byteagt(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2))); +} + +Datum +byteage(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2))); +} + +Datum +byteacmp(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + if ((cmp == 0) && (len1 != len2)) + cmp = (len1 < len2) ? -1 : 1; + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(cmp); +} + +Datum +bytea_larger(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + bytea *result; + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2); + + PG_RETURN_BYTEA_P(result); +} + +Datum +bytea_smaller(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + bytea *result; + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2); + + PG_RETURN_BYTEA_P(result); +} + +Datum +bytea_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + /* Use generic string SortSupport, forcing "C" collation */ + varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID); + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_VOID(); +} + +/* Cast bytea -> int2 */ +Datum +bytea_int2(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint16 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT16(result); +} + +/* Cast bytea -> int4 */ +Datum +bytea_int4(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint32 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT32(result); +} + +/* Cast bytea -> int8 */ +Datum +bytea_int8(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint64 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT64(result); +} + +/* Cast int2 -> bytea; can just use int2send() */ +Datum +int2_bytea(PG_FUNCTION_ARGS) +{ + return int2send(fcinfo); +} + +/* Cast int4 -> bytea; can just use int4send() */ +Datum +int4_bytea(PG_FUNCTION_ARGS) +{ + return int4send(fcinfo); +} + +/* Cast int8 -> bytea; can just use int8send() */ +Datum +int8_bytea(PG_FUNCTION_ARGS) +{ + return int8send(fcinfo); +} diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c index 4227ab1a72b..344f58b92f7 100644 --- a/src/backend/utils/adt/date.c +++ b/src/backend/utils/adt/date.c @@ -1363,10 +1363,35 @@ timestamp_date(PG_FUNCTION_ARGS) { Timestamp timestamp = PG_GETARG_TIMESTAMP(0); DateADT result; + + result = timestamp2date_opt_overflow(timestamp, NULL); + PG_RETURN_DATEADT(result); +} + +/* + * Convert timestamp to date. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the timestamp is finite but out of the valid range for date, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate date infinity. + * + * Note: given the ranges of the types, overflow is only possible at + * the minimum end of the range, but we don't assume that in this code. + */ +DateADT +timestamp2date_opt_overflow(Timestamp timestamp, int *overflow) +{ + DateADT result; struct pg_tm tt, *tm = &tt; fsec_t fsec; + if (overflow) + *overflow = 0; + if (TIMESTAMP_IS_NOBEGIN(timestamp)) DATE_NOBEGIN(result); else if (TIMESTAMP_IS_NOEND(timestamp)) @@ -1374,14 +1399,30 @@ timestamp_date(PG_FUNCTION_ARGS) else { if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + DATE_NOBEGIN(result); + } + else + { + *overflow = 1; /* not actually reachable */ + DATE_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE; } - PG_RETURN_DATEADT(result); + return result; } @@ -1408,11 +1449,36 @@ timestamptz_date(PG_FUNCTION_ARGS) { TimestampTz timestamp = PG_GETARG_TIMESTAMP(0); DateADT result; + + result = timestamptz2date_opt_overflow(timestamp, NULL); + PG_RETURN_DATEADT(result); +} + +/* + * Convert timestamptz to date. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the timestamptz is finite but out of the valid range for date, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate date infinity. + * + * Note: given the ranges of the types, overflow is only possible at + * the minimum end of the range, but we don't assume that in this code. + */ +DateADT +timestamptz2date_opt_overflow(TimestampTz timestamp, int *overflow) +{ + DateADT result; struct pg_tm tt, *tm = &tt; fsec_t fsec; int tz; + if (overflow) + *overflow = 0; + if (TIMESTAMP_IS_NOBEGIN(timestamp)) DATE_NOBEGIN(result); else if (TIMESTAMP_IS_NOEND(timestamp)) @@ -1420,14 +1486,30 @@ timestamptz_date(PG_FUNCTION_ARGS) else { if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + DATE_NOBEGIN(result); + } + else + { + *overflow = 1; /* not actually reachable */ + DATE_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE; } - PG_RETURN_DATEADT(result); + return result; } diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c index ba66a9c4ce6..7b97d2be6ca 100644 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -4067,8 +4067,9 @@ float84ge(PG_FUNCTION_ARGS) * with the specified characteristics. An operand smaller than the * lower bound is assigned to bucket 0. An operand greater than or equal * to the upper bound is assigned to an additional bucket (with number - * count+1). We don't allow "NaN" for any of the float8 inputs, and we - * don't allow either of the histogram bounds to be +/- infinity. + * count+1). We don't allow the histogram bounds to be NaN or +/- infinity, + * but we do allow those values for the operand (taking NaN to be larger + * than any other value, as we do in comparisons). */ Datum width_bucket_float8(PG_FUNCTION_ARGS) @@ -4084,12 +4085,11 @@ width_bucket_float8(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), errmsg("count must be greater than zero"))); - if (isnan(operand) || isnan(bound1) || isnan(bound2)) + if (isnan(bound1) || isnan(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), - errmsg("operand, lower bound, and upper bound cannot be NaN"))); + errmsg("lower and upper bounds cannot be NaN"))); - /* Note that we allow "operand" to be infinite */ if (isinf(bound1) || isinf(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), @@ -4097,15 +4097,15 @@ width_bucket_float8(PG_FUNCTION_ARGS) if (bound1 < bound2) { - if (operand < bound1) - result = 0; - else if (operand >= bound2) + if (isnan(operand) || operand >= bound2) { if (pg_add_s32_overflow(count, 1, &result)) ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("integer out of range"))); } + else if (operand < bound1) + result = 0; else { if (!isinf(bound2 - bound1)) @@ -4135,7 +4135,7 @@ width_bucket_float8(PG_FUNCTION_ARGS) } else if (bound1 > bound2) { - if (operand > bound1) + if (isnan(operand) || operand > bound1) result = 0; else if (operand <= bound2) { diff --git a/src/backend/utils/adt/inet_net_pton.c b/src/backend/utils/adt/inet_net_pton.c index ef2236d9f04..3b0db2a3799 100644 --- a/src/backend/utils/adt/inet_net_pton.c +++ b/src/backend/utils/adt/inet_net_pton.c @@ -115,8 +115,7 @@ inet_cidr_pton_ipv4(const char *src, u_char *dst, size_t size) src++; /* skip x or X. */ while ((ch = *src++) != '\0' && isxdigit((unsigned char) ch)) { - if (isupper((unsigned char) ch)) - ch = tolower((unsigned char) ch); + ch = pg_ascii_tolower((unsigned char) ch); n = strchr(xdigits, ch) - xdigits; assert(n >= 0 && n <= 15); if (dirty == 0) diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 7f4cf614585..4216ac17f43 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -98,7 +98,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale) else if (locale->is_default) return pg_tolower(c); else - return tolower_l(c, locale->info.lt); + return char_tolower(c, locale); } @@ -209,7 +209,17 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) * way. */ - if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU)) + if (locale->ctype_is_c || + (char_tolower_enabled(locale) && + pg_database_encoding_max_length() == 1)) + { + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + s = VARDATA_ANY(str); + slen = VARSIZE_ANY_EXHDR(str); + return SB_IMatchText(s, slen, p, plen, locale); + } + else { pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation, PointerGetDatum(pat))); @@ -224,14 +234,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) else return MB_MatchText(s, slen, p, plen, 0); } - else - { - p = VARDATA_ANY(pat); - plen = VARSIZE_ANY_EXHDR(pat); - s = VARDATA_ANY(str); - slen = VARSIZE_ANY_EXHDR(str); - return SB_IMatchText(s, slen, p, plen, locale); - } } /* diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 8fdc677371f..999f23f86d5 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -1495,13 +1495,8 @@ pattern_char_isalpha(char c, bool is_multibyte, { if (locale->ctype_is_c) return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); - else if (is_multibyte && IS_HIGHBIT_SET(c)) - return true; - else if (locale->provider != COLLPROVIDER_LIBC) - return IS_HIGHBIT_SET(c) || - (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); else - return isalpha_l((unsigned char) c, locale->info.lt); + return char_is_cased(c, locale); } diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build index 244f48f4fd7..ed9bbd7b926 100644 --- a/src/backend/utils/adt/meson.build +++ b/src/backend/utils/adt/meson.build @@ -12,6 +12,7 @@ backend_sources += files( 'arrayutils.c', 'ascii.c', 'bool.c', + 'bytea.c', 'cash.c', 'char.c', 'cryptohashfuncs.c', diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 58ad1a65ef7..c9233565d57 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -1960,8 +1960,9 @@ generate_series_numeric_support(PG_FUNCTION_ARGS) * with the specified characteristics. An operand smaller than the * lower bound is assigned to bucket 0. An operand greater than or equal * to the upper bound is assigned to an additional bucket (with number - * count+1). We don't allow "NaN" for any of the numeric inputs, and we - * don't allow either of the histogram bounds to be +/- infinity. + * count+1). We don't allow the histogram bounds to be NaN or +/- infinity, + * but we do allow those values for the operand (taking NaN to be larger + * than any other value, as we do in comparisons). */ Datum width_bucket_numeric(PG_FUNCTION_ARGS) @@ -1979,17 +1980,13 @@ width_bucket_numeric(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), errmsg("count must be greater than zero"))); - if (NUMERIC_IS_SPECIAL(operand) || - NUMERIC_IS_SPECIAL(bound1) || - NUMERIC_IS_SPECIAL(bound2)) + if (NUMERIC_IS_SPECIAL(bound1) || NUMERIC_IS_SPECIAL(bound2)) { - if (NUMERIC_IS_NAN(operand) || - NUMERIC_IS_NAN(bound1) || - NUMERIC_IS_NAN(bound2)) + if (NUMERIC_IS_NAN(bound1) || NUMERIC_IS_NAN(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), - errmsg("operand, lower bound, and upper bound cannot be NaN"))); - /* We allow "operand" to be infinite; cmp_numerics will cope */ + errmsg("lower and upper bounds cannot be NaN"))); + if (NUMERIC_IS_INF(bound1) || NUMERIC_IS_INF(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index bf1afb24d7d..97c2ac1faf9 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -79,31 +79,6 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context); extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); extern char *get_collation_actual_version_libc(const char *collcollate); -extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - -extern size_t strlower_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - -extern size_t strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - /* GUC settings */ char *locale_messages; char *locale_monetary; @@ -1092,6 +1067,9 @@ create_pg_locale(Oid collid, MemoryContext context) Assert((result->collate_is_c && result->collate == NULL) || (!result->collate_is_c && result->collate != NULL)); + Assert((result->ctype_is_c && result->ctype == NULL) || + (!result->ctype_is_c && result->ctype != NULL)); + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, &isnull); if (!isnull) @@ -1256,77 +1234,31 @@ size_t pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strlower_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strlower_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return strlower_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strlower(dst, dstsize, src, srclen, locale); } size_t pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strtitle_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strtitle_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return strtitle_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strtitle(dst, dstsize, src, srclen, locale); } size_t pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strupper_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strupper_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return strupper_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strupper(dst, dstsize, src, srclen, locale); } size_t pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strfold_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strfold_icu(dst, dstsize, src, srclen, locale); -#endif - /* for libc, just use strlower */ - else if (locale->provider == COLLPROVIDER_LIBC) - return strlower_libc(dst, dstsize, src, srclen, locale); + if (locale->ctype->strfold) + return locale->ctype->strfold(dst, dstsize, src, srclen, locale); else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strlower(dst, dstsize, src, srclen, locale); } /* @@ -1464,6 +1396,41 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, } /* + * char_is_cased() + * + * Fuzzy test of whether the given char is case-varying or not. The argument + * is a single byte, so in a multibyte encoding, just assume any non-ASCII + * char is case-varying. + */ +bool +char_is_cased(char ch, pg_locale_t locale) +{ + return locale->ctype->char_is_cased(ch, locale); +} + +/* + * char_tolower_enabled() + * + * Does the provider support char_tolower()? + */ +bool +char_tolower_enabled(pg_locale_t locale) +{ + return (locale->ctype->char_tolower != NULL); +} + +/* + * char_tolower() + * + * Convert char (single-byte encoding) to lowercase. + */ +char +char_tolower(unsigned char ch, pg_locale_t locale) +{ + return locale->ctype->char_tolower(ch, locale); +} + +/* * Return required encoding ID for the given locale, or -1 if any encoding is * valid for the locale. */ diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c index ce4914a76a1..0c9fbdb40f2 100644 --- a/src/backend/utils/adt/pg_locale_builtin.c +++ b/src/backend/utils/adt/pg_locale_builtin.c @@ -24,15 +24,6 @@ extern pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context); extern char *get_collation_actual_version_builtin(const char *collcollate); -extern size_t strlower_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); - struct WordBoundaryState { @@ -76,7 +67,7 @@ initcap_wbnext(void *state) return wbstate->len; } -size_t +static size_t strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -84,7 +75,7 @@ strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, locale->info.builtin.casemap_full); } -size_t +static size_t strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -102,7 +93,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, initcap_wbnext, &wbstate); } -size_t +static size_t strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -110,7 +101,7 @@ strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, locale->info.builtin.casemap_full); } -size_t +static size_t strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -118,6 +109,98 @@ strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, locale->info.builtin.casemap_full); } +static bool +wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isdigit(wc, !locale->info.builtin.casemap_full); +} + +static bool +wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isalpha(wc); +} + +static bool +wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isalnum(wc, !locale->info.builtin.casemap_full); +} + +static bool +wc_isupper_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isupper(wc); +} + +static bool +wc_islower_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_islower(wc); +} + +static bool +wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isgraph(wc); +} + +static bool +wc_isprint_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isprint(wc); +} + +static bool +wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_ispunct(wc, !locale->info.builtin.casemap_full); +} + +static bool +wc_isspace_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isspace(wc); +} + +static bool +char_is_cased_builtin(char ch, pg_locale_t locale) +{ + return IS_HIGHBIT_SET(ch) || + (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); +} + +static pg_wchar +wc_toupper_builtin(pg_wchar wc, pg_locale_t locale) +{ + return unicode_uppercase_simple(wc); +} + +static pg_wchar +wc_tolower_builtin(pg_wchar wc, pg_locale_t locale) +{ + return unicode_lowercase_simple(wc); +} + +static const struct ctype_methods ctype_methods_builtin = { + .strlower = strlower_builtin, + .strtitle = strtitle_builtin, + .strupper = strupper_builtin, + .strfold = strfold_builtin, + .wc_isdigit = wc_isdigit_builtin, + .wc_isalpha = wc_isalpha_builtin, + .wc_isalnum = wc_isalnum_builtin, + .wc_isupper = wc_isupper_builtin, + .wc_islower = wc_islower_builtin, + .wc_isgraph = wc_isgraph_builtin, + .wc_isprint = wc_isprint_builtin, + .wc_ispunct = wc_ispunct_builtin, + .wc_isspace = wc_isspace_builtin, + .char_is_cased = char_is_cased_builtin, + .wc_tolower = wc_tolower_builtin, + .wc_toupper = wc_toupper_builtin, +}; + pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context) { @@ -157,10 +240,11 @@ create_pg_locale_builtin(Oid collid, MemoryContext context) result->info.builtin.locale = MemoryContextStrdup(context, locstr); result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0); - result->provider = COLLPROVIDER_BUILTIN; result->deterministic = true; result->collate_is_c = true; result->ctype_is_c = (strcmp(locstr, "C") == 0); + if (!result->ctype_is_c) + result->ctype = &ctype_methods_builtin; return result; } diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c index a32c32a0744..96741e08269 100644 --- a/src/backend/utils/adt/pg_locale_icu.c +++ b/src/backend/utils/adt/pg_locale_icu.c @@ -48,19 +48,22 @@ #define TEXTBUFLEN 1024 extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context); -extern size_t strlower_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); #ifdef USE_ICU extern UCollator *pg_ucol_open(const char *loc_str); +static size_t strlower_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static size_t strtitle_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static size_t strupper_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static size_t strfold_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static int strncoll_icu(const char *arg1, ssize_t len1, + const char *arg2, ssize_t len2, + pg_locale_t locale); static size_t strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale); @@ -118,6 +121,25 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity, const char *locale, UErrorCode *pErrorCode); +static bool +char_is_cased_icu(char ch, pg_locale_t locale) +{ + return IS_HIGHBIT_SET(ch) || + (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); +} + +static pg_wchar +toupper_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_toupper(wc); +} + +static pg_wchar +tolower_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_tolower(wc); +} + static const struct collate_methods collate_methods_icu = { .strncoll = strncoll_icu, .strnxfrm = strnxfrm_icu, @@ -136,6 +158,78 @@ static const struct collate_methods collate_methods_icu_utf8 = { .strxfrm_is_safe = true, }; +static bool +wc_isdigit_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isdigit(wc); +} + +static bool +wc_isalpha_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isalpha(wc); +} + +static bool +wc_isalnum_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isalnum(wc); +} + +static bool +wc_isupper_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isupper(wc); +} + +static bool +wc_islower_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_islower(wc); +} + +static bool +wc_isgraph_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isgraph(wc); +} + +static bool +wc_isprint_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isprint(wc); +} + +static bool +wc_ispunct_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_ispunct(wc); +} + +static bool +wc_isspace_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isspace(wc); +} + +static const struct ctype_methods ctype_methods_icu = { + .strlower = strlower_icu, + .strtitle = strtitle_icu, + .strupper = strupper_icu, + .strfold = strfold_icu, + .wc_isdigit = wc_isdigit_icu, + .wc_isalpha = wc_isalpha_icu, + .wc_isalnum = wc_isalnum_icu, + .wc_isupper = wc_isupper_icu, + .wc_islower = wc_islower_icu, + .wc_isgraph = wc_isgraph_icu, + .wc_isprint = wc_isprint_icu, + .wc_ispunct = wc_ispunct_icu, + .wc_isspace = wc_isspace_icu, + .char_is_cased = char_is_cased_icu, + .wc_toupper = toupper_icu, + .wc_tolower = tolower_icu, +}; #endif pg_locale_t @@ -198,7 +292,6 @@ create_pg_locale_icu(Oid collid, MemoryContext context) result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); result->info.icu.locale = MemoryContextStrdup(context, iculocstr); result->info.icu.ucol = collator; - result->provider = COLLPROVIDER_ICU; result->deterministic = deterministic; result->collate_is_c = false; result->ctype_is_c = false; @@ -206,6 +299,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context) result->collate = &collate_methods_icu_utf8; else result->collate = &collate_methods_icu; + result->ctype = &ctype_methods_icu; return result; #else @@ -379,7 +473,7 @@ make_icu_collator(const char *iculocstr, const char *icurules) } } -size_t +static size_t strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -399,7 +493,7 @@ strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_len; } -size_t +static size_t strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -419,7 +513,7 @@ strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_len; } -size_t +static size_t strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -439,7 +533,7 @@ strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_len; } -size_t +static size_t strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -474,8 +568,6 @@ strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2 int result; UErrorCode status; - Assert(locale->provider == COLLPROVIDER_ICU); - Assert(GetDatabaseEncoding() == PG_UTF8); status = U_ZERO_ERROR; @@ -503,8 +595,6 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t uchar_bsize; Size result_bsize; - Assert(locale->provider == COLLPROVIDER_ICU); - init_icu_converter(); ulen = uchar_length(icu_converter, src, srclen); @@ -549,8 +639,6 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize, uint32_t state[2]; UErrorCode status; - Assert(locale->provider == COLLPROVIDER_ICU); - Assert(GetDatabaseEncoding() == PG_UTF8); uiter_setUTF8(&iter, src, srclen); @@ -749,8 +837,6 @@ strncoll_icu(const char *arg1, ssize_t len1, *uchar2; int result; - Assert(locale->provider == COLLPROVIDER_ICU); - /* if encoding is UTF8, use more efficient strncoll_icu_utf8 */ #ifdef HAVE_UCOL_STRCOLLUTF8 Assert(GetDatabaseEncoding() != PG_UTF8); @@ -799,8 +885,6 @@ strnxfrm_prefix_icu(char *dest, size_t destsize, size_t uchar_bsize; Size result_bsize; - Assert(locale->provider == COLLPROVIDER_ICU); - /* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */ Assert(GetDatabaseEncoding() != PG_UTF8); diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index 199857e22db..e9f9fc1e369 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -34,6 +34,46 @@ #endif /* + * For the libc provider, to provide as much functionality as possible on a + * variety of platforms without going so far as to implement everything from + * scratch, we use several implementation strategies depending on the + * situation: + * + * 1. In C/POSIX collations, we use hard-wired code. We can't depend on + * the <ctype.h> functions since those will obey LC_CTYPE. Note that these + * collations don't give a fig about multibyte characters. + * + * 2. When working in UTF8 encoding, we use the <wctype.h> functions. + * This assumes that every platform uses Unicode codepoints directly + * as the wchar_t representation of Unicode. (XXX: ICU makes this assumption + * even for non-UTF8 encodings, which may be a problem.) On some platforms + * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF. + * + * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar + * values up to 255, and punt for values above that. This is 100% correct + * only in single-byte encodings such as LATINn. However, non-Unicode + * multibyte encodings are mostly Far Eastern character sets for which the + * properties being tested here aren't very relevant for higher code values + * anyway. The difficulty with using the <wctype.h> functions with + * non-Unicode multibyte encodings is that we can have no certainty that + * the platform's wchar_t representation matches what we do in pg_wchar + * conversions. + * + * As a special case, in the "default" collation, (2) and (3) force ASCII + * letters to follow ASCII upcase/downcase rules, while in a non-default + * collation we just let the library functions do what they will. The case + * where this matters is treatment of I/i in Turkish, and the behavior is + * meant to match the upper()/lower() SQL functions. + * + * We store the active collation setting in static variables. In principle + * it could be passed down to here via the regex library's "struct vars" data + * structure; but that would require somewhat invasive changes in the regex + * library, and right now there's no real benefit to be gained from that. + * + * NB: the coding here assumes pg_wchar is an unsigned type. + */ + +/* * Size of stack buffer to use for string transformations, used to avoid heap * allocations in typical cases. This should be large enough that most strings * will fit, but small enough that we feel comfortable putting it on the @@ -43,13 +83,6 @@ extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); -extern size_t strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - static int strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale); @@ -85,6 +118,251 @@ static size_t strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale); +static bool +wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isdigit_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isalpha_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isalnum_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isupper_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return islower_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isgraph_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isprint_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return ispunct_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isspace_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswdigit_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswalpha_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswalnum_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswupper_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswlower_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswgraph_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswprint_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswpunct_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswspace_l((wint_t) wc, locale->info.lt); +} + +static char +char_tolower_libc(unsigned char ch, pg_locale_t locale) +{ + Assert(pg_database_encoding_max_length() == 1); + return tolower_l(ch, locale->info.lt); +} + +static bool +char_is_cased_libc(char ch, pg_locale_t locale) +{ + bool is_multibyte = pg_database_encoding_max_length() > 1; + + if (is_multibyte && IS_HIGHBIT_SET(ch)) + return true; + else + return isalpha_l((unsigned char) ch, locale->info.lt); +} + +static pg_wchar +toupper_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() != PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_toupper((unsigned char) wc); + if (wc <= (pg_wchar) UCHAR_MAX) + return toupper_l((unsigned char) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +toupper_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() == PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_toupper((unsigned char) wc); + if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF) + return towupper_l((wint_t) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +tolower_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() != PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_tolower((unsigned char) wc); + if (wc <= (pg_wchar) UCHAR_MAX) + return tolower_l((unsigned char) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +tolower_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() == PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_tolower((unsigned char) wc); + if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF) + return towlower_l((wint_t) wc, locale->info.lt); + else + return wc; +} + +static const struct ctype_methods ctype_methods_libc_sb = { + .strlower = strlower_libc_sb, + .strtitle = strtitle_libc_sb, + .strupper = strupper_libc_sb, + .wc_isdigit = wc_isdigit_libc_sb, + .wc_isalpha = wc_isalpha_libc_sb, + .wc_isalnum = wc_isalnum_libc_sb, + .wc_isupper = wc_isupper_libc_sb, + .wc_islower = wc_islower_libc_sb, + .wc_isgraph = wc_isgraph_libc_sb, + .wc_isprint = wc_isprint_libc_sb, + .wc_ispunct = wc_ispunct_libc_sb, + .wc_isspace = wc_isspace_libc_sb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_sb, + .wc_tolower = tolower_libc_sb, + .max_chr = UCHAR_MAX, +}; + +/* + * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but + * single-byte semantics for pattern matching. + */ +static const struct ctype_methods ctype_methods_libc_other_mb = { + .strlower = strlower_libc_mb, + .strtitle = strtitle_libc_mb, + .strupper = strupper_libc_mb, + .wc_isdigit = wc_isdigit_libc_sb, + .wc_isalpha = wc_isalpha_libc_sb, + .wc_isalnum = wc_isalnum_libc_sb, + .wc_isupper = wc_isupper_libc_sb, + .wc_islower = wc_islower_libc_sb, + .wc_isgraph = wc_isgraph_libc_sb, + .wc_isprint = wc_isprint_libc_sb, + .wc_ispunct = wc_ispunct_libc_sb, + .wc_isspace = wc_isspace_libc_sb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_sb, + .wc_tolower = tolower_libc_sb, + .max_chr = UCHAR_MAX, +}; + +static const struct ctype_methods ctype_methods_libc_utf8 = { + .strlower = strlower_libc_mb, + .strtitle = strtitle_libc_mb, + .strupper = strupper_libc_mb, + .wc_isdigit = wc_isdigit_libc_mb, + .wc_isalpha = wc_isalpha_libc_mb, + .wc_isalnum = wc_isalnum_libc_mb, + .wc_isupper = wc_isupper_libc_mb, + .wc_islower = wc_islower_libc_mb, + .wc_isgraph = wc_isgraph_libc_mb, + .wc_isprint = wc_isprint_libc_mb, + .wc_ispunct = wc_ispunct_libc_mb, + .wc_isspace = wc_isspace_libc_mb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_mb, + .wc_tolower = tolower_libc_mb, +}; + static const struct collate_methods collate_methods_libc = { .strncoll = strncoll_libc, .strnxfrm = strnxfrm_libc, @@ -119,36 +397,6 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = { }; #endif -size_t -strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strlower_libc_mb(dst, dstsize, src, srclen, locale); - else - return strlower_libc_sb(dst, dstsize, src, srclen, locale); -} - -size_t -strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strtitle_libc_mb(dst, dstsize, src, srclen, locale); - else - return strtitle_libc_sb(dst, dstsize, src, srclen, locale); -} - -size_t -strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strupper_libc_mb(dst, dstsize, src, srclen, locale); - else - return strupper_libc_sb(dst, dstsize, src, srclen, locale); -} - static size_t strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) @@ -465,7 +713,6 @@ create_pg_locale_libc(Oid collid, MemoryContext context) loc = make_libc_collator(collate, ctype); result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); - result->provider = COLLPROVIDER_LIBC; result->deterministic = true; result->collate_is_c = (strcmp(collate, "C") == 0) || (strcmp(collate, "POSIX") == 0); @@ -481,6 +728,15 @@ create_pg_locale_libc(Oid collid, MemoryContext context) #endif result->collate = &collate_methods_libc; } + if (!result->ctype_is_c) + { + if (GetDatabaseEncoding() == PG_UTF8) + result->ctype = &ctype_methods_libc_utf8; + else if (pg_database_encoding_max_length() > 1) + result->ctype = &ctype_methods_libc_other_mb; + else + result->ctype = &ctype_methods_libc_sb; + } return result; } @@ -576,8 +832,6 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, const char *arg2n; int result; - Assert(locale->provider == COLLPROVIDER_LIBC); - if (bufsize1 + bufsize2 > TEXTBUFLEN) buf = palloc(bufsize1 + bufsize2); @@ -632,8 +886,6 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t bufsize = srclen + 1; size_t result; - Assert(locale->provider == COLLPROVIDER_LIBC); - if (srclen == -1) return strxfrm_l(dest, src, destsize, locale->info.lt); @@ -742,7 +994,6 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2, int r; int result; - Assert(locale->provider == COLLPROVIDER_LIBC); Assert(GetDatabaseEncoding() == PG_UTF8); if (len1 == -1) diff --git a/src/backend/utils/adt/regproc.c b/src/backend/utils/adt/regproc.c index 5ee608a2b39..b8bbe95e82e 100644 --- a/src/backend/utils/adt/regproc.c +++ b/src/backend/utils/adt/regproc.c @@ -30,6 +30,7 @@ #include "catalog/pg_ts_config.h" #include "catalog/pg_ts_dict.h" #include "catalog/pg_type.h" +#include "commands/dbcommands.h" #include "lib/stringinfo.h" #include "mb/pg_wchar.h" #include "miscadmin.h" @@ -1764,6 +1765,123 @@ regnamespacesend(PG_FUNCTION_ARGS) } /* + * regdatabasein - converts database name to database OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_database entry. + */ +Datum +regdatabasein(PG_FUNCTION_ARGS) +{ + char *db_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Oid result; + List *names; + + /* Handle "-" or numeric OID */ + if (parseDashOrOid(db_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regdatabase values must be OIDs in bootstrap mode"); + + /* Normal case: see if the name matches any pg_database entry. */ + names = stringToQualifiedNameList(db_name_or_oid, escontext); + if (names == NIL) + PG_RETURN_NULL(); + + if (list_length(names) != 1) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_NAME), + errmsg("invalid name syntax"))); + + result = get_database_oid(strVal(linitial(names)), true); + + if (!OidIsValid(result)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("database \"%s\" does not exist", + strVal(linitial(names))))); + + PG_RETURN_OID(result); +} + +/* + * to_regdatabase - converts database name to database OID + * + * If the name is not found, we return NULL. + */ +Datum +to_regdatabase(PG_FUNCTION_ARGS) +{ + char *db_name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!DirectInputFunctionCallSafe(regdatabasein, db_name, + InvalidOid, -1, + (Node *) &escontext, + &result)) + PG_RETURN_NULL(); + PG_RETURN_DATUM(result); +} + +/* + * regdatabaseout - converts database OID to database name + */ +Datum +regdatabaseout(PG_FUNCTION_ARGS) +{ + Oid dboid = PG_GETARG_OID(0); + char *result; + + if (dboid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + result = get_database_name(dboid); + + if (result) + { + /* pstrdup is not really necessary, but it avoids a compiler warning */ + result = pstrdup(quote_identifier(result)); + } + else + { + /* If OID doesn't match any database, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", dboid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regdatabaserecv - converts external binary format to regdatabase + */ +Datum +regdatabaserecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regdatabasesend - converts regdatabase to binary format + */ +Datum +regdatabasesend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + +/* * text_regclass: convert text to regclass * * This could be replaced by CoerceViaIO, except that we need to treat diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 1e0f2de0336..ce6a626eba2 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -4619,6 +4619,7 @@ convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue, case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: + case REGDATABASEOID: *scaledvalue = convert_numeric_to_scalar(value, valuetypid, &failure); *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid, @@ -4751,6 +4752,7 @@ convert_numeric_to_scalar(Datum value, Oid typid, bool *failure) case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: + case REGDATABASEOID: /* we can treat OIDs as integers... */ return (double) DatumGetObjectId(value); } diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 347089b7626..0a5848a4ab2 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -6477,7 +6477,7 @@ timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow) if (TIMESTAMP_NOT_FINITE(timestamp)) return timestamp; - /* We don't expect this to fail, but check it pro forma */ + /* timestamp2tm should not fail on valid timestamps, but cope */ if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) == 0) { tz = DetermineTimeZoneOffset(tm, session_timezone); @@ -6485,23 +6485,22 @@ timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow) result = dt2local(timestamp, -tz); if (IS_VALID_TIMESTAMP(result)) - { return result; + } + + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + TIMESTAMP_NOBEGIN(result); } - else if (overflow) + else { - if (result < MIN_TIMESTAMP) - { - *overflow = -1; - TIMESTAMP_NOBEGIN(result); - } - else - { - *overflow = 1; - TIMESTAMP_NOEND(result); - } - return result; + *overflow = 1; + TIMESTAMP_NOEND(result); } + return result; } ereport(ERROR, @@ -6531,27 +6530,81 @@ timestamptz_timestamp(PG_FUNCTION_ARGS) PG_RETURN_TIMESTAMP(timestamptz2timestamp(timestamp)); } +/* + * Convert timestamptz to timestamp, throwing error for overflow. + */ static Timestamp timestamptz2timestamp(TimestampTz timestamp) { + return timestamptz2timestamp_opt_overflow(timestamp, NULL); +} + +/* + * Convert timestamp with time zone to timestamp. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the timestamptz is finite but out of the valid range for timestamp, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate timestamp infinity. + */ +Timestamp +timestamptz2timestamp_opt_overflow(TimestampTz timestamp, int *overflow) +{ Timestamp result; struct pg_tm tt, *tm = &tt; fsec_t fsec; int tz; + if (overflow) + *overflow = 0; + if (TIMESTAMP_NOT_FINITE(timestamp)) result = timestamp; else { if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + TIMESTAMP_NOBEGIN(result); + } + else + { + *overflow = 1; + TIMESTAMP_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } if (tm2timestamp(tm, fsec, NULL, &result) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + TIMESTAMP_NOBEGIN(result); + } + else + { + *overflow = 1; + TIMESTAMP_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } } return result; } diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 3e4d5568bde..ffae8c23abf 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -35,7 +35,6 @@ #include "port/pg_bswap.h" #include "regex/regex.h" #include "utils/builtins.h" -#include "utils/bytea.h" #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -43,10 +42,6 @@ #include "utils/sortsupport.h" #include "utils/varlena.h" - -/* GUC variable */ -int bytea_output = BYTEA_OUTPUT_HEX; - typedef struct varlena VarString; /* @@ -148,12 +143,6 @@ static int text_position_get_match_pos(TextPositionState *state); static void text_position_cleanup(TextPositionState *state); static void check_collation_set(Oid collid); static int text_cmp(text *arg1, text *arg2, Oid collid); -static bytea *bytea_catenate(bytea *t1, bytea *t2); -static bytea *bytea_substring(Datum str, - int S, - int L, - bool length_not_specified); -static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl); static void appendStringInfoText(StringInfo str, const text *t); static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate); static void split_text_accum_result(SplitTextOutputData *tstate, @@ -279,307 +268,6 @@ text_to_cstring_buffer(const text *src, char *dst, size_t dst_len) * USER I/O ROUTINES * *****************************************************************************/ - -#define VAL(CH) ((CH) - '0') -#define DIG(VAL) ((VAL) + '0') - -/* - * byteain - converts from printable representation of byte array - * - * Non-printable characters must be passed as '\nnn' (octal) and are - * converted to internal form. '\' must be passed as '\\'. - * ereport(ERROR, ...) if bad form. - * - * BUGS: - * The input is scanned twice. - * The error checking of input is minimal. - */ -Datum -byteain(PG_FUNCTION_ARGS) -{ - char *inputText = PG_GETARG_CSTRING(0); - Node *escontext = fcinfo->context; - char *tp; - char *rp; - int bc; - bytea *result; - - /* Recognize hex input */ - if (inputText[0] == '\\' && inputText[1] == 'x') - { - size_t len = strlen(inputText); - - bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */ - result = palloc(bc); - bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result), - escontext); - SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ - - PG_RETURN_BYTEA_P(result); - } - - /* Else, it's the traditional escaped style */ - for (bc = 0, tp = inputText; *tp != '\0'; bc++) - { - if (tp[0] != '\\') - tp++; - else if ((tp[0] == '\\') && - (tp[1] >= '0' && tp[1] <= '3') && - (tp[2] >= '0' && tp[2] <= '7') && - (tp[3] >= '0' && tp[3] <= '7')) - tp += 4; - else if ((tp[0] == '\\') && - (tp[1] == '\\')) - tp += 2; - else - { - /* - * one backslash, not followed by another or ### valid octal - */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "bytea"))); - } - } - - bc += VARHDRSZ; - - result = (bytea *) palloc(bc); - SET_VARSIZE(result, bc); - - tp = inputText; - rp = VARDATA(result); - while (*tp != '\0') - { - if (tp[0] != '\\') - *rp++ = *tp++; - else if ((tp[0] == '\\') && - (tp[1] >= '0' && tp[1] <= '3') && - (tp[2] >= '0' && tp[2] <= '7') && - (tp[3] >= '0' && tp[3] <= '7')) - { - bc = VAL(tp[1]); - bc <<= 3; - bc += VAL(tp[2]); - bc <<= 3; - *rp++ = bc + VAL(tp[3]); - - tp += 4; - } - else if ((tp[0] == '\\') && - (tp[1] == '\\')) - { - *rp++ = '\\'; - tp += 2; - } - else - { - /* - * We should never get here. The first pass should not allow it. - */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "bytea"))); - } - } - - PG_RETURN_BYTEA_P(result); -} - -/* - * byteaout - converts to printable representation of byte array - * - * In the traditional escaped format, non-printable characters are - * printed as '\nnn' (octal) and '\' as '\\'. - */ -Datum -byteaout(PG_FUNCTION_ARGS) -{ - bytea *vlena = PG_GETARG_BYTEA_PP(0); - char *result; - char *rp; - - if (bytea_output == BYTEA_OUTPUT_HEX) - { - /* Print hex format */ - rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); - *rp++ = '\\'; - *rp++ = 'x'; - rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); - } - else if (bytea_output == BYTEA_OUTPUT_ESCAPE) - { - /* Print traditional escaped format */ - char *vp; - uint64 len; - int i; - - len = 1; /* empty string has 1 char */ - vp = VARDATA_ANY(vlena); - for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) - { - if (*vp == '\\') - len += 2; - else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) - len += 4; - else - len++; - } - - /* - * In principle len can't overflow uint32 if the input fit in 1GB, but - * for safety let's check rather than relying on palloc's internal - * check. - */ - if (len > MaxAllocSize) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg_internal("result of bytea output conversion is too large"))); - rp = result = (char *) palloc(len); - - vp = VARDATA_ANY(vlena); - for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) - { - if (*vp == '\\') - { - *rp++ = '\\'; - *rp++ = '\\'; - } - else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) - { - int val; /* holds unprintable chars */ - - val = *vp; - rp[0] = '\\'; - rp[3] = DIG(val & 07); - val >>= 3; - rp[2] = DIG(val & 07); - val >>= 3; - rp[1] = DIG(val & 03); - rp += 4; - } - else - *rp++ = *vp; - } - } - else - { - elog(ERROR, "unrecognized \"bytea_output\" setting: %d", - bytea_output); - rp = result = NULL; /* keep compiler quiet */ - } - *rp = '\0'; - PG_RETURN_CSTRING(result); -} - -/* - * bytearecv - converts external binary format to bytea - */ -Datum -bytearecv(PG_FUNCTION_ARGS) -{ - StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); - bytea *result; - int nbytes; - - nbytes = buf->len - buf->cursor; - result = (bytea *) palloc(nbytes + VARHDRSZ); - SET_VARSIZE(result, nbytes + VARHDRSZ); - pq_copymsgbytes(buf, VARDATA(result), nbytes); - PG_RETURN_BYTEA_P(result); -} - -/* - * byteasend - converts bytea to binary format - * - * This is a special case: just copy the input... - */ -Datum -byteasend(PG_FUNCTION_ARGS) -{ - bytea *vlena = PG_GETARG_BYTEA_P_COPY(0); - - PG_RETURN_BYTEA_P(vlena); -} - -Datum -bytea_string_agg_transfn(PG_FUNCTION_ARGS) -{ - StringInfo state; - - state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); - - /* Append the value unless null, preceding it with the delimiter. */ - if (!PG_ARGISNULL(1)) - { - bytea *value = PG_GETARG_BYTEA_PP(1); - bool isfirst = false; - - /* - * You might think we can just throw away the first delimiter, however - * we must keep it as we may be a parallel worker doing partial - * aggregation building a state to send to the main process. We need - * to keep the delimiter of every aggregation so that the combine - * function can properly join up the strings of two separately - * partially aggregated results. The first delimiter is only stripped - * off in the final function. To know how much to strip off the front - * of the string, we store the length of the first delimiter in the - * StringInfo's cursor field, which we don't otherwise need here. - */ - if (state == NULL) - { - state = makeStringAggState(fcinfo); - isfirst = true; - } - - if (!PG_ARGISNULL(2)) - { - bytea *delim = PG_GETARG_BYTEA_PP(2); - - appendBinaryStringInfo(state, VARDATA_ANY(delim), - VARSIZE_ANY_EXHDR(delim)); - if (isfirst) - state->cursor = VARSIZE_ANY_EXHDR(delim); - } - - appendBinaryStringInfo(state, VARDATA_ANY(value), - VARSIZE_ANY_EXHDR(value)); - } - - /* - * The transition type for string_agg() is declared to be "internal", - * which is a pass-by-value type the same size as a pointer. - */ - if (state) - PG_RETURN_POINTER(state); - PG_RETURN_NULL(); -} - -Datum -bytea_string_agg_finalfn(PG_FUNCTION_ARGS) -{ - StringInfo state; - - /* cannot be called directly because of internal-type argument */ - Assert(AggCheckCallContext(fcinfo, NULL)); - - state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); - - if (state != NULL) - { - /* As per comment in transfn, strip data before the cursor position */ - bytea *result; - int strippedlen = state->len - state->cursor; - - result = (bytea *) palloc(strippedlen + VARHDRSZ); - SET_VARSIZE(result, strippedlen + VARHDRSZ); - memcpy(VARDATA(result), &state->data[state->cursor], strippedlen); - PG_RETURN_BYTEA_P(result); - } - else - PG_RETURN_NULL(); -} - /* * textin - converts cstring to internal representation */ @@ -2959,467 +2647,6 @@ bttext_pattern_sortsupport(PG_FUNCTION_ARGS) } -/*------------------------------------------------------------- - * byteaoctetlen - * - * get the number of bytes contained in an instance of type 'bytea' - *------------------------------------------------------------- - */ -Datum -byteaoctetlen(PG_FUNCTION_ARGS) -{ - Datum str = PG_GETARG_DATUM(0); - - /* We need not detoast the input at all */ - PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); -} - -/* - * byteacat - - * takes two bytea* and returns a bytea* that is the concatenation of - * the two. - * - * Cloned from textcat and modified as required. - */ -Datum -byteacat(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - - PG_RETURN_BYTEA_P(bytea_catenate(t1, t2)); -} - -/* - * bytea_catenate - * Guts of byteacat(), broken out so it can be used by other functions - * - * Arguments can be in short-header form, but not compressed or out-of-line - */ -static bytea * -bytea_catenate(bytea *t1, bytea *t2) -{ - bytea *result; - int len1, - len2, - len; - char *ptr; - - len1 = VARSIZE_ANY_EXHDR(t1); - len2 = VARSIZE_ANY_EXHDR(t2); - - /* paranoia ... probably should throw error instead? */ - if (len1 < 0) - len1 = 0; - if (len2 < 0) - len2 = 0; - - len = len1 + len2 + VARHDRSZ; - result = (bytea *) palloc(len); - - /* Set size of result string... */ - SET_VARSIZE(result, len); - - /* Fill data field of result string... */ - ptr = VARDATA(result); - if (len1 > 0) - memcpy(ptr, VARDATA_ANY(t1), len1); - if (len2 > 0) - memcpy(ptr + len1, VARDATA_ANY(t2), len2); - - return result; -} - -#define PG_STR_GET_BYTEA(str_) \ - DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_))) - -/* - * bytea_substr() - * Return a substring starting at the specified position. - * Cloned from text_substr and modified as required. - * - * Input: - * - string - * - starting position (is one-based) - * - string length (optional) - * - * If the starting position is zero or less, then return from the start of the string - * adjusting the length to be consistent with the "negative start" per SQL. - * If the length is less than zero, an ERROR is thrown. If no third argument - * (length) is provided, the length to the end of the string is assumed. - */ -Datum -bytea_substr(PG_FUNCTION_ARGS) -{ - PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), - PG_GETARG_INT32(1), - PG_GETARG_INT32(2), - false)); -} - -/* - * bytea_substr_no_len - - * Wrapper to avoid opr_sanity failure due to - * one function accepting a different number of args. - */ -Datum -bytea_substr_no_len(PG_FUNCTION_ARGS) -{ - PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), - PG_GETARG_INT32(1), - -1, - true)); -} - -static bytea * -bytea_substring(Datum str, - int S, - int L, - bool length_not_specified) -{ - int32 S1; /* adjusted start position */ - int32 L1; /* adjusted substring length */ - int32 E; /* end position */ - - /* - * The logic here should generally match text_substring(). - */ - S1 = Max(S, 1); - - if (length_not_specified) - { - /* - * Not passed a length - DatumGetByteaPSlice() grabs everything to the - * end of the string if we pass it a negative value for length. - */ - L1 = -1; - } - else if (L < 0) - { - /* SQL99 says to throw an error for E < S, i.e., negative length */ - ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), - errmsg("negative substring length not allowed"))); - L1 = -1; /* silence stupider compilers */ - } - else if (pg_add_s32_overflow(S, L, &E)) - { - /* - * L could be large enough for S + L to overflow, in which case the - * substring must run to end of string. - */ - L1 = -1; - } - else - { - /* - * A zero or negative value for the end position can happen if the - * start was negative or one. SQL99 says to return a zero-length - * string. - */ - if (E < 1) - return PG_STR_GET_BYTEA(""); - - L1 = E - S1; - } - - /* - * If the start position is past the end of the string, SQL99 says to - * return a zero-length string -- DatumGetByteaPSlice() will do that for - * us. We need only convert S1 to zero-based starting position. - */ - return DatumGetByteaPSlice(str, S1 - 1, L1); -} - -/* - * byteaoverlay - * Replace specified substring of first string with second - * - * The SQL standard defines OVERLAY() in terms of substring and concatenation. - * This code is a direct implementation of what the standard says. - */ -Datum -byteaoverlay(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int sp = PG_GETARG_INT32(2); /* substring start position */ - int sl = PG_GETARG_INT32(3); /* substring length */ - - PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); -} - -Datum -byteaoverlay_no_len(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int sp = PG_GETARG_INT32(2); /* substring start position */ - int sl; - - sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */ - PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); -} - -static bytea * -bytea_overlay(bytea *t1, bytea *t2, int sp, int sl) -{ - bytea *result; - bytea *s1; - bytea *s2; - int sp_pl_sl; - - /* - * Check for possible integer-overflow cases. For negative sp, throw a - * "substring length" error because that's what should be expected - * according to the spec's definition of OVERLAY(). - */ - if (sp <= 0) - ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), - errmsg("negative substring length not allowed"))); - if (pg_add_s32_overflow(sp, sl, &sp_pl_sl)) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("integer out of range"))); - - s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false); - s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true); - result = bytea_catenate(s1, t2); - result = bytea_catenate(result, s2); - - return result; -} - -/* - * bit_count - */ -Datum -bytea_bit_count(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - - PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1))); -} - -/* - * byteapos - - * Return the position of the specified substring. - * Implements the SQL POSITION() function. - * Cloned from textpos and modified as required. - */ -Datum -byteapos(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int pos; - int px, - p; - int len1, - len2; - char *p1, - *p2; - - len1 = VARSIZE_ANY_EXHDR(t1); - len2 = VARSIZE_ANY_EXHDR(t2); - - if (len2 <= 0) - PG_RETURN_INT32(1); /* result for empty pattern */ - - p1 = VARDATA_ANY(t1); - p2 = VARDATA_ANY(t2); - - pos = 0; - px = (len1 - len2); - for (p = 0; p <= px; p++) - { - if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0)) - { - pos = p + 1; - break; - }; - p1++; - }; - - PG_RETURN_INT32(pos); -} - -/*------------------------------------------------------------- - * byteaGetByte - * - * this routine treats "bytea" as an array of bytes. - * It returns the Nth byte (a number between 0 and 255). - *------------------------------------------------------------- - */ -Datum -byteaGetByte(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int32 n = PG_GETARG_INT32(1); - int len; - int byte; - - len = VARSIZE_ANY_EXHDR(v); - - if (n < 0 || n >= len) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %d out of valid range, 0..%d", - n, len - 1))); - - byte = ((unsigned char *) VARDATA_ANY(v))[n]; - - PG_RETURN_INT32(byte); -} - -/*------------------------------------------------------------- - * byteaGetBit - * - * This routine treats a "bytea" type like an array of bits. - * It returns the value of the Nth bit (0 or 1). - * - *------------------------------------------------------------- - */ -Datum -byteaGetBit(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int64 n = PG_GETARG_INT64(1); - int byteNo, - bitNo; - int len; - int byte; - - len = VARSIZE_ANY_EXHDR(v); - - if (n < 0 || n >= (int64) len * 8) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, - n, (int64) len * 8 - 1))); - - /* n/8 is now known < len, so safe to cast to int */ - byteNo = (int) (n / 8); - bitNo = (int) (n % 8); - - byte = ((unsigned char *) VARDATA_ANY(v))[byteNo]; - - if (byte & (1 << bitNo)) - PG_RETURN_INT32(1); - else - PG_RETURN_INT32(0); -} - -/*------------------------------------------------------------- - * byteaSetByte - * - * Given an instance of type 'bytea' creates a new one with - * the Nth byte set to the given value. - * - *------------------------------------------------------------- - */ -Datum -byteaSetByte(PG_FUNCTION_ARGS) -{ - bytea *res = PG_GETARG_BYTEA_P_COPY(0); - int32 n = PG_GETARG_INT32(1); - int32 newByte = PG_GETARG_INT32(2); - int len; - - len = VARSIZE(res) - VARHDRSZ; - - if (n < 0 || n >= len) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %d out of valid range, 0..%d", - n, len - 1))); - - /* - * Now set the byte. - */ - ((unsigned char *) VARDATA(res))[n] = newByte; - - PG_RETURN_BYTEA_P(res); -} - -/*------------------------------------------------------------- - * byteaSetBit - * - * Given an instance of type 'bytea' creates a new one with - * the Nth bit set to the given value. - * - *------------------------------------------------------------- - */ -Datum -byteaSetBit(PG_FUNCTION_ARGS) -{ - bytea *res = PG_GETARG_BYTEA_P_COPY(0); - int64 n = PG_GETARG_INT64(1); - int32 newBit = PG_GETARG_INT32(2); - int len; - int oldByte, - newByte; - int byteNo, - bitNo; - - len = VARSIZE(res) - VARHDRSZ; - - if (n < 0 || n >= (int64) len * 8) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, - n, (int64) len * 8 - 1))); - - /* n/8 is now known < len, so safe to cast to int */ - byteNo = (int) (n / 8); - bitNo = (int) (n % 8); - - /* - * sanity check! - */ - if (newBit != 0 && newBit != 1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("new bit must be 0 or 1"))); - - /* - * Update the byte. - */ - oldByte = ((unsigned char *) VARDATA(res))[byteNo]; - - if (newBit == 0) - newByte = oldByte & (~(1 << bitNo)); - else - newByte = oldByte | (1 << bitNo); - - ((unsigned char *) VARDATA(res))[byteNo] = newByte; - - PG_RETURN_BYTEA_P(res); -} - -/* - * Return reversed bytea - */ -Datum -bytea_reverse(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - const char *p = VARDATA_ANY(v); - int len = VARSIZE_ANY_EXHDR(v); - const char *endp = p + len; - bytea *result = palloc(len + VARHDRSZ); - char *dst = (char *) VARDATA(result) + len; - - SET_VARSIZE(result, len + VARHDRSZ); - - while (p < endp) - *(--dst) = *p++; - - PG_RETURN_BYTEA_P(result); -} - - /* text_name() * Converts a text type to a Name type. */ @@ -3849,331 +3076,6 @@ SplitGUCList(char *rawstring, char separator, return true; } - -/***************************************************************************** - * Comparison Functions used for bytea - * - * Note: btree indexes need these routines not to leak memory; therefore, - * be careful to free working copies of toasted datums. Most places don't - * need to be so careful. - *****************************************************************************/ - -Datum -byteaeq(PG_FUNCTION_ARGS) -{ - Datum arg1 = PG_GETARG_DATUM(0); - Datum arg2 = PG_GETARG_DATUM(1); - bool result; - Size len1, - len2; - - /* - * We can use a fast path for unequal lengths, which might save us from - * having to detoast one or both values. - */ - len1 = toast_raw_datum_size(arg1); - len2 = toast_raw_datum_size(arg2); - if (len1 != len2) - result = false; - else - { - bytea *barg1 = DatumGetByteaPP(arg1); - bytea *barg2 = DatumGetByteaPP(arg2); - - result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), - len1 - VARHDRSZ) == 0); - - PG_FREE_IF_COPY(barg1, 0); - PG_FREE_IF_COPY(barg2, 1); - } - - PG_RETURN_BOOL(result); -} - -Datum -byteane(PG_FUNCTION_ARGS) -{ - Datum arg1 = PG_GETARG_DATUM(0); - Datum arg2 = PG_GETARG_DATUM(1); - bool result; - Size len1, - len2; - - /* - * We can use a fast path for unequal lengths, which might save us from - * having to detoast one or both values. - */ - len1 = toast_raw_datum_size(arg1); - len2 = toast_raw_datum_size(arg2); - if (len1 != len2) - result = true; - else - { - bytea *barg1 = DatumGetByteaPP(arg1); - bytea *barg2 = DatumGetByteaPP(arg2); - - result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), - len1 - VARHDRSZ) != 0); - - PG_FREE_IF_COPY(barg1, 0); - PG_FREE_IF_COPY(barg2, 1); - } - - PG_RETURN_BOOL(result); -} - -Datum -bytealt(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2))); -} - -Datum -byteale(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2))); -} - -Datum -byteagt(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2))); -} - -Datum -byteage(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2))); -} - -Datum -byteacmp(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - if ((cmp == 0) && (len1 != len2)) - cmp = (len1 < len2) ? -1 : 1; - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_INT32(cmp); -} - -Datum -bytea_larger(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - bytea *result; - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2); - - PG_RETURN_BYTEA_P(result); -} - -Datum -bytea_smaller(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - bytea *result; - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2); - - PG_RETURN_BYTEA_P(result); -} - -Datum -bytea_sortsupport(PG_FUNCTION_ARGS) -{ - SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); - MemoryContext oldcontext; - - oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); - - /* Use generic string SortSupport, forcing "C" collation */ - varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID); - - MemoryContextSwitchTo(oldcontext); - - PG_RETURN_VOID(); -} - -/* Cast bytea -> int2 */ -Datum -bytea_int2(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint16 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("smallint out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT16(result); -} - -/* Cast bytea -> int4 */ -Datum -bytea_int4(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint32 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("integer out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT32(result); -} - -/* Cast bytea -> int8 */ -Datum -bytea_int8(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint64 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("bigint out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT64(result); -} - -/* Cast int2 -> bytea; can just use int2send() */ -Datum -int2_bytea(PG_FUNCTION_ARGS) -{ - return int2send(fcinfo); -} - -/* Cast int4 -> bytea; can just use int4send() */ -Datum -int4_bytea(PG_FUNCTION_ARGS) -{ - return int4send(fcinfo); -} - -/* Cast int8 -> bytea; can just use int8send() */ -Datum -int8_bytea(PG_FUNCTION_ARGS) -{ - return int8send(fcinfo); -} - /* * appendStringInfoText * diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index a4150bff2ea..2bd39b6ac4b 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -529,14 +529,36 @@ xmltext(PG_FUNCTION_ARGS) #ifdef USE_LIBXML text *arg = PG_GETARG_TEXT_PP(0); text *result; - xmlChar *xmlbuf = NULL; + volatile xmlChar *xmlbuf = NULL; + PgXmlErrorContext *xmlerrcxt; + + /* Otherwise, we gotta spin up some error handling. */ + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); - xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg)); + PG_TRY(); + { + xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg)); - Assert(xmlbuf); + if (xmlbuf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlChar"); + + result = cstring_to_text_with_len((const char *) xmlbuf, + xmlStrlen((const xmlChar *) xmlbuf)); + } + PG_CATCH(); + { + if (xmlbuf) + xmlFree((xmlChar *) xmlbuf); + + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlFree((xmlChar *) xmlbuf); + pg_xml_done(xmlerrcxt, false); - result = cstring_to_text_with_len((const char *) xmlbuf, xmlStrlen(xmlbuf)); - xmlFree(xmlbuf); PG_RETURN_XML_P(result); #else NO_XML_SUPPORT(); @@ -770,7 +792,10 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) if (oldroot != NULL) xmlFreeNode(oldroot); - xmlAddChildList(root, content_nodes); + if (xmlAddChildList(root, content_nodes) == NULL || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not append xml node list"); /* * We use this node to insert newlines in the dump. Note: in at @@ -931,7 +956,10 @@ xmlelement(XmlExpr *xexpr, xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate xmlTextWriter"); - xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name); + if (xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not start xml element"); forboth(arg, named_arg_strings, narg, xexpr->arg_names) { @@ -939,19 +967,30 @@ xmlelement(XmlExpr *xexpr, char *argname = strVal(lfirst(narg)); if (str) - xmlTextWriterWriteAttribute(writer, - (xmlChar *) argname, - (xmlChar *) str); + { + if (xmlTextWriterWriteAttribute(writer, + (xmlChar *) argname, + (xmlChar *) str) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not write xml attribute"); + } } foreach(arg, arg_strings) { char *str = (char *) lfirst(arg); - xmlTextWriterWriteRaw(writer, (xmlChar *) str); + if (xmlTextWriterWriteRaw(writer, (xmlChar *) str) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not write raw xml text"); } - xmlTextWriterEndElement(writer); + if (xmlTextWriterEndElement(writer) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not end xml element"); /* we MUST do this now to flush data out to the buffer ... */ xmlFreeTextWriter(writer); @@ -4220,20 +4259,27 @@ xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt) } else { - xmlChar *str; + volatile xmlChar *str = NULL; - str = xmlXPathCastNodeToString(cur); PG_TRY(); { + char *escaped; + + str = xmlXPathCastNodeToString(cur); + if (str == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlChar"); + /* Here we rely on XML having the same representation as TEXT */ - char *escaped = escape_xml((char *) str); + escaped = escape_xml((char *) str); result = (xmltype *) cstring_to_text(escaped); pfree(escaped); } PG_FINALLY(); { - xmlFree(str); + if (str) + xmlFree((xmlChar *) str); } PG_END_TRY(); } diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 657648996c2..d1b25214376 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -317,6 +317,7 @@ GetCCHashEqFuncs(Oid keytype, CCHashFN *hashfunc, RegProcedure *eqfunc, CCFastEq case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: + case REGDATABASEOID: *hashfunc = int4hashfast; *fasteqfunc = int4eqfast; *eqfunc = F_OIDEQ; diff --git a/src/backend/utils/misc/injection_point.c b/src/backend/utils/misc/injection_point.c index f58ebc8ee52..83b887b6978 100644 --- a/src/backend/utils/misc/injection_point.c +++ b/src/backend/utils/misc/injection_point.c @@ -584,3 +584,49 @@ IsInjectionPointAttached(const char *name) return false; /* silence compiler */ #endif } + +/* + * Retrieve a list of all the injection points currently attached. + * + * This list is palloc'd in the current memory context. + */ +List * +InjectionPointList(void) +{ +#ifdef USE_INJECTION_POINTS + List *inj_points = NIL; + uint32 max_inuse; + + LWLockAcquire(InjectionPointLock, LW_SHARED); + + max_inuse = pg_atomic_read_u32(&ActiveInjectionPoints->max_inuse); + + for (uint32 idx = 0; idx < max_inuse; idx++) + { + InjectionPointEntry *entry; + InjectionPointData *inj_point; + uint64 generation; + + entry = &ActiveInjectionPoints->entries[idx]; + generation = pg_atomic_read_u64(&entry->generation); + + /* skip free slots */ + if (generation % 2 == 0) + continue; + + inj_point = (InjectionPointData *) palloc0(sizeof(InjectionPointData)); + inj_point->name = pstrdup(entry->name); + inj_point->library = pstrdup(entry->library); + inj_point->function = pstrdup(entry->function); + inj_points = lappend(inj_points, inj_point); + } + + LWLockRelease(InjectionPointLock); + + return inj_points; + +#else + elog(ERROR, "Injection points are not supported by this build"); + return NIL; /* keep compiler quiet */ +#endif +} diff --git a/src/backend/utils/mmgr/dsa.c b/src/backend/utils/mmgr/dsa.c index 17d4f7a7a06..be43e9351c3 100644 --- a/src/backend/utils/mmgr/dsa.c +++ b/src/backend/utils/mmgr/dsa.c @@ -532,6 +532,21 @@ dsa_attach(dsa_handle handle) } /* + * Returns whether the area with the given handle was already attached by the + * current process. The area must have been created with dsa_create (not + * dsa_create_in_place). + */ +bool +dsa_is_attached(dsa_handle handle) +{ + /* + * An area handle is really a DSM segment handle for the first segment, so + * we can just search for that. + */ + return dsm_find_mapping(handle) != NULL; +} + +/* * Attach to an area that was created with dsa_create_in_place. The caller * must somehow know the location in memory that was used when the area was * created, though it may be mapped at a different virtual address in this |