diff options
Diffstat (limited to 'src/backend/utils/adt')
27 files changed, 2180 insertions, 1761 deletions
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 4a233b63c32..ffeacf2b819 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -23,6 +23,7 @@ OBJS = \ arrayutils.o \ ascii.o \ bool.o \ + bytea.o \ cash.o \ char.o \ cryptohashfuncs.o \ diff --git a/src/backend/utils/adt/bytea.c b/src/backend/utils/adt/bytea.c new file mode 100644 index 00000000000..2e539c2504e --- /dev/null +++ b/src/backend/utils/adt/bytea.c @@ -0,0 +1,1143 @@ +/*------------------------------------------------------------------------- + * + * bytea.c + * Functions for the bytea type. + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/bytea.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/detoast.h" +#include "catalog/pg_collation_d.h" +#include "catalog/pg_type_d.h" +#include "common/int.h" +#include "fmgr.h" +#include "libpq/pqformat.h" +#include "port/pg_bitutils.h" +#include "utils/builtins.h" +#include "utils/bytea.h" +#include "utils/fmgrprotos.h" +#include "utils/memutils.h" +#include "utils/sortsupport.h" +#include "utils/varlena.h" +#include "varatt.h" + +/* GUC variable */ +int bytea_output = BYTEA_OUTPUT_HEX; + +static bytea *bytea_catenate(bytea *t1, bytea *t2); +static bytea *bytea_substring(Datum str, int S, int L, + bool length_not_specified); +static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl); + +/* + * bytea_catenate + * Guts of byteacat(), broken out so it can be used by other functions + * + * Arguments can be in short-header form, but not compressed or out-of-line + */ +static bytea * +bytea_catenate(bytea *t1, bytea *t2) +{ + bytea *result; + int len1, + len2, + len; + char *ptr; + + len1 = VARSIZE_ANY_EXHDR(t1); + len2 = VARSIZE_ANY_EXHDR(t2); + + /* paranoia ... probably should throw error instead? */ + if (len1 < 0) + len1 = 0; + if (len2 < 0) + len2 = 0; + + len = len1 + len2 + VARHDRSZ; + result = (bytea *) palloc(len); + + /* Set size of result string... */ + SET_VARSIZE(result, len); + + /* Fill data field of result string... */ + ptr = VARDATA(result); + if (len1 > 0) + memcpy(ptr, VARDATA_ANY(t1), len1); + if (len2 > 0) + memcpy(ptr + len1, VARDATA_ANY(t2), len2); + + return result; +} + +#define PG_STR_GET_BYTEA(str_) \ + DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_))) + +static bytea * +bytea_substring(Datum str, + int S, + int L, + bool length_not_specified) +{ + int32 S1; /* adjusted start position */ + int32 L1; /* adjusted substring length */ + int32 E; /* end position */ + + /* + * The logic here should generally match text_substring(). + */ + S1 = Max(S, 1); + + if (length_not_specified) + { + /* + * Not passed a length - DatumGetByteaPSlice() grabs everything to the + * end of the string if we pass it a negative value for length. + */ + L1 = -1; + } + else if (L < 0) + { + /* SQL99 says to throw an error for E < S, i.e., negative length */ + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + L1 = -1; /* silence stupider compilers */ + } + else if (pg_add_s32_overflow(S, L, &E)) + { + /* + * L could be large enough for S + L to overflow, in which case the + * substring must run to end of string. + */ + L1 = -1; + } + else + { + /* + * A zero or negative value for the end position can happen if the + * start was negative or one. SQL99 says to return a zero-length + * string. + */ + if (E < 1) + return PG_STR_GET_BYTEA(""); + + L1 = E - S1; + } + + /* + * If the start position is past the end of the string, SQL99 says to + * return a zero-length string -- DatumGetByteaPSlice() will do that for + * us. We need only convert S1 to zero-based starting position. + */ + return DatumGetByteaPSlice(str, S1 - 1, L1); +} + +static bytea * +bytea_overlay(bytea *t1, bytea *t2, int sp, int sl) +{ + bytea *result; + bytea *s1; + bytea *s2; + int sp_pl_sl; + + /* + * Check for possible integer-overflow cases. For negative sp, throw a + * "substring length" error because that's what should be expected + * according to the spec's definition of OVERLAY(). + */ + if (sp <= 0) + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + if (pg_add_s32_overflow(sp, sl, &sp_pl_sl)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false); + s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true); + result = bytea_catenate(s1, t2); + result = bytea_catenate(result, s2); + + return result; +} + +/***************************************************************************** + * USER I/O ROUTINES * + *****************************************************************************/ + +#define VAL(CH) ((CH) - '0') +#define DIG(VAL) ((VAL) + '0') + +/* + * byteain - converts from printable representation of byte array + * + * Non-printable characters must be passed as '\nnn' (octal) and are + * converted to internal form. '\' must be passed as '\\'. + * ereport(ERROR, ...) if bad form. + * + * BUGS: + * The input is scanned twice. + * The error checking of input is minimal. + */ +Datum +byteain(PG_FUNCTION_ARGS) +{ + char *inputText = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + char *tp; + char *rp; + int bc; + bytea *result; + + /* Recognize hex input */ + if (inputText[0] == '\\' && inputText[1] == 'x') + { + size_t len = strlen(inputText); + + bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */ + result = palloc(bc); + bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result), + escontext); + SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ + + PG_RETURN_BYTEA_P(result); + } + + /* Else, it's the traditional escaped style */ + for (bc = 0, tp = inputText; *tp != '\0'; bc++) + { + if (tp[0] != '\\') + tp++; + else if ((tp[0] == '\\') && + (tp[1] >= '0' && tp[1] <= '3') && + (tp[2] >= '0' && tp[2] <= '7') && + (tp[3] >= '0' && tp[3] <= '7')) + tp += 4; + else if ((tp[0] == '\\') && + (tp[1] == '\\')) + tp += 2; + else + { + /* + * one backslash, not followed by another or ### valid octal + */ + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "bytea"))); + } + } + + bc += VARHDRSZ; + + result = (bytea *) palloc(bc); + SET_VARSIZE(result, bc); + + tp = inputText; + rp = VARDATA(result); + while (*tp != '\0') + { + if (tp[0] != '\\') + *rp++ = *tp++; + else if ((tp[0] == '\\') && + (tp[1] >= '0' && tp[1] <= '3') && + (tp[2] >= '0' && tp[2] <= '7') && + (tp[3] >= '0' && tp[3] <= '7')) + { + bc = VAL(tp[1]); + bc <<= 3; + bc += VAL(tp[2]); + bc <<= 3; + *rp++ = bc + VAL(tp[3]); + + tp += 4; + } + else if ((tp[0] == '\\') && + (tp[1] == '\\')) + { + *rp++ = '\\'; + tp += 2; + } + else + { + /* + * We should never get here. The first pass should not allow it. + */ + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "bytea"))); + } + } + + PG_RETURN_BYTEA_P(result); +} + +/* + * byteaout - converts to printable representation of byte array + * + * In the traditional escaped format, non-printable characters are + * printed as '\nnn' (octal) and '\' as '\\'. + */ +Datum +byteaout(PG_FUNCTION_ARGS) +{ + bytea *vlena = PG_GETARG_BYTEA_PP(0); + char *result; + char *rp; + + if (bytea_output == BYTEA_OUTPUT_HEX) + { + /* Print hex format */ + rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); + *rp++ = '\\'; + *rp++ = 'x'; + rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); + } + else if (bytea_output == BYTEA_OUTPUT_ESCAPE) + { + /* Print traditional escaped format */ + char *vp; + uint64 len; + int i; + + len = 1; /* empty string has 1 char */ + vp = VARDATA_ANY(vlena); + for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) + { + if (*vp == '\\') + len += 2; + else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) + len += 4; + else + len++; + } + + /* + * In principle len can't overflow uint32 if the input fit in 1GB, but + * for safety let's check rather than relying on palloc's internal + * check. + */ + if (len > MaxAllocSize) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg_internal("result of bytea output conversion is too large"))); + rp = result = (char *) palloc(len); + + vp = VARDATA_ANY(vlena); + for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) + { + if (*vp == '\\') + { + *rp++ = '\\'; + *rp++ = '\\'; + } + else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) + { + int val; /* holds unprintable chars */ + + val = *vp; + rp[0] = '\\'; + rp[3] = DIG(val & 07); + val >>= 3; + rp[2] = DIG(val & 07); + val >>= 3; + rp[1] = DIG(val & 03); + rp += 4; + } + else + *rp++ = *vp; + } + } + else + { + elog(ERROR, "unrecognized \"bytea_output\" setting: %d", + bytea_output); + rp = result = NULL; /* keep compiler quiet */ + } + *rp = '\0'; + PG_RETURN_CSTRING(result); +} + +/* + * bytearecv - converts external binary format to bytea + */ +Datum +bytearecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + bytea *result; + int nbytes; + + nbytes = buf->len - buf->cursor; + result = (bytea *) palloc(nbytes + VARHDRSZ); + SET_VARSIZE(result, nbytes + VARHDRSZ); + pq_copymsgbytes(buf, VARDATA(result), nbytes); + PG_RETURN_BYTEA_P(result); +} + +/* + * byteasend - converts bytea to binary format + * + * This is a special case: just copy the input... + */ +Datum +byteasend(PG_FUNCTION_ARGS) +{ + bytea *vlena = PG_GETARG_BYTEA_P_COPY(0); + + PG_RETURN_BYTEA_P(vlena); +} + +Datum +bytea_string_agg_transfn(PG_FUNCTION_ARGS) +{ + StringInfo state; + + state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); + + /* Append the value unless null, preceding it with the delimiter. */ + if (!PG_ARGISNULL(1)) + { + bytea *value = PG_GETARG_BYTEA_PP(1); + bool isfirst = false; + + /* + * You might think we can just throw away the first delimiter, however + * we must keep it as we may be a parallel worker doing partial + * aggregation building a state to send to the main process. We need + * to keep the delimiter of every aggregation so that the combine + * function can properly join up the strings of two separately + * partially aggregated results. The first delimiter is only stripped + * off in the final function. To know how much to strip off the front + * of the string, we store the length of the first delimiter in the + * StringInfo's cursor field, which we don't otherwise need here. + */ + if (state == NULL) + { + MemoryContext aggcontext; + MemoryContext oldcontext; + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "bytea_string_agg_transfn called in non-aggregate context"); + } + + /* + * Create state in aggregate context. It'll stay there across + * subsequent calls. + */ + oldcontext = MemoryContextSwitchTo(aggcontext); + state = makeStringInfo(); + MemoryContextSwitchTo(oldcontext); + + isfirst = true; + } + + if (!PG_ARGISNULL(2)) + { + bytea *delim = PG_GETARG_BYTEA_PP(2); + + appendBinaryStringInfo(state, VARDATA_ANY(delim), + VARSIZE_ANY_EXHDR(delim)); + if (isfirst) + state->cursor = VARSIZE_ANY_EXHDR(delim); + } + + appendBinaryStringInfo(state, VARDATA_ANY(value), + VARSIZE_ANY_EXHDR(value)); + } + + /* + * The transition type for string_agg() is declared to be "internal", + * which is a pass-by-value type the same size as a pointer. + */ + if (state) + PG_RETURN_POINTER(state); + PG_RETURN_NULL(); +} + +Datum +bytea_string_agg_finalfn(PG_FUNCTION_ARGS) +{ + StringInfo state; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); + + if (state != NULL) + { + /* As per comment in transfn, strip data before the cursor position */ + bytea *result; + int strippedlen = state->len - state->cursor; + + result = (bytea *) palloc(strippedlen + VARHDRSZ); + SET_VARSIZE(result, strippedlen + VARHDRSZ); + memcpy(VARDATA(result), &state->data[state->cursor], strippedlen); + PG_RETURN_BYTEA_P(result); + } + else + PG_RETURN_NULL(); +} + +/*------------------------------------------------------------- + * byteaoctetlen + * + * get the number of bytes contained in an instance of type 'bytea' + *------------------------------------------------------------- + */ +Datum +byteaoctetlen(PG_FUNCTION_ARGS) +{ + Datum str = PG_GETARG_DATUM(0); + + /* We need not detoast the input at all */ + PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); +} + +/* + * byteacat - + * takes two bytea* and returns a bytea* that is the concatenation of + * the two. + * + * Cloned from textcat and modified as required. + */ +Datum +byteacat(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + + PG_RETURN_BYTEA_P(bytea_catenate(t1, t2)); +} + +/* + * byteaoverlay + * Replace specified substring of first string with second + * + * The SQL standard defines OVERLAY() in terms of substring and concatenation. + * This code is a direct implementation of what the standard says. + */ +Datum +byteaoverlay(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int sp = PG_GETARG_INT32(2); /* substring start position */ + int sl = PG_GETARG_INT32(3); /* substring length */ + + PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); +} + +Datum +byteaoverlay_no_len(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int sp = PG_GETARG_INT32(2); /* substring start position */ + int sl; + + sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */ + PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); +} + +/* + * bytea_substr() + * Return a substring starting at the specified position. + * Cloned from text_substr and modified as required. + * + * Input: + * - string + * - starting position (is one-based) + * - string length (optional) + * + * If the starting position is zero or less, then return from the start of the string + * adjusting the length to be consistent with the "negative start" per SQL. + * If the length is less than zero, an ERROR is thrown. If no third argument + * (length) is provided, the length to the end of the string is assumed. + */ +Datum +bytea_substr(PG_FUNCTION_ARGS) +{ + PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), + PG_GETARG_INT32(1), + PG_GETARG_INT32(2), + false)); +} + +/* + * bytea_substr_no_len - + * Wrapper to avoid opr_sanity failure due to + * one function accepting a different number of args. + */ +Datum +bytea_substr_no_len(PG_FUNCTION_ARGS) +{ + PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), + PG_GETARG_INT32(1), + -1, + true)); +} + +/* + * bit_count + */ +Datum +bytea_bit_count(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + + PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1))); +} + +/* + * byteapos - + * Return the position of the specified substring. + * Implements the SQL POSITION() function. + * Cloned from textpos and modified as required. + */ +Datum +byteapos(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int pos; + int px, + p; + int len1, + len2; + char *p1, + *p2; + + len1 = VARSIZE_ANY_EXHDR(t1); + len2 = VARSIZE_ANY_EXHDR(t2); + + if (len2 <= 0) + PG_RETURN_INT32(1); /* result for empty pattern */ + + p1 = VARDATA_ANY(t1); + p2 = VARDATA_ANY(t2); + + pos = 0; + px = (len1 - len2); + for (p = 0; p <= px; p++) + { + if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0)) + { + pos = p + 1; + break; + }; + p1++; + }; + + PG_RETURN_INT32(pos); +} + +/*------------------------------------------------------------- + * byteaGetByte + * + * this routine treats "bytea" as an array of bytes. + * It returns the Nth byte (a number between 0 and 255). + *------------------------------------------------------------- + */ +Datum +byteaGetByte(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int32 n = PG_GETARG_INT32(1); + int len; + int byte; + + len = VARSIZE_ANY_EXHDR(v); + + if (n < 0 || n >= len) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %d out of valid range, 0..%d", + n, len - 1))); + + byte = ((unsigned char *) VARDATA_ANY(v))[n]; + + PG_RETURN_INT32(byte); +} + +/*------------------------------------------------------------- + * byteaGetBit + * + * This routine treats a "bytea" type like an array of bits. + * It returns the value of the Nth bit (0 or 1). + * + *------------------------------------------------------------- + */ +Datum +byteaGetBit(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int64 n = PG_GETARG_INT64(1); + int byteNo, + bitNo; + int len; + int byte; + + len = VARSIZE_ANY_EXHDR(v); + + if (n < 0 || n >= (int64) len * 8) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, + n, (int64) len * 8 - 1))); + + /* n/8 is now known < len, so safe to cast to int */ + byteNo = (int) (n / 8); + bitNo = (int) (n % 8); + + byte = ((unsigned char *) VARDATA_ANY(v))[byteNo]; + + if (byte & (1 << bitNo)) + PG_RETURN_INT32(1); + else + PG_RETURN_INT32(0); +} + +/*------------------------------------------------------------- + * byteaSetByte + * + * Given an instance of type 'bytea' creates a new one with + * the Nth byte set to the given value. + * + *------------------------------------------------------------- + */ +Datum +byteaSetByte(PG_FUNCTION_ARGS) +{ + bytea *res = PG_GETARG_BYTEA_P_COPY(0); + int32 n = PG_GETARG_INT32(1); + int32 newByte = PG_GETARG_INT32(2); + int len; + + len = VARSIZE(res) - VARHDRSZ; + + if (n < 0 || n >= len) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %d out of valid range, 0..%d", + n, len - 1))); + + /* + * Now set the byte. + */ + ((unsigned char *) VARDATA(res))[n] = newByte; + + PG_RETURN_BYTEA_P(res); +} + +/*------------------------------------------------------------- + * byteaSetBit + * + * Given an instance of type 'bytea' creates a new one with + * the Nth bit set to the given value. + * + *------------------------------------------------------------- + */ +Datum +byteaSetBit(PG_FUNCTION_ARGS) +{ + bytea *res = PG_GETARG_BYTEA_P_COPY(0); + int64 n = PG_GETARG_INT64(1); + int32 newBit = PG_GETARG_INT32(2); + int len; + int oldByte, + newByte; + int byteNo, + bitNo; + + len = VARSIZE(res) - VARHDRSZ; + + if (n < 0 || n >= (int64) len * 8) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, + n, (int64) len * 8 - 1))); + + /* n/8 is now known < len, so safe to cast to int */ + byteNo = (int) (n / 8); + bitNo = (int) (n % 8); + + /* + * sanity check! + */ + if (newBit != 0 && newBit != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("new bit must be 0 or 1"))); + + /* + * Update the byte. + */ + oldByte = ((unsigned char *) VARDATA(res))[byteNo]; + + if (newBit == 0) + newByte = oldByte & (~(1 << bitNo)); + else + newByte = oldByte | (1 << bitNo); + + ((unsigned char *) VARDATA(res))[byteNo] = newByte; + + PG_RETURN_BYTEA_P(res); +} + +/* + * Return reversed bytea + */ +Datum +bytea_reverse(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + const char *p = VARDATA_ANY(v); + int len = VARSIZE_ANY_EXHDR(v); + const char *endp = p + len; + bytea *result = palloc(len + VARHDRSZ); + char *dst = (char *) VARDATA(result) + len; + + SET_VARSIZE(result, len + VARHDRSZ); + + while (p < endp) + *(--dst) = *p++; + + PG_RETURN_BYTEA_P(result); +} + + +/***************************************************************************** + * Comparison Functions used for bytea + * + * Note: btree indexes need these routines not to leak memory; therefore, + * be careful to free working copies of toasted datums. Most places don't + * need to be so careful. + *****************************************************************************/ + +Datum +byteaeq(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + bool result; + Size len1, + len2; + + /* + * We can use a fast path for unequal lengths, which might save us from + * having to detoast one or both values. + */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = false; + else + { + bytea *barg1 = DatumGetByteaPP(arg1); + bytea *barg2 = DatumGetByteaPP(arg2); + + result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), + len1 - VARHDRSZ) == 0); + + PG_FREE_IF_COPY(barg1, 0); + PG_FREE_IF_COPY(barg2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +byteane(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + bool result; + Size len1, + len2; + + /* + * We can use a fast path for unequal lengths, which might save us from + * having to detoast one or both values. + */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = true; + else + { + bytea *barg1 = DatumGetByteaPP(arg1); + bytea *barg2 = DatumGetByteaPP(arg2); + + result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), + len1 - VARHDRSZ) != 0); + + PG_FREE_IF_COPY(barg1, 0); + PG_FREE_IF_COPY(barg2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +bytealt(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2))); +} + +Datum +byteale(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2))); +} + +Datum +byteagt(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2))); +} + +Datum +byteage(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2))); +} + +Datum +byteacmp(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + if ((cmp == 0) && (len1 != len2)) + cmp = (len1 < len2) ? -1 : 1; + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(cmp); +} + +Datum +bytea_larger(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + bytea *result; + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2); + + PG_RETURN_BYTEA_P(result); +} + +Datum +bytea_smaller(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + bytea *result; + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2); + + PG_RETURN_BYTEA_P(result); +} + +Datum +bytea_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + /* Use generic string SortSupport, forcing "C" collation */ + varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID); + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_VOID(); +} + +/* Cast bytea -> int2 */ +Datum +bytea_int2(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint16 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT16(result); +} + +/* Cast bytea -> int4 */ +Datum +bytea_int4(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint32 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT32(result); +} + +/* Cast bytea -> int8 */ +Datum +bytea_int8(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint64 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT64(result); +} + +/* Cast int2 -> bytea; can just use int2send() */ +Datum +int2_bytea(PG_FUNCTION_ARGS) +{ + return int2send(fcinfo); +} + +/* Cast int4 -> bytea; can just use int4send() */ +Datum +int4_bytea(PG_FUNCTION_ARGS) +{ + return int4send(fcinfo); +} + +/* Cast int8 -> bytea; can just use int8send() */ +Datum +int8_bytea(PG_FUNCTION_ARGS) +{ + return int8send(fcinfo); +} diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c index 4227ab1a72b..344f58b92f7 100644 --- a/src/backend/utils/adt/date.c +++ b/src/backend/utils/adt/date.c @@ -1363,10 +1363,35 @@ timestamp_date(PG_FUNCTION_ARGS) { Timestamp timestamp = PG_GETARG_TIMESTAMP(0); DateADT result; + + result = timestamp2date_opt_overflow(timestamp, NULL); + PG_RETURN_DATEADT(result); +} + +/* + * Convert timestamp to date. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the timestamp is finite but out of the valid range for date, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate date infinity. + * + * Note: given the ranges of the types, overflow is only possible at + * the minimum end of the range, but we don't assume that in this code. + */ +DateADT +timestamp2date_opt_overflow(Timestamp timestamp, int *overflow) +{ + DateADT result; struct pg_tm tt, *tm = &tt; fsec_t fsec; + if (overflow) + *overflow = 0; + if (TIMESTAMP_IS_NOBEGIN(timestamp)) DATE_NOBEGIN(result); else if (TIMESTAMP_IS_NOEND(timestamp)) @@ -1374,14 +1399,30 @@ timestamp_date(PG_FUNCTION_ARGS) else { if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + DATE_NOBEGIN(result); + } + else + { + *overflow = 1; /* not actually reachable */ + DATE_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE; } - PG_RETURN_DATEADT(result); + return result; } @@ -1408,11 +1449,36 @@ timestamptz_date(PG_FUNCTION_ARGS) { TimestampTz timestamp = PG_GETARG_TIMESTAMP(0); DateADT result; + + result = timestamptz2date_opt_overflow(timestamp, NULL); + PG_RETURN_DATEADT(result); +} + +/* + * Convert timestamptz to date. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the timestamptz is finite but out of the valid range for date, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate date infinity. + * + * Note: given the ranges of the types, overflow is only possible at + * the minimum end of the range, but we don't assume that in this code. + */ +DateADT +timestamptz2date_opt_overflow(TimestampTz timestamp, int *overflow) +{ + DateADT result; struct pg_tm tt, *tm = &tt; fsec_t fsec; int tz; + if (overflow) + *overflow = 0; + if (TIMESTAMP_IS_NOBEGIN(timestamp)) DATE_NOBEGIN(result); else if (TIMESTAMP_IS_NOEND(timestamp)) @@ -1420,14 +1486,30 @@ timestamptz_date(PG_FUNCTION_ARGS) else { if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + DATE_NOBEGIN(result); + } + else + { + *overflow = 1; /* not actually reachable */ + DATE_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE; } - PG_RETURN_DATEADT(result); + return result; } diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c index 793d8a9adcc..680fee2a844 100644 --- a/src/backend/utils/adt/datetime.c +++ b/src/backend/utils/adt/datetime.c @@ -702,9 +702,18 @@ ParseFraction(char *cp, double *frac) } else { + /* + * On the other hand, let's reject anything that's not digits after + * the ".". strtod is happy with input like ".123e9", but that'd + * break callers' expectation that the result is in 0..1. (It's quite + * difficult to get here with such input, but not impossible.) + */ + if (strspn(cp + 1, "0123456789") != strlen(cp + 1)) + return DTERR_BAD_FORMAT; + errno = 0; *frac = strtod(cp, &cp); - /* check for parse failure */ + /* check for parse failure (probably redundant given prior check) */ if (*cp != '\0' || errno != 0) return DTERR_BAD_FORMAT; } @@ -2959,30 +2968,27 @@ DecodeNumberField(int len, char *str, int fmask, char *cp; /* + * This function was originally meant to cope only with DTK_NUMBER fields, + * but we now sometimes abuse it to parse (parts of) DTK_DATE fields, + * which can contain letters and other punctuation. Reject if it's not a + * valid DTK_NUMBER, that is digits and decimal point(s). (ParseFraction + * will reject if there's more than one decimal point.) + */ + if (strspn(str, "0123456789.") != len) + return DTERR_BAD_FORMAT; + + /* * Have a decimal point? Then this is a date or something with a seconds * field... */ if ((cp = strchr(str, '.')) != NULL) { - /* - * Can we use ParseFractionalSecond here? Not clear whether trailing - * junk should be rejected ... - */ - if (cp[1] == '\0') - { - /* avoid assuming that strtod will accept "." */ - *fsec = 0; - } - else - { - double frac; + int dterr; - errno = 0; - frac = strtod(cp, NULL); - if (errno != 0) - return DTERR_BAD_FORMAT; - *fsec = rint(frac * 1000000); - } + /* Convert the fraction and store at *fsec */ + dterr = ParseFractionalSecond(cp, fsec); + if (dterr) + return dterr; /* Now truncate off the fraction for further processing */ *cp = '\0'; len = strlen(str); diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c index 6d20ae07ae7..7b97d2be6ca 100644 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -4065,10 +4065,11 @@ float84ge(PG_FUNCTION_ARGS) * in the histogram. width_bucket() returns an integer indicating the * bucket number that 'operand' belongs to in an equiwidth histogram * with the specified characteristics. An operand smaller than the - * lower bound is assigned to bucket 0. An operand greater than the - * upper bound is assigned to an additional bucket (with number - * count+1). We don't allow "NaN" for any of the float8 inputs, and we - * don't allow either of the histogram bounds to be +/- infinity. + * lower bound is assigned to bucket 0. An operand greater than or equal + * to the upper bound is assigned to an additional bucket (with number + * count+1). We don't allow the histogram bounds to be NaN or +/- infinity, + * but we do allow those values for the operand (taking NaN to be larger + * than any other value, as we do in comparisons). */ Datum width_bucket_float8(PG_FUNCTION_ARGS) @@ -4084,12 +4085,11 @@ width_bucket_float8(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), errmsg("count must be greater than zero"))); - if (isnan(operand) || isnan(bound1) || isnan(bound2)) + if (isnan(bound1) || isnan(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), - errmsg("operand, lower bound, and upper bound cannot be NaN"))); + errmsg("lower and upper bounds cannot be NaN"))); - /* Note that we allow "operand" to be infinite */ if (isinf(bound1) || isinf(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), @@ -4097,15 +4097,15 @@ width_bucket_float8(PG_FUNCTION_ARGS) if (bound1 < bound2) { - if (operand < bound1) - result = 0; - else if (operand >= bound2) + if (isnan(operand) || operand >= bound2) { if (pg_add_s32_overflow(count, 1, &result)) ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("integer out of range"))); } + else if (operand < bound1) + result = 0; else { if (!isinf(bound2 - bound1)) @@ -4135,7 +4135,7 @@ width_bucket_float8(PG_FUNCTION_ARGS) } else if (bound1 > bound2) { - if (operand > bound1) + if (isnan(operand) || operand > bound1) result = 0; else if (operand <= bound2) { diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 5bd1e01f7e4..1d05481181d 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -3590,14 +3590,15 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out, if (matched < 2) ereturn(escontext,, (errcode(ERRCODE_INVALID_DATETIME_FORMAT), - errmsg("invalid input string for \"Y,YYY\""))); + errmsg("invalid value \"%s\" for \"%s\"", + s, "Y,YYY"))); /* years += (millennia * 1000); */ if (pg_mul_s32_overflow(millennia, 1000, &millennia) || pg_add_s32_overflow(years, millennia, &years)) ereturn(escontext,, (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW), - errmsg("value for \"Y,YYY\" in source string is out of range"))); + errmsg("value for \"%s\" in source string is out of range", "Y,YYY"))); if (!from_char_set_int(&out->year, years, n, escontext)) return; diff --git a/src/backend/utils/adt/inet_net_pton.c b/src/backend/utils/adt/inet_net_pton.c index ef2236d9f04..3b0db2a3799 100644 --- a/src/backend/utils/adt/inet_net_pton.c +++ b/src/backend/utils/adt/inet_net_pton.c @@ -115,8 +115,7 @@ inet_cidr_pton_ipv4(const char *src, u_char *dst, size_t size) src++; /* skip x or X. */ while ((ch = *src++) != '\0' && isxdigit((unsigned char) ch)) { - if (isupper((unsigned char) ch)) - ch = tolower((unsigned char) ch); + ch = pg_ascii_tolower((unsigned char) ch); n = strchr(xdigits, ch) - xdigits; assert(n >= 0 && n <= 15); if (dirty == 0) diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 7f4cf614585..4216ac17f43 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -98,7 +98,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale) else if (locale->is_default) return pg_tolower(c); else - return tolower_l(c, locale->info.lt); + return char_tolower(c, locale); } @@ -209,7 +209,17 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) * way. */ - if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU)) + if (locale->ctype_is_c || + (char_tolower_enabled(locale) && + pg_database_encoding_max_length() == 1)) + { + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + s = VARDATA_ANY(str); + slen = VARSIZE_ANY_EXHDR(str); + return SB_IMatchText(s, slen, p, plen, locale); + } + else { pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation, PointerGetDatum(pat))); @@ -224,14 +234,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) else return MB_MatchText(s, slen, p, plen, 0); } - else - { - p = VARDATA_ANY(pat); - plen = VARSIZE_ANY_EXHDR(pat); - s = VARDATA_ANY(str); - slen = VARSIZE_ANY_EXHDR(str); - return SB_IMatchText(s, slen, p, plen, locale); - } } /* diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 8fdc677371f..999f23f86d5 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -1495,13 +1495,8 @@ pattern_char_isalpha(char c, bool is_multibyte, { if (locale->ctype_is_c) return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); - else if (is_multibyte && IS_HIGHBIT_SET(c)) - return true; - else if (locale->provider != COLLPROVIDER_LIBC) - return IS_HIGHBIT_SET(c) || - (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); else - return isalpha_l((unsigned char) c, locale->info.lt); + return char_is_cased(c, locale); } diff --git a/src/backend/utils/adt/mcxtfuncs.c b/src/backend/utils/adt/mcxtfuncs.c index 7ec2c225016..fe6dce9cba3 100644 --- a/src/backend/utils/adt/mcxtfuncs.c +++ b/src/backend/utils/adt/mcxtfuncs.c @@ -15,27 +15,30 @@ #include "postgres.h" -#include "access/twophase.h" -#include "catalog/pg_authid_d.h" #include "funcapi.h" #include "mb/pg_wchar.h" -#include "miscadmin.h" #include "storage/proc.h" #include "storage/procarray.h" -#include "utils/acl.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/hsearch.h" -#include "utils/memutils.h" -#include "utils/wait_event_types.h" /* ---------- * The max bytes for showing identifiers of MemoryContext. * ---------- */ #define MEMORY_CONTEXT_IDENT_DISPLAY_SIZE 1024 -struct MemoryStatsBackendState *memCxtState = NULL; -struct MemoryStatsCtl *memCxtArea = NULL; + +/* + * MemoryContextId + * Used for storage of transient identifiers for + * pg_get_backend_memory_contexts. + */ +typedef struct MemoryContextId +{ + MemoryContext context; + int context_id; +} MemoryContextId; /* * int_list_to_array @@ -86,7 +89,7 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore, */ for (MemoryContext cur = context; cur != NULL; cur = cur->parent) { - MemoryStatsContextId *entry; + MemoryContextId *entry; bool found; entry = hash_search(context_id_lookup, &cur, HASH_FIND, &found); @@ -140,51 +143,36 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore, else nulls[1] = true; - type = ContextTypeToString(context->type); - - values[2] = CStringGetTextDatum(type); - values[3] = Int32GetDatum(list_length(path)); /* level */ - values[4] = int_list_to_array(path); - values[5] = Int64GetDatum(stat.totalspace); - values[6] = Int64GetDatum(stat.nblocks); - values[7] = Int64GetDatum(stat.freespace); - values[8] = Int64GetDatum(stat.freechunks); - values[9] = Int64GetDatum(stat.totalspace - stat.freespace); - - tuplestore_putvalues(tupstore, tupdesc, values, nulls); - list_free(path); -} - -/* - * ContextTypeToString - * Returns a textual representation of a context type - * - * This should cover the same types as MemoryContextIsValid. - */ -const char * -ContextTypeToString(NodeTag type) -{ - const char *context_type; - - switch (type) + switch (context->type) { case T_AllocSetContext: - context_type = "AllocSet"; + type = "AllocSet"; break; case T_GenerationContext: - context_type = "Generation"; + type = "Generation"; break; case T_SlabContext: - context_type = "Slab"; + type = "Slab"; break; case T_BumpContext: - context_type = "Bump"; + type = "Bump"; break; default: - context_type = "???"; + type = "???"; break; } - return context_type; + + values[2] = CStringGetTextDatum(type); + values[3] = Int32GetDatum(list_length(path)); /* level */ + values[4] = int_list_to_array(path); + values[5] = Int64GetDatum(stat.totalspace); + values[6] = Int64GetDatum(stat.nblocks); + values[7] = Int64GetDatum(stat.freespace); + values[8] = Int64GetDatum(stat.freechunks); + values[9] = Int64GetDatum(stat.totalspace - stat.freespace); + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + list_free(path); } /* @@ -201,7 +189,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) HTAB *context_id_lookup; ctl.keysize = sizeof(MemoryContext); - ctl.entrysize = sizeof(MemoryStatsContextId); + ctl.entrysize = sizeof(MemoryContextId); ctl.hcxt = CurrentMemoryContext; context_id_lookup = hash_create("pg_get_backend_memory_contexts", @@ -228,7 +216,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) foreach_ptr(MemoryContextData, cur, contexts) { - MemoryStatsContextId *entry; + MemoryContextId *entry; bool found; /* @@ -236,8 +224,8 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) * PutMemoryContextsStatsTupleStore needs this to populate the "path" * column with the parent context_ids. */ - entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur, - HASH_ENTER, &found); + entry = (MemoryContextId *) hash_search(context_id_lookup, &cur, + HASH_ENTER, &found); entry->context_id = context_id++; Assert(!found); @@ -317,349 +305,3 @@ pg_log_backend_memory_contexts(PG_FUNCTION_ARGS) PG_RETURN_BOOL(true); } - -/* - * pg_get_process_memory_contexts - * Signal a backend or an auxiliary process to send its memory contexts, - * wait for the results and display them. - * - * By default, only superusers or users with ROLE_PG_READ_ALL_STATS are allowed - * to signal a process to return the memory contexts. This is because allowing - * any users to issue this request at an unbounded rate would cause lots of - * requests to be sent, which can lead to denial of service. Additional roles - * can be permitted with GRANT. - * - * On receipt of this signal, a backend or an auxiliary process sets the flag - * in the signal handler, which causes the next CHECK_FOR_INTERRUPTS() - * or process-specific interrupt handler to copy the memory context details - * to a dynamic shared memory space. - * - * We have defined a limit on DSA memory that could be allocated per process - - * if the process has more memory contexts than what can fit in the allocated - * size, the excess contexts are summarized and represented as cumulative total - * at the end of the buffer. - * - * After sending the signal, wait on a condition variable. The publishing - * backend, after copying the data to shared memory, sends signal on that - * condition variable. There is one condition variable per publishing backend. - * Once the condition variable is signalled, check if the latest memory context - * information is available and display. - * - * If the publishing backend does not respond before the condition variable - * times out, which is set to MEMSTATS_WAIT_TIMEOUT, retry given that there is - * time left within the timeout specified by the user, before giving up and - * returning previously published statistics, if any. If no previous statistics - * exist, return NULL. - */ -#define MEMSTATS_WAIT_TIMEOUT 100 -Datum -pg_get_process_memory_contexts(PG_FUNCTION_ARGS) -{ - int pid = PG_GETARG_INT32(0); - bool summary = PG_GETARG_BOOL(1); - double timeout = PG_GETARG_FLOAT8(2); - PGPROC *proc; - ProcNumber procNumber = INVALID_PROC_NUMBER; - bool proc_is_aux = false; - ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; - MemoryStatsEntry *memcxt_info; - TimestampTz start_timestamp; - - /* - * See if the process with given pid is a backend or an auxiliary process - * and remember the type for when we requery the process later. - */ - proc = BackendPidGetProc(pid); - if (proc == NULL) - { - proc = AuxiliaryPidGetProc(pid); - proc_is_aux = true; - } - - /* - * BackendPidGetProc() and AuxiliaryPidGetProc() return NULL if the pid - * isn't valid; this is however not a problem and leave with a WARNING. - * See comment in pg_log_backend_memory_contexts for a discussion on this. - */ - if (proc == NULL) - { - /* - * This is just a warning so a loop-through-resultset will not abort - * if one backend terminated on its own during the run. - */ - ereport(WARNING, - errmsg("PID %d is not a PostgreSQL server process", pid)); - PG_RETURN_NULL(); - } - - InitMaterializedSRF(fcinfo, 0); - - procNumber = GetNumberFromPGProc(proc); - - LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); - memCxtState[procNumber].summary = summary; - LWLockRelease(&memCxtState[procNumber].lw_lock); - - start_timestamp = GetCurrentTimestamp(); - - /* - * Send a signal to a PostgreSQL process, informing it we want it to - * produce information about its memory contexts. - */ - if (SendProcSignal(pid, PROCSIG_GET_MEMORY_CONTEXT, procNumber) < 0) - { - ereport(WARNING, - errmsg("could not send signal to process %d: %m", pid)); - PG_RETURN_NULL(); - } - - /* - * Even if the proc has published statistics, the may not be due to the - * current request, but previously published stats. Check if the stats - * are updated by comparing the timestamp, if the stats are newer than our - * previously recorded timestamp from before sending the procsignal, they - * must by definition be updated. Wait for the timeout specified by the - * user, following which display old statistics if available or return - * NULL. - */ - while (1) - { - long msecs; - - /* - * We expect to come out of sleep when the requested process has - * finished publishing the statistics, verified using the valid DSA - * pointer. - * - * Make sure that the information belongs to pid we requested - * information for, Otherwise loop back and wait for the server - * process to finish publishing statistics. - */ - LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); - - /* - * Note in procnumber.h file says that a procNumber can be re-used for - * a different backend immediately after a backend exits. In case an - * old process' data was there and not updated by the current process - * in the slot identified by the procNumber, the pid of the requested - * process and the proc_id might not match. - */ - if (memCxtState[procNumber].proc_id == pid) - { - /* - * Break if the latest stats have been read, indicated by - * statistics timestamp being newer than the current request - * timestamp. - */ - msecs = TimestampDifferenceMilliseconds(start_timestamp, - memCxtState[procNumber].stats_timestamp); - - if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer) - && msecs > 0) - break; - } - LWLockRelease(&memCxtState[procNumber].lw_lock); - - /* - * Recheck the state of the backend before sleeping on the condition - * variable to ensure the process is still alive. Only check the - * relevant process type based on the earlier PID check. - */ - if (proc_is_aux) - proc = AuxiliaryPidGetProc(pid); - else - proc = BackendPidGetProc(pid); - - /* - * The process ending during memory context processing is not an - * error. - */ - if (proc == NULL) - { - ereport(WARNING, - errmsg("PID %d is no longer a PostgreSQL server process", - pid)); - PG_RETURN_NULL(); - } - - msecs = TimestampDifferenceMilliseconds(start_timestamp, GetCurrentTimestamp()); - - /* - * If we haven't already exceeded the timeout value, sleep for the - * remainder of the timeout on the condition variable. - */ - if (msecs > 0 && msecs < (timeout * 1000)) - { - /* - * Wait for the timeout as defined by the user. If no updated - * statistics are available within the allowed time then display - * previously published statistics if there are any. If no - * previous statistics are available then return NULL. The timer - * is defined in milliseconds since that's what the condition - * variable sleep uses. - */ - if (ConditionVariableTimedSleep(&memCxtState[procNumber].memcxt_cv, - ((timeout * 1000) - msecs), WAIT_EVENT_MEM_CXT_PUBLISH)) - { - LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); - /* Displaying previously published statistics if available */ - if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer)) - break; - else - { - LWLockRelease(&memCxtState[procNumber].lw_lock); - PG_RETURN_NULL(); - } - } - } - else - { - LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); - /* Displaying previously published statistics if available */ - if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer)) - break; - else - { - LWLockRelease(&memCxtState[procNumber].lw_lock); - PG_RETURN_NULL(); - } - } - } - - /* - * We should only reach here with a valid DSA handle, either containing - * updated statistics or previously published statistics (identified by - * the timestamp. - */ - Assert(memCxtArea->memstats_dsa_handle != DSA_HANDLE_INVALID); - /* Attach to the dsa area if we have not already done so */ - if (MemoryStatsDsaArea == NULL) - { - MemoryContext oldcontext = CurrentMemoryContext; - - MemoryContextSwitchTo(TopMemoryContext); - MemoryStatsDsaArea = dsa_attach(memCxtArea->memstats_dsa_handle); - MemoryContextSwitchTo(oldcontext); - dsa_pin_mapping(MemoryStatsDsaArea); - } - - /* - * Backend has finished publishing the stats, project them. - */ - memcxt_info = (MemoryStatsEntry *) - dsa_get_address(MemoryStatsDsaArea, memCxtState[procNumber].memstats_dsa_pointer); - -#define PG_GET_PROCESS_MEMORY_CONTEXTS_COLS 12 - for (int i = 0; i < memCxtState[procNumber].total_stats; i++) - { - ArrayType *path_array; - int path_length; - Datum values[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS]; - bool nulls[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS]; - char *name; - char *ident; - Datum *path_datum = NULL; - int *path_int = NULL; - - memset(values, 0, sizeof(values)); - memset(nulls, 0, sizeof(nulls)); - - if (DsaPointerIsValid(memcxt_info[i].name)) - { - name = (char *) dsa_get_address(MemoryStatsDsaArea, memcxt_info[i].name); - values[0] = CStringGetTextDatum(name); - } - else - nulls[0] = true; - - if (DsaPointerIsValid(memcxt_info[i].ident)) - { - ident = (char *) dsa_get_address(MemoryStatsDsaArea, memcxt_info[i].ident); - values[1] = CStringGetTextDatum(ident); - } - else - nulls[1] = true; - - values[2] = CStringGetTextDatum(ContextTypeToString(memcxt_info[i].type)); - - path_length = memcxt_info[i].path_length; - path_datum = (Datum *) palloc(path_length * sizeof(Datum)); - if (DsaPointerIsValid(memcxt_info[i].path)) - { - path_int = (int *) dsa_get_address(MemoryStatsDsaArea, memcxt_info[i].path); - for (int j = 0; j < path_length; j++) - path_datum[j] = Int32GetDatum(path_int[j]); - path_array = construct_array_builtin(path_datum, path_length, INT4OID); - values[3] = PointerGetDatum(path_array); - } - else - nulls[3] = true; - - values[4] = Int32GetDatum(memcxt_info[i].levels); - values[5] = Int64GetDatum(memcxt_info[i].totalspace); - values[6] = Int64GetDatum(memcxt_info[i].nblocks); - values[7] = Int64GetDatum(memcxt_info[i].freespace); - values[8] = Int64GetDatum(memcxt_info[i].freechunks); - values[9] = Int64GetDatum(memcxt_info[i].totalspace - - memcxt_info[i].freespace); - values[10] = Int32GetDatum(memcxt_info[i].num_agg_stats); - values[11] = TimestampTzGetDatum(memCxtState[procNumber].stats_timestamp); - - tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, - values, nulls); - } - LWLockRelease(&memCxtState[procNumber].lw_lock); - - ConditionVariableCancelSleep(); - - PG_RETURN_NULL(); -} - -Size -MemoryContextReportingShmemSize(void) -{ - Size sz = 0; - Size TotalProcs = 0; - - TotalProcs = add_size(TotalProcs, NUM_AUXILIARY_PROCS); - TotalProcs = add_size(TotalProcs, MaxBackends); - sz = add_size(sz, mul_size(TotalProcs, sizeof(MemoryStatsBackendState))); - - sz = add_size(sz, sizeof(MemoryStatsCtl)); - - return sz; -} - -/* - * Initialize shared memory for displaying memory context statistics - */ -void -MemoryContextReportingShmemInit(void) -{ - bool found; - - memCxtArea = (MemoryStatsCtl *) - ShmemInitStruct("MemoryStatsCtl", - sizeof(MemoryStatsCtl), &found); - - if (!found) - { - LWLockInitialize(&memCxtArea->lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE); - memCxtArea->memstats_dsa_handle = DSA_HANDLE_INVALID; - } - - memCxtState = (MemoryStatsBackendState *) - ShmemInitStruct("MemoryStatsBackendState", - ((MaxBackends + NUM_AUXILIARY_PROCS) * sizeof(MemoryStatsBackendState)), - &found); - - if (found) - return; - - for (int i = 0; i < (MaxBackends + NUM_AUXILIARY_PROCS); i++) - { - ConditionVariableInit(&memCxtState[i].memcxt_cv); - LWLockInitialize(&memCxtState[i].lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC); - memCxtState[i].memstats_dsa_pointer = InvalidDsaPointer; - } -} diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build index 244f48f4fd7..ed9bbd7b926 100644 --- a/src/backend/utils/adt/meson.build +++ b/src/backend/utils/adt/meson.build @@ -12,6 +12,7 @@ backend_sources += files( 'arrayutils.c', 'ascii.c', 'bool.c', + 'bytea.c', 'cash.c', 'char.c', 'cryptohashfuncs.c', diff --git a/src/backend/utils/adt/network.c b/src/backend/utils/adt/network.c index f03fcc1147b..9fd211b2d45 100644 --- a/src/backend/utils/adt/network.c +++ b/src/backend/utils/adt/network.c @@ -12,8 +12,6 @@ #include <netinet/in.h> #include <arpa/inet.h> -#include "access/stratnum.h" -#include "catalog/pg_opfamily.h" #include "catalog/pg_type.h" #include "common/hashfn.h" #include "common/ip.h" diff --git a/src/backend/utils/adt/network_spgist.c b/src/backend/utils/adt/network_spgist.c index a84747d9275..602276a35c3 100644 --- a/src/backend/utils/adt/network_spgist.c +++ b/src/backend/utils/adt/network_spgist.c @@ -37,7 +37,6 @@ #include "catalog/pg_type.h" #include "utils/fmgrprotos.h" #include "utils/inet.h" -#include "varatt.h" static int inet_spg_node_number(const inet *val, int commonbits); diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 40dcbc7b671..c9233565d57 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -1958,9 +1958,11 @@ generate_series_numeric_support(PG_FUNCTION_ARGS) * in the histogram. width_bucket() returns an integer indicating the * bucket number that 'operand' belongs to in an equiwidth histogram * with the specified characteristics. An operand smaller than the - * lower bound is assigned to bucket 0. An operand greater than the - * upper bound is assigned to an additional bucket (with number - * count+1). We don't allow "NaN" for any of the numeric arguments. + * lower bound is assigned to bucket 0. An operand greater than or equal + * to the upper bound is assigned to an additional bucket (with number + * count+1). We don't allow the histogram bounds to be NaN or +/- infinity, + * but we do allow those values for the operand (taking NaN to be larger + * than any other value, as we do in comparisons). */ Datum width_bucket_numeric(PG_FUNCTION_ARGS) @@ -1978,17 +1980,13 @@ width_bucket_numeric(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), errmsg("count must be greater than zero"))); - if (NUMERIC_IS_SPECIAL(operand) || - NUMERIC_IS_SPECIAL(bound1) || - NUMERIC_IS_SPECIAL(bound2)) + if (NUMERIC_IS_SPECIAL(bound1) || NUMERIC_IS_SPECIAL(bound2)) { - if (NUMERIC_IS_NAN(operand) || - NUMERIC_IS_NAN(bound1) || - NUMERIC_IS_NAN(bound2)) + if (NUMERIC_IS_NAN(bound1) || NUMERIC_IS_NAN(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), - errmsg("operand, lower bound, and upper bound cannot be NaN"))); - /* We allow "operand" to be infinite; cmp_numerics will cope */ + errmsg("lower and upper bounds cannot be NaN"))); + if (NUMERIC_IS_INF(bound1) || NUMERIC_IS_INF(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index a858f27cadc..97c2ac1faf9 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -41,11 +41,11 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/builtins.h" -#include "utils/formatting.h" #include "utils/guc_hooks.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/pg_locale.h" +#include "utils/relcache.h" #include "utils/syscache.h" #ifdef WIN32 @@ -79,31 +79,6 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context); extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); extern char *get_collation_actual_version_libc(const char *collcollate); -extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - -extern size_t strlower_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - -extern size_t strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - /* GUC settings */ char *locale_messages; char *locale_monetary; @@ -1092,6 +1067,9 @@ create_pg_locale(Oid collid, MemoryContext context) Assert((result->collate_is_c && result->collate == NULL) || (!result->collate_is_c && result->collate != NULL)); + Assert((result->ctype_is_c && result->ctype == NULL) || + (!result->ctype_is_c && result->ctype != NULL)); + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, &isnull); if (!isnull) @@ -1256,77 +1234,31 @@ size_t pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strlower_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strlower_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return strlower_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strlower(dst, dstsize, src, srclen, locale); } size_t pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strtitle_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strtitle_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return strtitle_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strtitle(dst, dstsize, src, srclen, locale); } size_t pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strupper_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strupper_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return strupper_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strupper(dst, dstsize, src, srclen, locale); } size_t pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strfold_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strfold_icu(dst, dstsize, src, srclen, locale); -#endif - /* for libc, just use strlower */ - else if (locale->provider == COLLPROVIDER_LIBC) - return strlower_libc(dst, dstsize, src, srclen, locale); + if (locale->ctype->strfold) + return locale->ctype->strfold(dst, dstsize, src, srclen, locale); else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strlower(dst, dstsize, src, srclen, locale); } /* @@ -1464,6 +1396,41 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, } /* + * char_is_cased() + * + * Fuzzy test of whether the given char is case-varying or not. The argument + * is a single byte, so in a multibyte encoding, just assume any non-ASCII + * char is case-varying. + */ +bool +char_is_cased(char ch, pg_locale_t locale) +{ + return locale->ctype->char_is_cased(ch, locale); +} + +/* + * char_tolower_enabled() + * + * Does the provider support char_tolower()? + */ +bool +char_tolower_enabled(pg_locale_t locale) +{ + return (locale->ctype->char_tolower != NULL); +} + +/* + * char_tolower() + * + * Convert char (single-byte encoding) to lowercase. + */ +char +char_tolower(unsigned char ch, pg_locale_t locale) +{ + return locale->ctype->char_tolower(ch, locale); +} + +/* * Return required encoding ID for the given locale, or -1 if any encoding is * valid for the locale. */ diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c index f51768830cd..0c9fbdb40f2 100644 --- a/src/backend/utils/adt/pg_locale_builtin.c +++ b/src/backend/utils/adt/pg_locale_builtin.c @@ -18,22 +18,12 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" #include "utils/builtins.h" -#include "utils/memutils.h" #include "utils/pg_locale.h" #include "utils/syscache.h" extern pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context); extern char *get_collation_actual_version_builtin(const char *collcollate); -extern size_t strlower_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); - struct WordBoundaryState { @@ -77,7 +67,7 @@ initcap_wbnext(void *state) return wbstate->len; } -size_t +static size_t strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -85,7 +75,7 @@ strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, locale->info.builtin.casemap_full); } -size_t +static size_t strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -103,7 +93,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, initcap_wbnext, &wbstate); } -size_t +static size_t strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -111,7 +101,7 @@ strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, locale->info.builtin.casemap_full); } -size_t +static size_t strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -119,6 +109,98 @@ strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, locale->info.builtin.casemap_full); } +static bool +wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isdigit(wc, !locale->info.builtin.casemap_full); +} + +static bool +wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isalpha(wc); +} + +static bool +wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isalnum(wc, !locale->info.builtin.casemap_full); +} + +static bool +wc_isupper_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isupper(wc); +} + +static bool +wc_islower_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_islower(wc); +} + +static bool +wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isgraph(wc); +} + +static bool +wc_isprint_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isprint(wc); +} + +static bool +wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_ispunct(wc, !locale->info.builtin.casemap_full); +} + +static bool +wc_isspace_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isspace(wc); +} + +static bool +char_is_cased_builtin(char ch, pg_locale_t locale) +{ + return IS_HIGHBIT_SET(ch) || + (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); +} + +static pg_wchar +wc_toupper_builtin(pg_wchar wc, pg_locale_t locale) +{ + return unicode_uppercase_simple(wc); +} + +static pg_wchar +wc_tolower_builtin(pg_wchar wc, pg_locale_t locale) +{ + return unicode_lowercase_simple(wc); +} + +static const struct ctype_methods ctype_methods_builtin = { + .strlower = strlower_builtin, + .strtitle = strtitle_builtin, + .strupper = strupper_builtin, + .strfold = strfold_builtin, + .wc_isdigit = wc_isdigit_builtin, + .wc_isalpha = wc_isalpha_builtin, + .wc_isalnum = wc_isalnum_builtin, + .wc_isupper = wc_isupper_builtin, + .wc_islower = wc_islower_builtin, + .wc_isgraph = wc_isgraph_builtin, + .wc_isprint = wc_isprint_builtin, + .wc_ispunct = wc_ispunct_builtin, + .wc_isspace = wc_isspace_builtin, + .char_is_cased = char_is_cased_builtin, + .wc_tolower = wc_tolower_builtin, + .wc_toupper = wc_toupper_builtin, +}; + pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context) { @@ -158,10 +240,11 @@ create_pg_locale_builtin(Oid collid, MemoryContext context) result->info.builtin.locale = MemoryContextStrdup(context, locstr); result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0); - result->provider = COLLPROVIDER_BUILTIN; result->deterministic = true; result->collate_is_c = true; result->ctype_is_c = (strcmp(locstr, "C") == 0); + if (!result->ctype_is_c) + result->ctype = &ctype_methods_builtin; return result; } diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c index a32c32a0744..96741e08269 100644 --- a/src/backend/utils/adt/pg_locale_icu.c +++ b/src/backend/utils/adt/pg_locale_icu.c @@ -48,19 +48,22 @@ #define TEXTBUFLEN 1024 extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context); -extern size_t strlower_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); #ifdef USE_ICU extern UCollator *pg_ucol_open(const char *loc_str); +static size_t strlower_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static size_t strtitle_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static size_t strupper_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static size_t strfold_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static int strncoll_icu(const char *arg1, ssize_t len1, + const char *arg2, ssize_t len2, + pg_locale_t locale); static size_t strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale); @@ -118,6 +121,25 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity, const char *locale, UErrorCode *pErrorCode); +static bool +char_is_cased_icu(char ch, pg_locale_t locale) +{ + return IS_HIGHBIT_SET(ch) || + (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); +} + +static pg_wchar +toupper_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_toupper(wc); +} + +static pg_wchar +tolower_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_tolower(wc); +} + static const struct collate_methods collate_methods_icu = { .strncoll = strncoll_icu, .strnxfrm = strnxfrm_icu, @@ -136,6 +158,78 @@ static const struct collate_methods collate_methods_icu_utf8 = { .strxfrm_is_safe = true, }; +static bool +wc_isdigit_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isdigit(wc); +} + +static bool +wc_isalpha_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isalpha(wc); +} + +static bool +wc_isalnum_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isalnum(wc); +} + +static bool +wc_isupper_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isupper(wc); +} + +static bool +wc_islower_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_islower(wc); +} + +static bool +wc_isgraph_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isgraph(wc); +} + +static bool +wc_isprint_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isprint(wc); +} + +static bool +wc_ispunct_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_ispunct(wc); +} + +static bool +wc_isspace_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isspace(wc); +} + +static const struct ctype_methods ctype_methods_icu = { + .strlower = strlower_icu, + .strtitle = strtitle_icu, + .strupper = strupper_icu, + .strfold = strfold_icu, + .wc_isdigit = wc_isdigit_icu, + .wc_isalpha = wc_isalpha_icu, + .wc_isalnum = wc_isalnum_icu, + .wc_isupper = wc_isupper_icu, + .wc_islower = wc_islower_icu, + .wc_isgraph = wc_isgraph_icu, + .wc_isprint = wc_isprint_icu, + .wc_ispunct = wc_ispunct_icu, + .wc_isspace = wc_isspace_icu, + .char_is_cased = char_is_cased_icu, + .wc_toupper = toupper_icu, + .wc_tolower = tolower_icu, +}; #endif pg_locale_t @@ -198,7 +292,6 @@ create_pg_locale_icu(Oid collid, MemoryContext context) result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); result->info.icu.locale = MemoryContextStrdup(context, iculocstr); result->info.icu.ucol = collator; - result->provider = COLLPROVIDER_ICU; result->deterministic = deterministic; result->collate_is_c = false; result->ctype_is_c = false; @@ -206,6 +299,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context) result->collate = &collate_methods_icu_utf8; else result->collate = &collate_methods_icu; + result->ctype = &ctype_methods_icu; return result; #else @@ -379,7 +473,7 @@ make_icu_collator(const char *iculocstr, const char *icurules) } } -size_t +static size_t strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -399,7 +493,7 @@ strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_len; } -size_t +static size_t strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -419,7 +513,7 @@ strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_len; } -size_t +static size_t strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -439,7 +533,7 @@ strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_len; } -size_t +static size_t strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -474,8 +568,6 @@ strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2 int result; UErrorCode status; - Assert(locale->provider == COLLPROVIDER_ICU); - Assert(GetDatabaseEncoding() == PG_UTF8); status = U_ZERO_ERROR; @@ -503,8 +595,6 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t uchar_bsize; Size result_bsize; - Assert(locale->provider == COLLPROVIDER_ICU); - init_icu_converter(); ulen = uchar_length(icu_converter, src, srclen); @@ -549,8 +639,6 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize, uint32_t state[2]; UErrorCode status; - Assert(locale->provider == COLLPROVIDER_ICU); - Assert(GetDatabaseEncoding() == PG_UTF8); uiter_setUTF8(&iter, src, srclen); @@ -749,8 +837,6 @@ strncoll_icu(const char *arg1, ssize_t len1, *uchar2; int result; - Assert(locale->provider == COLLPROVIDER_ICU); - /* if encoding is UTF8, use more efficient strncoll_icu_utf8 */ #ifdef HAVE_UCOL_STRCOLLUTF8 Assert(GetDatabaseEncoding() != PG_UTF8); @@ -799,8 +885,6 @@ strnxfrm_prefix_icu(char *dest, size_t destsize, size_t uchar_bsize; Size result_bsize; - Assert(locale->provider == COLLPROVIDER_ICU); - /* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */ Assert(GetDatabaseEncoding() != PG_UTF8); diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index 199857e22db..e9f9fc1e369 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -34,6 +34,46 @@ #endif /* + * For the libc provider, to provide as much functionality as possible on a + * variety of platforms without going so far as to implement everything from + * scratch, we use several implementation strategies depending on the + * situation: + * + * 1. In C/POSIX collations, we use hard-wired code. We can't depend on + * the <ctype.h> functions since those will obey LC_CTYPE. Note that these + * collations don't give a fig about multibyte characters. + * + * 2. When working in UTF8 encoding, we use the <wctype.h> functions. + * This assumes that every platform uses Unicode codepoints directly + * as the wchar_t representation of Unicode. (XXX: ICU makes this assumption + * even for non-UTF8 encodings, which may be a problem.) On some platforms + * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF. + * + * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar + * values up to 255, and punt for values above that. This is 100% correct + * only in single-byte encodings such as LATINn. However, non-Unicode + * multibyte encodings are mostly Far Eastern character sets for which the + * properties being tested here aren't very relevant for higher code values + * anyway. The difficulty with using the <wctype.h> functions with + * non-Unicode multibyte encodings is that we can have no certainty that + * the platform's wchar_t representation matches what we do in pg_wchar + * conversions. + * + * As a special case, in the "default" collation, (2) and (3) force ASCII + * letters to follow ASCII upcase/downcase rules, while in a non-default + * collation we just let the library functions do what they will. The case + * where this matters is treatment of I/i in Turkish, and the behavior is + * meant to match the upper()/lower() SQL functions. + * + * We store the active collation setting in static variables. In principle + * it could be passed down to here via the regex library's "struct vars" data + * structure; but that would require somewhat invasive changes in the regex + * library, and right now there's no real benefit to be gained from that. + * + * NB: the coding here assumes pg_wchar is an unsigned type. + */ + +/* * Size of stack buffer to use for string transformations, used to avoid heap * allocations in typical cases. This should be large enough that most strings * will fit, but small enough that we feel comfortable putting it on the @@ -43,13 +83,6 @@ extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); -extern size_t strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - static int strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale); @@ -85,6 +118,251 @@ static size_t strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale); +static bool +wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isdigit_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isalpha_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isalnum_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isupper_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return islower_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isgraph_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isprint_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return ispunct_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isspace_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswdigit_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswalpha_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswalnum_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswupper_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswlower_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswgraph_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswprint_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswpunct_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswspace_l((wint_t) wc, locale->info.lt); +} + +static char +char_tolower_libc(unsigned char ch, pg_locale_t locale) +{ + Assert(pg_database_encoding_max_length() == 1); + return tolower_l(ch, locale->info.lt); +} + +static bool +char_is_cased_libc(char ch, pg_locale_t locale) +{ + bool is_multibyte = pg_database_encoding_max_length() > 1; + + if (is_multibyte && IS_HIGHBIT_SET(ch)) + return true; + else + return isalpha_l((unsigned char) ch, locale->info.lt); +} + +static pg_wchar +toupper_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() != PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_toupper((unsigned char) wc); + if (wc <= (pg_wchar) UCHAR_MAX) + return toupper_l((unsigned char) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +toupper_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() == PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_toupper((unsigned char) wc); + if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF) + return towupper_l((wint_t) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +tolower_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() != PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_tolower((unsigned char) wc); + if (wc <= (pg_wchar) UCHAR_MAX) + return tolower_l((unsigned char) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +tolower_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() == PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_tolower((unsigned char) wc); + if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF) + return towlower_l((wint_t) wc, locale->info.lt); + else + return wc; +} + +static const struct ctype_methods ctype_methods_libc_sb = { + .strlower = strlower_libc_sb, + .strtitle = strtitle_libc_sb, + .strupper = strupper_libc_sb, + .wc_isdigit = wc_isdigit_libc_sb, + .wc_isalpha = wc_isalpha_libc_sb, + .wc_isalnum = wc_isalnum_libc_sb, + .wc_isupper = wc_isupper_libc_sb, + .wc_islower = wc_islower_libc_sb, + .wc_isgraph = wc_isgraph_libc_sb, + .wc_isprint = wc_isprint_libc_sb, + .wc_ispunct = wc_ispunct_libc_sb, + .wc_isspace = wc_isspace_libc_sb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_sb, + .wc_tolower = tolower_libc_sb, + .max_chr = UCHAR_MAX, +}; + +/* + * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but + * single-byte semantics for pattern matching. + */ +static const struct ctype_methods ctype_methods_libc_other_mb = { + .strlower = strlower_libc_mb, + .strtitle = strtitle_libc_mb, + .strupper = strupper_libc_mb, + .wc_isdigit = wc_isdigit_libc_sb, + .wc_isalpha = wc_isalpha_libc_sb, + .wc_isalnum = wc_isalnum_libc_sb, + .wc_isupper = wc_isupper_libc_sb, + .wc_islower = wc_islower_libc_sb, + .wc_isgraph = wc_isgraph_libc_sb, + .wc_isprint = wc_isprint_libc_sb, + .wc_ispunct = wc_ispunct_libc_sb, + .wc_isspace = wc_isspace_libc_sb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_sb, + .wc_tolower = tolower_libc_sb, + .max_chr = UCHAR_MAX, +}; + +static const struct ctype_methods ctype_methods_libc_utf8 = { + .strlower = strlower_libc_mb, + .strtitle = strtitle_libc_mb, + .strupper = strupper_libc_mb, + .wc_isdigit = wc_isdigit_libc_mb, + .wc_isalpha = wc_isalpha_libc_mb, + .wc_isalnum = wc_isalnum_libc_mb, + .wc_isupper = wc_isupper_libc_mb, + .wc_islower = wc_islower_libc_mb, + .wc_isgraph = wc_isgraph_libc_mb, + .wc_isprint = wc_isprint_libc_mb, + .wc_ispunct = wc_ispunct_libc_mb, + .wc_isspace = wc_isspace_libc_mb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_mb, + .wc_tolower = tolower_libc_mb, +}; + static const struct collate_methods collate_methods_libc = { .strncoll = strncoll_libc, .strnxfrm = strnxfrm_libc, @@ -119,36 +397,6 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = { }; #endif -size_t -strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strlower_libc_mb(dst, dstsize, src, srclen, locale); - else - return strlower_libc_sb(dst, dstsize, src, srclen, locale); -} - -size_t -strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strtitle_libc_mb(dst, dstsize, src, srclen, locale); - else - return strtitle_libc_sb(dst, dstsize, src, srclen, locale); -} - -size_t -strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strupper_libc_mb(dst, dstsize, src, srclen, locale); - else - return strupper_libc_sb(dst, dstsize, src, srclen, locale); -} - static size_t strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) @@ -465,7 +713,6 @@ create_pg_locale_libc(Oid collid, MemoryContext context) loc = make_libc_collator(collate, ctype); result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); - result->provider = COLLPROVIDER_LIBC; result->deterministic = true; result->collate_is_c = (strcmp(collate, "C") == 0) || (strcmp(collate, "POSIX") == 0); @@ -481,6 +728,15 @@ create_pg_locale_libc(Oid collid, MemoryContext context) #endif result->collate = &collate_methods_libc; } + if (!result->ctype_is_c) + { + if (GetDatabaseEncoding() == PG_UTF8) + result->ctype = &ctype_methods_libc_utf8; + else if (pg_database_encoding_max_length() > 1) + result->ctype = &ctype_methods_libc_other_mb; + else + result->ctype = &ctype_methods_libc_sb; + } return result; } @@ -576,8 +832,6 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, const char *arg2n; int result; - Assert(locale->provider == COLLPROVIDER_LIBC); - if (bufsize1 + bufsize2 > TEXTBUFLEN) buf = palloc(bufsize1 + bufsize2); @@ -632,8 +886,6 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t bufsize = srclen + 1; size_t result; - Assert(locale->provider == COLLPROVIDER_LIBC); - if (srclen == -1) return strxfrm_l(dest, src, destsize, locale->info.lt); @@ -742,7 +994,6 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2, int r; int result; - Assert(locale->provider == COLLPROVIDER_LIBC); Assert(GetDatabaseEncoding() == PG_UTF8); if (len1 == -1) diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 97af7c6554f..1c12ddbae49 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -640,10 +640,10 @@ pg_stat_get_activity(PG_FUNCTION_ARGS) values[28] = BoolGetDatum(false); /* GSS credentials not * delegated */ } - if (beentry->st_query_id == 0) + if (beentry->st_query_id == INT64CONST(0)) nulls[30] = true; else - values[30] = UInt64GetDatum(beentry->st_query_id); + values[30] = Int64GetDatum(beentry->st_query_id); } else { @@ -1510,7 +1510,7 @@ pg_stat_io_build_tuples(ReturnSetInfo *rsinfo, bktype_stats->bytes[io_obj][io_context][io_op]; /* Convert to numeric */ - snprintf(buf, sizeof buf, UINT64_FORMAT, byte); + snprintf(buf, sizeof buf, INT64_FORMAT, byte); values[byte_idx] = DirectFunctionCall3(numeric_in, CStringGetDatum(buf), ObjectIdGetDatum(0), diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index edee1f7880b..6e2864cbbda 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -773,8 +773,11 @@ similar_escape_internal(text *pat_text, text *esc_text) int plen, elen; bool afterescape = false; - bool incharclass = false; int nquotes = 0; + int charclass_depth = 0; /* Nesting level of character classes, + * encompassed by square brackets */ + int charclass_start = 0; /* State of the character class start, + * for carets */ p = VARDATA_ANY(pat_text); plen = VARSIZE_ANY_EXHDR(pat_text); @@ -904,7 +907,7 @@ similar_escape_internal(text *pat_text, text *esc_text) /* fast path */ if (afterescape) { - if (pchar == '"' && !incharclass) /* escape-double-quote? */ + if (pchar == '"' && charclass_depth < 1) /* escape-double-quote? */ { /* emit appropriate part separator, per notes above */ if (nquotes == 0) @@ -953,18 +956,41 @@ similar_escape_internal(text *pat_text, text *esc_text) /* SQL escape character; do not send to output */ afterescape = true; } - else if (incharclass) + else if (charclass_depth > 0) { if (pchar == '\\') *r++ = '\\'; *r++ = pchar; - if (pchar == ']') - incharclass = false; + + /* + * Ignore a closing bracket at the start of a character class. + * Such a bracket is taken literally rather than closing the + * class. "charclass_start" is 1 right at the beginning of a + * class and 2 after an initial caret. + */ + if (pchar == ']' && charclass_start > 2) + charclass_depth--; + else if (pchar == '[') + charclass_depth++; + + /* + * If there is a caret right after the opening bracket, it negates + * the character class, but a following closing bracket should + * still be treated as a normal character. That holds only for + * the first caret, so only the values 1 and 2 mean that closing + * brackets should be taken literally. + */ + if (pchar == '^') + charclass_start++; + else + charclass_start = 3; /* definitely past the start */ } else if (pchar == '[') { + /* start of a character class */ *r++ = pchar; - incharclass = true; + charclass_depth++; + charclass_start = 1; } else if (pchar == '%') { diff --git a/src/backend/utils/adt/regproc.c b/src/backend/utils/adt/regproc.c index 5ee608a2b39..b8bbe95e82e 100644 --- a/src/backend/utils/adt/regproc.c +++ b/src/backend/utils/adt/regproc.c @@ -30,6 +30,7 @@ #include "catalog/pg_ts_config.h" #include "catalog/pg_ts_dict.h" #include "catalog/pg_type.h" +#include "commands/dbcommands.h" #include "lib/stringinfo.h" #include "mb/pg_wchar.h" #include "miscadmin.h" @@ -1764,6 +1765,123 @@ regnamespacesend(PG_FUNCTION_ARGS) } /* + * regdatabasein - converts database name to database OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_database entry. + */ +Datum +regdatabasein(PG_FUNCTION_ARGS) +{ + char *db_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Oid result; + List *names; + + /* Handle "-" or numeric OID */ + if (parseDashOrOid(db_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regdatabase values must be OIDs in bootstrap mode"); + + /* Normal case: see if the name matches any pg_database entry. */ + names = stringToQualifiedNameList(db_name_or_oid, escontext); + if (names == NIL) + PG_RETURN_NULL(); + + if (list_length(names) != 1) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_NAME), + errmsg("invalid name syntax"))); + + result = get_database_oid(strVal(linitial(names)), true); + + if (!OidIsValid(result)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("database \"%s\" does not exist", + strVal(linitial(names))))); + + PG_RETURN_OID(result); +} + +/* + * to_regdatabase - converts database name to database OID + * + * If the name is not found, we return NULL. + */ +Datum +to_regdatabase(PG_FUNCTION_ARGS) +{ + char *db_name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!DirectInputFunctionCallSafe(regdatabasein, db_name, + InvalidOid, -1, + (Node *) &escontext, + &result)) + PG_RETURN_NULL(); + PG_RETURN_DATUM(result); +} + +/* + * regdatabaseout - converts database OID to database name + */ +Datum +regdatabaseout(PG_FUNCTION_ARGS) +{ + Oid dboid = PG_GETARG_OID(0); + char *result; + + if (dboid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + result = get_database_name(dboid); + + if (result) + { + /* pstrdup is not really necessary, but it avoids a compiler warning */ + result = pstrdup(quote_identifier(result)); + } + else + { + /* If OID doesn't match any database, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", dboid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regdatabaserecv - converts external binary format to regdatabase + */ +Datum +regdatabaserecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regdatabasesend - converts regdatabase to binary format + */ +Datum +regdatabasesend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + +/* * text_regclass: convert text to regclass * * This could be replaced by CoerceViaIO, except that we need to treat diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c index 6239900fa28..059fc5ebf60 100644 --- a/src/backend/utils/adt/ri_triggers.c +++ b/src/backend/utils/adt/ri_triggers.c @@ -30,7 +30,6 @@ #include "access/xact.h" #include "catalog/pg_collation.h" #include "catalog/pg_constraint.h" -#include "catalog/pg_proc.h" #include "commands/trigger.h" #include "executor/executor.h" #include "executor/spi.h" @@ -46,7 +45,6 @@ #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" -#include "utils/rangetypes.h" #include "utils/rel.h" #include "utils/rls.h" #include "utils/ruleutils.h" diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 467b08198b8..3d6e6bdbfd2 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -5956,9 +5956,19 @@ get_select_query_def(Query *query, deparse_context *context) { if (query->limitOption == LIMIT_OPTION_WITH_TIES) { + /* + * The limitCount arg is a c_expr, so it needs parens. Simple + * literals and function expressions would not need parens, but + * unfortunately it's hard to tell if the expression will be + * printed as a simple literal like 123 or as a typecast + * expression, like '-123'::int4. The grammar accepts the former + * without quoting, but not the latter. + */ appendContextKeyword(context, " FETCH FIRST ", -PRETTYINDENT_STD, PRETTYINDENT_STD, 0); + appendStringInfoChar(buf, '('); get_rule_expr(query->limitCount, context, false); + appendStringInfoChar(buf, ')'); appendStringInfoString(buf, " ROWS WITH TIES"); } else diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index a96b1b9c0bc..ce6a626eba2 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -103,7 +103,6 @@ #include "access/table.h" #include "access/tableam.h" #include "access/visibilitymap.h" -#include "catalog/pg_am.h" #include "catalog/pg_collation.h" #include "catalog/pg_operator.h" #include "catalog/pg_statistic.h" @@ -4620,6 +4619,7 @@ convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue, case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: + case REGDATABASEOID: *scaledvalue = convert_numeric_to_scalar(value, valuetypid, &failure); *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid, @@ -4752,6 +4752,7 @@ convert_numeric_to_scalar(Datum value, Oid typid, bool *failure) case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: + case REGDATABASEOID: /* we can treat OIDs as integers... */ return (double) DatumGetObjectId(value); } diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 347089b7626..0a5848a4ab2 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -6477,7 +6477,7 @@ timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow) if (TIMESTAMP_NOT_FINITE(timestamp)) return timestamp; - /* We don't expect this to fail, but check it pro forma */ + /* timestamp2tm should not fail on valid timestamps, but cope */ if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) == 0) { tz = DetermineTimeZoneOffset(tm, session_timezone); @@ -6485,23 +6485,22 @@ timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow) result = dt2local(timestamp, -tz); if (IS_VALID_TIMESTAMP(result)) - { return result; + } + + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + TIMESTAMP_NOBEGIN(result); } - else if (overflow) + else { - if (result < MIN_TIMESTAMP) - { - *overflow = -1; - TIMESTAMP_NOBEGIN(result); - } - else - { - *overflow = 1; - TIMESTAMP_NOEND(result); - } - return result; + *overflow = 1; + TIMESTAMP_NOEND(result); } + return result; } ereport(ERROR, @@ -6531,27 +6530,81 @@ timestamptz_timestamp(PG_FUNCTION_ARGS) PG_RETURN_TIMESTAMP(timestamptz2timestamp(timestamp)); } +/* + * Convert timestamptz to timestamp, throwing error for overflow. + */ static Timestamp timestamptz2timestamp(TimestampTz timestamp) { + return timestamptz2timestamp_opt_overflow(timestamp, NULL); +} + +/* + * Convert timestamp with time zone to timestamp. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the timestamptz is finite but out of the valid range for timestamp, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate timestamp infinity. + */ +Timestamp +timestamptz2timestamp_opt_overflow(TimestampTz timestamp, int *overflow) +{ Timestamp result; struct pg_tm tt, *tm = &tt; fsec_t fsec; int tz; + if (overflow) + *overflow = 0; + if (TIMESTAMP_NOT_FINITE(timestamp)) result = timestamp; else { if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + TIMESTAMP_NOBEGIN(result); + } + else + { + *overflow = 1; + TIMESTAMP_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } if (tm2timestamp(tm, fsec, NULL, &result) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + TIMESTAMP_NOBEGIN(result); + } + else + { + *overflow = 1; + TIMESTAMP_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } } return result; } diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 3e4d5568bde..ffae8c23abf 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -35,7 +35,6 @@ #include "port/pg_bswap.h" #include "regex/regex.h" #include "utils/builtins.h" -#include "utils/bytea.h" #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -43,10 +42,6 @@ #include "utils/sortsupport.h" #include "utils/varlena.h" - -/* GUC variable */ -int bytea_output = BYTEA_OUTPUT_HEX; - typedef struct varlena VarString; /* @@ -148,12 +143,6 @@ static int text_position_get_match_pos(TextPositionState *state); static void text_position_cleanup(TextPositionState *state); static void check_collation_set(Oid collid); static int text_cmp(text *arg1, text *arg2, Oid collid); -static bytea *bytea_catenate(bytea *t1, bytea *t2); -static bytea *bytea_substring(Datum str, - int S, - int L, - bool length_not_specified); -static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl); static void appendStringInfoText(StringInfo str, const text *t); static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate); static void split_text_accum_result(SplitTextOutputData *tstate, @@ -279,307 +268,6 @@ text_to_cstring_buffer(const text *src, char *dst, size_t dst_len) * USER I/O ROUTINES * *****************************************************************************/ - -#define VAL(CH) ((CH) - '0') -#define DIG(VAL) ((VAL) + '0') - -/* - * byteain - converts from printable representation of byte array - * - * Non-printable characters must be passed as '\nnn' (octal) and are - * converted to internal form. '\' must be passed as '\\'. - * ereport(ERROR, ...) if bad form. - * - * BUGS: - * The input is scanned twice. - * The error checking of input is minimal. - */ -Datum -byteain(PG_FUNCTION_ARGS) -{ - char *inputText = PG_GETARG_CSTRING(0); - Node *escontext = fcinfo->context; - char *tp; - char *rp; - int bc; - bytea *result; - - /* Recognize hex input */ - if (inputText[0] == '\\' && inputText[1] == 'x') - { - size_t len = strlen(inputText); - - bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */ - result = palloc(bc); - bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result), - escontext); - SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ - - PG_RETURN_BYTEA_P(result); - } - - /* Else, it's the traditional escaped style */ - for (bc = 0, tp = inputText; *tp != '\0'; bc++) - { - if (tp[0] != '\\') - tp++; - else if ((tp[0] == '\\') && - (tp[1] >= '0' && tp[1] <= '3') && - (tp[2] >= '0' && tp[2] <= '7') && - (tp[3] >= '0' && tp[3] <= '7')) - tp += 4; - else if ((tp[0] == '\\') && - (tp[1] == '\\')) - tp += 2; - else - { - /* - * one backslash, not followed by another or ### valid octal - */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "bytea"))); - } - } - - bc += VARHDRSZ; - - result = (bytea *) palloc(bc); - SET_VARSIZE(result, bc); - - tp = inputText; - rp = VARDATA(result); - while (*tp != '\0') - { - if (tp[0] != '\\') - *rp++ = *tp++; - else if ((tp[0] == '\\') && - (tp[1] >= '0' && tp[1] <= '3') && - (tp[2] >= '0' && tp[2] <= '7') && - (tp[3] >= '0' && tp[3] <= '7')) - { - bc = VAL(tp[1]); - bc <<= 3; - bc += VAL(tp[2]); - bc <<= 3; - *rp++ = bc + VAL(tp[3]); - - tp += 4; - } - else if ((tp[0] == '\\') && - (tp[1] == '\\')) - { - *rp++ = '\\'; - tp += 2; - } - else - { - /* - * We should never get here. The first pass should not allow it. - */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "bytea"))); - } - } - - PG_RETURN_BYTEA_P(result); -} - -/* - * byteaout - converts to printable representation of byte array - * - * In the traditional escaped format, non-printable characters are - * printed as '\nnn' (octal) and '\' as '\\'. - */ -Datum -byteaout(PG_FUNCTION_ARGS) -{ - bytea *vlena = PG_GETARG_BYTEA_PP(0); - char *result; - char *rp; - - if (bytea_output == BYTEA_OUTPUT_HEX) - { - /* Print hex format */ - rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); - *rp++ = '\\'; - *rp++ = 'x'; - rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); - } - else if (bytea_output == BYTEA_OUTPUT_ESCAPE) - { - /* Print traditional escaped format */ - char *vp; - uint64 len; - int i; - - len = 1; /* empty string has 1 char */ - vp = VARDATA_ANY(vlena); - for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) - { - if (*vp == '\\') - len += 2; - else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) - len += 4; - else - len++; - } - - /* - * In principle len can't overflow uint32 if the input fit in 1GB, but - * for safety let's check rather than relying on palloc's internal - * check. - */ - if (len > MaxAllocSize) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg_internal("result of bytea output conversion is too large"))); - rp = result = (char *) palloc(len); - - vp = VARDATA_ANY(vlena); - for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) - { - if (*vp == '\\') - { - *rp++ = '\\'; - *rp++ = '\\'; - } - else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) - { - int val; /* holds unprintable chars */ - - val = *vp; - rp[0] = '\\'; - rp[3] = DIG(val & 07); - val >>= 3; - rp[2] = DIG(val & 07); - val >>= 3; - rp[1] = DIG(val & 03); - rp += 4; - } - else - *rp++ = *vp; - } - } - else - { - elog(ERROR, "unrecognized \"bytea_output\" setting: %d", - bytea_output); - rp = result = NULL; /* keep compiler quiet */ - } - *rp = '\0'; - PG_RETURN_CSTRING(result); -} - -/* - * bytearecv - converts external binary format to bytea - */ -Datum -bytearecv(PG_FUNCTION_ARGS) -{ - StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); - bytea *result; - int nbytes; - - nbytes = buf->len - buf->cursor; - result = (bytea *) palloc(nbytes + VARHDRSZ); - SET_VARSIZE(result, nbytes + VARHDRSZ); - pq_copymsgbytes(buf, VARDATA(result), nbytes); - PG_RETURN_BYTEA_P(result); -} - -/* - * byteasend - converts bytea to binary format - * - * This is a special case: just copy the input... - */ -Datum -byteasend(PG_FUNCTION_ARGS) -{ - bytea *vlena = PG_GETARG_BYTEA_P_COPY(0); - - PG_RETURN_BYTEA_P(vlena); -} - -Datum -bytea_string_agg_transfn(PG_FUNCTION_ARGS) -{ - StringInfo state; - - state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); - - /* Append the value unless null, preceding it with the delimiter. */ - if (!PG_ARGISNULL(1)) - { - bytea *value = PG_GETARG_BYTEA_PP(1); - bool isfirst = false; - - /* - * You might think we can just throw away the first delimiter, however - * we must keep it as we may be a parallel worker doing partial - * aggregation building a state to send to the main process. We need - * to keep the delimiter of every aggregation so that the combine - * function can properly join up the strings of two separately - * partially aggregated results. The first delimiter is only stripped - * off in the final function. To know how much to strip off the front - * of the string, we store the length of the first delimiter in the - * StringInfo's cursor field, which we don't otherwise need here. - */ - if (state == NULL) - { - state = makeStringAggState(fcinfo); - isfirst = true; - } - - if (!PG_ARGISNULL(2)) - { - bytea *delim = PG_GETARG_BYTEA_PP(2); - - appendBinaryStringInfo(state, VARDATA_ANY(delim), - VARSIZE_ANY_EXHDR(delim)); - if (isfirst) - state->cursor = VARSIZE_ANY_EXHDR(delim); - } - - appendBinaryStringInfo(state, VARDATA_ANY(value), - VARSIZE_ANY_EXHDR(value)); - } - - /* - * The transition type for string_agg() is declared to be "internal", - * which is a pass-by-value type the same size as a pointer. - */ - if (state) - PG_RETURN_POINTER(state); - PG_RETURN_NULL(); -} - -Datum -bytea_string_agg_finalfn(PG_FUNCTION_ARGS) -{ - StringInfo state; - - /* cannot be called directly because of internal-type argument */ - Assert(AggCheckCallContext(fcinfo, NULL)); - - state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); - - if (state != NULL) - { - /* As per comment in transfn, strip data before the cursor position */ - bytea *result; - int strippedlen = state->len - state->cursor; - - result = (bytea *) palloc(strippedlen + VARHDRSZ); - SET_VARSIZE(result, strippedlen + VARHDRSZ); - memcpy(VARDATA(result), &state->data[state->cursor], strippedlen); - PG_RETURN_BYTEA_P(result); - } - else - PG_RETURN_NULL(); -} - /* * textin - converts cstring to internal representation */ @@ -2959,467 +2647,6 @@ bttext_pattern_sortsupport(PG_FUNCTION_ARGS) } -/*------------------------------------------------------------- - * byteaoctetlen - * - * get the number of bytes contained in an instance of type 'bytea' - *------------------------------------------------------------- - */ -Datum -byteaoctetlen(PG_FUNCTION_ARGS) -{ - Datum str = PG_GETARG_DATUM(0); - - /* We need not detoast the input at all */ - PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); -} - -/* - * byteacat - - * takes two bytea* and returns a bytea* that is the concatenation of - * the two. - * - * Cloned from textcat and modified as required. - */ -Datum -byteacat(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - - PG_RETURN_BYTEA_P(bytea_catenate(t1, t2)); -} - -/* - * bytea_catenate - * Guts of byteacat(), broken out so it can be used by other functions - * - * Arguments can be in short-header form, but not compressed or out-of-line - */ -static bytea * -bytea_catenate(bytea *t1, bytea *t2) -{ - bytea *result; - int len1, - len2, - len; - char *ptr; - - len1 = VARSIZE_ANY_EXHDR(t1); - len2 = VARSIZE_ANY_EXHDR(t2); - - /* paranoia ... probably should throw error instead? */ - if (len1 < 0) - len1 = 0; - if (len2 < 0) - len2 = 0; - - len = len1 + len2 + VARHDRSZ; - result = (bytea *) palloc(len); - - /* Set size of result string... */ - SET_VARSIZE(result, len); - - /* Fill data field of result string... */ - ptr = VARDATA(result); - if (len1 > 0) - memcpy(ptr, VARDATA_ANY(t1), len1); - if (len2 > 0) - memcpy(ptr + len1, VARDATA_ANY(t2), len2); - - return result; -} - -#define PG_STR_GET_BYTEA(str_) \ - DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_))) - -/* - * bytea_substr() - * Return a substring starting at the specified position. - * Cloned from text_substr and modified as required. - * - * Input: - * - string - * - starting position (is one-based) - * - string length (optional) - * - * If the starting position is zero or less, then return from the start of the string - * adjusting the length to be consistent with the "negative start" per SQL. - * If the length is less than zero, an ERROR is thrown. If no third argument - * (length) is provided, the length to the end of the string is assumed. - */ -Datum -bytea_substr(PG_FUNCTION_ARGS) -{ - PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), - PG_GETARG_INT32(1), - PG_GETARG_INT32(2), - false)); -} - -/* - * bytea_substr_no_len - - * Wrapper to avoid opr_sanity failure due to - * one function accepting a different number of args. - */ -Datum -bytea_substr_no_len(PG_FUNCTION_ARGS) -{ - PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), - PG_GETARG_INT32(1), - -1, - true)); -} - -static bytea * -bytea_substring(Datum str, - int S, - int L, - bool length_not_specified) -{ - int32 S1; /* adjusted start position */ - int32 L1; /* adjusted substring length */ - int32 E; /* end position */ - - /* - * The logic here should generally match text_substring(). - */ - S1 = Max(S, 1); - - if (length_not_specified) - { - /* - * Not passed a length - DatumGetByteaPSlice() grabs everything to the - * end of the string if we pass it a negative value for length. - */ - L1 = -1; - } - else if (L < 0) - { - /* SQL99 says to throw an error for E < S, i.e., negative length */ - ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), - errmsg("negative substring length not allowed"))); - L1 = -1; /* silence stupider compilers */ - } - else if (pg_add_s32_overflow(S, L, &E)) - { - /* - * L could be large enough for S + L to overflow, in which case the - * substring must run to end of string. - */ - L1 = -1; - } - else - { - /* - * A zero or negative value for the end position can happen if the - * start was negative or one. SQL99 says to return a zero-length - * string. - */ - if (E < 1) - return PG_STR_GET_BYTEA(""); - - L1 = E - S1; - } - - /* - * If the start position is past the end of the string, SQL99 says to - * return a zero-length string -- DatumGetByteaPSlice() will do that for - * us. We need only convert S1 to zero-based starting position. - */ - return DatumGetByteaPSlice(str, S1 - 1, L1); -} - -/* - * byteaoverlay - * Replace specified substring of first string with second - * - * The SQL standard defines OVERLAY() in terms of substring and concatenation. - * This code is a direct implementation of what the standard says. - */ -Datum -byteaoverlay(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int sp = PG_GETARG_INT32(2); /* substring start position */ - int sl = PG_GETARG_INT32(3); /* substring length */ - - PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); -} - -Datum -byteaoverlay_no_len(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int sp = PG_GETARG_INT32(2); /* substring start position */ - int sl; - - sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */ - PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); -} - -static bytea * -bytea_overlay(bytea *t1, bytea *t2, int sp, int sl) -{ - bytea *result; - bytea *s1; - bytea *s2; - int sp_pl_sl; - - /* - * Check for possible integer-overflow cases. For negative sp, throw a - * "substring length" error because that's what should be expected - * according to the spec's definition of OVERLAY(). - */ - if (sp <= 0) - ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), - errmsg("negative substring length not allowed"))); - if (pg_add_s32_overflow(sp, sl, &sp_pl_sl)) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("integer out of range"))); - - s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false); - s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true); - result = bytea_catenate(s1, t2); - result = bytea_catenate(result, s2); - - return result; -} - -/* - * bit_count - */ -Datum -bytea_bit_count(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - - PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1))); -} - -/* - * byteapos - - * Return the position of the specified substring. - * Implements the SQL POSITION() function. - * Cloned from textpos and modified as required. - */ -Datum -byteapos(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int pos; - int px, - p; - int len1, - len2; - char *p1, - *p2; - - len1 = VARSIZE_ANY_EXHDR(t1); - len2 = VARSIZE_ANY_EXHDR(t2); - - if (len2 <= 0) - PG_RETURN_INT32(1); /* result for empty pattern */ - - p1 = VARDATA_ANY(t1); - p2 = VARDATA_ANY(t2); - - pos = 0; - px = (len1 - len2); - for (p = 0; p <= px; p++) - { - if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0)) - { - pos = p + 1; - break; - }; - p1++; - }; - - PG_RETURN_INT32(pos); -} - -/*------------------------------------------------------------- - * byteaGetByte - * - * this routine treats "bytea" as an array of bytes. - * It returns the Nth byte (a number between 0 and 255). - *------------------------------------------------------------- - */ -Datum -byteaGetByte(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int32 n = PG_GETARG_INT32(1); - int len; - int byte; - - len = VARSIZE_ANY_EXHDR(v); - - if (n < 0 || n >= len) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %d out of valid range, 0..%d", - n, len - 1))); - - byte = ((unsigned char *) VARDATA_ANY(v))[n]; - - PG_RETURN_INT32(byte); -} - -/*------------------------------------------------------------- - * byteaGetBit - * - * This routine treats a "bytea" type like an array of bits. - * It returns the value of the Nth bit (0 or 1). - * - *------------------------------------------------------------- - */ -Datum -byteaGetBit(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int64 n = PG_GETARG_INT64(1); - int byteNo, - bitNo; - int len; - int byte; - - len = VARSIZE_ANY_EXHDR(v); - - if (n < 0 || n >= (int64) len * 8) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, - n, (int64) len * 8 - 1))); - - /* n/8 is now known < len, so safe to cast to int */ - byteNo = (int) (n / 8); - bitNo = (int) (n % 8); - - byte = ((unsigned char *) VARDATA_ANY(v))[byteNo]; - - if (byte & (1 << bitNo)) - PG_RETURN_INT32(1); - else - PG_RETURN_INT32(0); -} - -/*------------------------------------------------------------- - * byteaSetByte - * - * Given an instance of type 'bytea' creates a new one with - * the Nth byte set to the given value. - * - *------------------------------------------------------------- - */ -Datum -byteaSetByte(PG_FUNCTION_ARGS) -{ - bytea *res = PG_GETARG_BYTEA_P_COPY(0); - int32 n = PG_GETARG_INT32(1); - int32 newByte = PG_GETARG_INT32(2); - int len; - - len = VARSIZE(res) - VARHDRSZ; - - if (n < 0 || n >= len) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %d out of valid range, 0..%d", - n, len - 1))); - - /* - * Now set the byte. - */ - ((unsigned char *) VARDATA(res))[n] = newByte; - - PG_RETURN_BYTEA_P(res); -} - -/*------------------------------------------------------------- - * byteaSetBit - * - * Given an instance of type 'bytea' creates a new one with - * the Nth bit set to the given value. - * - *------------------------------------------------------------- - */ -Datum -byteaSetBit(PG_FUNCTION_ARGS) -{ - bytea *res = PG_GETARG_BYTEA_P_COPY(0); - int64 n = PG_GETARG_INT64(1); - int32 newBit = PG_GETARG_INT32(2); - int len; - int oldByte, - newByte; - int byteNo, - bitNo; - - len = VARSIZE(res) - VARHDRSZ; - - if (n < 0 || n >= (int64) len * 8) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, - n, (int64) len * 8 - 1))); - - /* n/8 is now known < len, so safe to cast to int */ - byteNo = (int) (n / 8); - bitNo = (int) (n % 8); - - /* - * sanity check! - */ - if (newBit != 0 && newBit != 1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("new bit must be 0 or 1"))); - - /* - * Update the byte. - */ - oldByte = ((unsigned char *) VARDATA(res))[byteNo]; - - if (newBit == 0) - newByte = oldByte & (~(1 << bitNo)); - else - newByte = oldByte | (1 << bitNo); - - ((unsigned char *) VARDATA(res))[byteNo] = newByte; - - PG_RETURN_BYTEA_P(res); -} - -/* - * Return reversed bytea - */ -Datum -bytea_reverse(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - const char *p = VARDATA_ANY(v); - int len = VARSIZE_ANY_EXHDR(v); - const char *endp = p + len; - bytea *result = palloc(len + VARHDRSZ); - char *dst = (char *) VARDATA(result) + len; - - SET_VARSIZE(result, len + VARHDRSZ); - - while (p < endp) - *(--dst) = *p++; - - PG_RETURN_BYTEA_P(result); -} - - /* text_name() * Converts a text type to a Name type. */ @@ -3849,331 +3076,6 @@ SplitGUCList(char *rawstring, char separator, return true; } - -/***************************************************************************** - * Comparison Functions used for bytea - * - * Note: btree indexes need these routines not to leak memory; therefore, - * be careful to free working copies of toasted datums. Most places don't - * need to be so careful. - *****************************************************************************/ - -Datum -byteaeq(PG_FUNCTION_ARGS) -{ - Datum arg1 = PG_GETARG_DATUM(0); - Datum arg2 = PG_GETARG_DATUM(1); - bool result; - Size len1, - len2; - - /* - * We can use a fast path for unequal lengths, which might save us from - * having to detoast one or both values. - */ - len1 = toast_raw_datum_size(arg1); - len2 = toast_raw_datum_size(arg2); - if (len1 != len2) - result = false; - else - { - bytea *barg1 = DatumGetByteaPP(arg1); - bytea *barg2 = DatumGetByteaPP(arg2); - - result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), - len1 - VARHDRSZ) == 0); - - PG_FREE_IF_COPY(barg1, 0); - PG_FREE_IF_COPY(barg2, 1); - } - - PG_RETURN_BOOL(result); -} - -Datum -byteane(PG_FUNCTION_ARGS) -{ - Datum arg1 = PG_GETARG_DATUM(0); - Datum arg2 = PG_GETARG_DATUM(1); - bool result; - Size len1, - len2; - - /* - * We can use a fast path for unequal lengths, which might save us from - * having to detoast one or both values. - */ - len1 = toast_raw_datum_size(arg1); - len2 = toast_raw_datum_size(arg2); - if (len1 != len2) - result = true; - else - { - bytea *barg1 = DatumGetByteaPP(arg1); - bytea *barg2 = DatumGetByteaPP(arg2); - - result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), - len1 - VARHDRSZ) != 0); - - PG_FREE_IF_COPY(barg1, 0); - PG_FREE_IF_COPY(barg2, 1); - } - - PG_RETURN_BOOL(result); -} - -Datum -bytealt(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2))); -} - -Datum -byteale(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2))); -} - -Datum -byteagt(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2))); -} - -Datum -byteage(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2))); -} - -Datum -byteacmp(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - if ((cmp == 0) && (len1 != len2)) - cmp = (len1 < len2) ? -1 : 1; - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_INT32(cmp); -} - -Datum -bytea_larger(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - bytea *result; - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2); - - PG_RETURN_BYTEA_P(result); -} - -Datum -bytea_smaller(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - bytea *result; - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2); - - PG_RETURN_BYTEA_P(result); -} - -Datum -bytea_sortsupport(PG_FUNCTION_ARGS) -{ - SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); - MemoryContext oldcontext; - - oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); - - /* Use generic string SortSupport, forcing "C" collation */ - varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID); - - MemoryContextSwitchTo(oldcontext); - - PG_RETURN_VOID(); -} - -/* Cast bytea -> int2 */ -Datum -bytea_int2(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint16 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("smallint out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT16(result); -} - -/* Cast bytea -> int4 */ -Datum -bytea_int4(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint32 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("integer out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT32(result); -} - -/* Cast bytea -> int8 */ -Datum -bytea_int8(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint64 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("bigint out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT64(result); -} - -/* Cast int2 -> bytea; can just use int2send() */ -Datum -int2_bytea(PG_FUNCTION_ARGS) -{ - return int2send(fcinfo); -} - -/* Cast int4 -> bytea; can just use int4send() */ -Datum -int4_bytea(PG_FUNCTION_ARGS) -{ - return int4send(fcinfo); -} - -/* Cast int8 -> bytea; can just use int8send() */ -Datum -int8_bytea(PG_FUNCTION_ARGS) -{ - return int8send(fcinfo); -} - /* * appendStringInfoText * diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index db8d0d6a7e8..2bd39b6ac4b 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -529,14 +529,36 @@ xmltext(PG_FUNCTION_ARGS) #ifdef USE_LIBXML text *arg = PG_GETARG_TEXT_PP(0); text *result; - xmlChar *xmlbuf = NULL; + volatile xmlChar *xmlbuf = NULL; + PgXmlErrorContext *xmlerrcxt; + + /* Otherwise, we gotta spin up some error handling. */ + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); + + PG_TRY(); + { + xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg)); + + if (xmlbuf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlChar"); - xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg)); + result = cstring_to_text_with_len((const char *) xmlbuf, + xmlStrlen((const xmlChar *) xmlbuf)); + } + PG_CATCH(); + { + if (xmlbuf) + xmlFree((xmlChar *) xmlbuf); - Assert(xmlbuf); + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlFree((xmlChar *) xmlbuf); + pg_xml_done(xmlerrcxt, false); - result = cstring_to_text_with_len((const char *) xmlbuf, xmlStrlen(xmlbuf)); - xmlFree(xmlbuf); PG_RETURN_XML_P(result); #else NO_XML_SUPPORT(); @@ -754,6 +776,7 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) * content nodes, and then iterate over the nodes. */ xmlNodePtr root; + xmlNodePtr oldroot; xmlNodePtr newline; root = xmlNewNode(NULL, (const xmlChar *) "content-root"); @@ -761,9 +784,18 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate xml node"); - /* This attaches root to doc, so we need not free it separately. */ - xmlDocSetRootElement(doc, root); - xmlAddChildList(root, content_nodes); + /* + * This attaches root to doc, so we need not free it separately... + * but instead, we have to free the old root if there was one. + */ + oldroot = xmlDocSetRootElement(doc, root); + if (oldroot != NULL) + xmlFreeNode(oldroot); + + if (xmlAddChildList(root, content_nodes) == NULL || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not append xml node list"); /* * We use this node to insert newlines in the dump. Note: in at @@ -924,7 +956,10 @@ xmlelement(XmlExpr *xexpr, xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate xmlTextWriter"); - xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name); + if (xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not start xml element"); forboth(arg, named_arg_strings, narg, xexpr->arg_names) { @@ -932,19 +967,30 @@ xmlelement(XmlExpr *xexpr, char *argname = strVal(lfirst(narg)); if (str) - xmlTextWriterWriteAttribute(writer, - (xmlChar *) argname, - (xmlChar *) str); + { + if (xmlTextWriterWriteAttribute(writer, + (xmlChar *) argname, + (xmlChar *) str) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not write xml attribute"); + } } foreach(arg, arg_strings) { char *str = (char *) lfirst(arg); - xmlTextWriterWriteRaw(writer, (xmlChar *) str); + if (xmlTextWriterWriteRaw(writer, (xmlChar *) str) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not write raw xml text"); } - xmlTextWriterEndElement(writer); + if (xmlTextWriterEndElement(writer) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not end xml element"); /* we MUST do this now to flush data out to the buffer ... */ xmlFreeTextWriter(writer); @@ -1850,6 +1896,7 @@ xml_parse(text *data, XmlOptionType xmloption_arg, else { xmlNodePtr root; + xmlNodePtr oldroot PG_USED_FOR_ASSERTS_ONLY; /* set up document with empty root node to be the context node */ doc = xmlNewDoc(version); @@ -1868,8 +1915,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg, if (root == NULL || xmlerrcxt->err_occurred) xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate xml node"); - /* This attaches root to doc, so we need not free it separately. */ - xmlDocSetRootElement(doc, root); + + /* + * This attaches root to doc, so we need not free it separately; + * and there can't yet be any old root to free. + */ + oldroot = xmlDocSetRootElement(doc, root); + Assert(oldroot == NULL); /* allow empty content */ if (*(utf8string + count)) @@ -4207,20 +4259,27 @@ xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt) } else { - xmlChar *str; + volatile xmlChar *str = NULL; - str = xmlXPathCastNodeToString(cur); PG_TRY(); { + char *escaped; + + str = xmlXPathCastNodeToString(cur); + if (str == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlChar"); + /* Here we rely on XML having the same representation as TEXT */ - char *escaped = escape_xml((char *) str); + escaped = escape_xml((char *) str); result = (xmltype *) cstring_to_text(escaped); pfree(escaped); } PG_FINALLY(); { - xmlFree(str); + if (str) + xmlFree((xmlChar *) str); } PG_END_TRY(); } |