diff options
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- | src/backend/utils/adt/Makefile | 1 | ||||
-rw-r--r-- | src/backend/utils/adt/bytea.c | 1143 | ||||
-rw-r--r-- | src/backend/utils/adt/date.c | 86 | ||||
-rw-r--r-- | src/backend/utils/adt/float.c | 18 | ||||
-rw-r--r-- | src/backend/utils/adt/inet_net_pton.c | 3 | ||||
-rw-r--r-- | src/backend/utils/adt/like.c | 22 | ||||
-rw-r--r-- | src/backend/utils/adt/like_support.c | 7 | ||||
-rw-r--r-- | src/backend/utils/adt/meson.build | 1 | ||||
-rw-r--r-- | src/backend/utils/adt/numeric.c | 17 | ||||
-rw-r--r-- | src/backend/utils/adt/pg_locale.c | 121 | ||||
-rw-r--r-- | src/backend/utils/adt/pg_locale_builtin.c | 112 | ||||
-rw-r--r-- | src/backend/utils/adt/pg_locale_icu.c | 130 | ||||
-rw-r--r-- | src/backend/utils/adt/pg_locale_libc.c | 337 | ||||
-rw-r--r-- | src/backend/utils/adt/regproc.c | 118 | ||||
-rw-r--r-- | src/backend/utils/adt/selfuncs.c | 2 | ||||
-rw-r--r-- | src/backend/utils/adt/timestamp.c | 81 | ||||
-rw-r--r-- | src/backend/utils/adt/varlena.c | 1098 | ||||
-rw-r--r-- | src/backend/utils/adt/xml.c | 78 | ||||
-rw-r--r-- | src/backend/utils/cache/catcache.c | 1 | ||||
-rw-r--r-- | src/backend/utils/misc/injection_point.c | 46 | ||||
-rw-r--r-- | src/backend/utils/mmgr/dsa.c | 15 |
21 files changed, 2113 insertions, 1324 deletions
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index 4a233b63c32..ffeacf2b819 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -23,6 +23,7 @@ OBJS = \ arrayutils.o \ ascii.o \ bool.o \ + bytea.o \ cash.o \ char.o \ cryptohashfuncs.o \ diff --git a/src/backend/utils/adt/bytea.c b/src/backend/utils/adt/bytea.c new file mode 100644 index 00000000000..2e539c2504e --- /dev/null +++ b/src/backend/utils/adt/bytea.c @@ -0,0 +1,1143 @@ +/*------------------------------------------------------------------------- + * + * bytea.c + * Functions for the bytea type. + * + * Portions Copyright (c) 2025, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/bytea.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/detoast.h" +#include "catalog/pg_collation_d.h" +#include "catalog/pg_type_d.h" +#include "common/int.h" +#include "fmgr.h" +#include "libpq/pqformat.h" +#include "port/pg_bitutils.h" +#include "utils/builtins.h" +#include "utils/bytea.h" +#include "utils/fmgrprotos.h" +#include "utils/memutils.h" +#include "utils/sortsupport.h" +#include "utils/varlena.h" +#include "varatt.h" + +/* GUC variable */ +int bytea_output = BYTEA_OUTPUT_HEX; + +static bytea *bytea_catenate(bytea *t1, bytea *t2); +static bytea *bytea_substring(Datum str, int S, int L, + bool length_not_specified); +static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl); + +/* + * bytea_catenate + * Guts of byteacat(), broken out so it can be used by other functions + * + * Arguments can be in short-header form, but not compressed or out-of-line + */ +static bytea * +bytea_catenate(bytea *t1, bytea *t2) +{ + bytea *result; + int len1, + len2, + len; + char *ptr; + + len1 = VARSIZE_ANY_EXHDR(t1); + len2 = VARSIZE_ANY_EXHDR(t2); + + /* paranoia ... probably should throw error instead? */ + if (len1 < 0) + len1 = 0; + if (len2 < 0) + len2 = 0; + + len = len1 + len2 + VARHDRSZ; + result = (bytea *) palloc(len); + + /* Set size of result string... */ + SET_VARSIZE(result, len); + + /* Fill data field of result string... */ + ptr = VARDATA(result); + if (len1 > 0) + memcpy(ptr, VARDATA_ANY(t1), len1); + if (len2 > 0) + memcpy(ptr + len1, VARDATA_ANY(t2), len2); + + return result; +} + +#define PG_STR_GET_BYTEA(str_) \ + DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_))) + +static bytea * +bytea_substring(Datum str, + int S, + int L, + bool length_not_specified) +{ + int32 S1; /* adjusted start position */ + int32 L1; /* adjusted substring length */ + int32 E; /* end position */ + + /* + * The logic here should generally match text_substring(). + */ + S1 = Max(S, 1); + + if (length_not_specified) + { + /* + * Not passed a length - DatumGetByteaPSlice() grabs everything to the + * end of the string if we pass it a negative value for length. + */ + L1 = -1; + } + else if (L < 0) + { + /* SQL99 says to throw an error for E < S, i.e., negative length */ + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + L1 = -1; /* silence stupider compilers */ + } + else if (pg_add_s32_overflow(S, L, &E)) + { + /* + * L could be large enough for S + L to overflow, in which case the + * substring must run to end of string. + */ + L1 = -1; + } + else + { + /* + * A zero or negative value for the end position can happen if the + * start was negative or one. SQL99 says to return a zero-length + * string. + */ + if (E < 1) + return PG_STR_GET_BYTEA(""); + + L1 = E - S1; + } + + /* + * If the start position is past the end of the string, SQL99 says to + * return a zero-length string -- DatumGetByteaPSlice() will do that for + * us. We need only convert S1 to zero-based starting position. + */ + return DatumGetByteaPSlice(str, S1 - 1, L1); +} + +static bytea * +bytea_overlay(bytea *t1, bytea *t2, int sp, int sl) +{ + bytea *result; + bytea *s1; + bytea *s2; + int sp_pl_sl; + + /* + * Check for possible integer-overflow cases. For negative sp, throw a + * "substring length" error because that's what should be expected + * according to the spec's definition of OVERLAY(). + */ + if (sp <= 0) + ereport(ERROR, + (errcode(ERRCODE_SUBSTRING_ERROR), + errmsg("negative substring length not allowed"))); + if (pg_add_s32_overflow(sp, sl, &sp_pl_sl)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false); + s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true); + result = bytea_catenate(s1, t2); + result = bytea_catenate(result, s2); + + return result; +} + +/***************************************************************************** + * USER I/O ROUTINES * + *****************************************************************************/ + +#define VAL(CH) ((CH) - '0') +#define DIG(VAL) ((VAL) + '0') + +/* + * byteain - converts from printable representation of byte array + * + * Non-printable characters must be passed as '\nnn' (octal) and are + * converted to internal form. '\' must be passed as '\\'. + * ereport(ERROR, ...) if bad form. + * + * BUGS: + * The input is scanned twice. + * The error checking of input is minimal. + */ +Datum +byteain(PG_FUNCTION_ARGS) +{ + char *inputText = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + char *tp; + char *rp; + int bc; + bytea *result; + + /* Recognize hex input */ + if (inputText[0] == '\\' && inputText[1] == 'x') + { + size_t len = strlen(inputText); + + bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */ + result = palloc(bc); + bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result), + escontext); + SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ + + PG_RETURN_BYTEA_P(result); + } + + /* Else, it's the traditional escaped style */ + for (bc = 0, tp = inputText; *tp != '\0'; bc++) + { + if (tp[0] != '\\') + tp++; + else if ((tp[0] == '\\') && + (tp[1] >= '0' && tp[1] <= '3') && + (tp[2] >= '0' && tp[2] <= '7') && + (tp[3] >= '0' && tp[3] <= '7')) + tp += 4; + else if ((tp[0] == '\\') && + (tp[1] == '\\')) + tp += 2; + else + { + /* + * one backslash, not followed by another or ### valid octal + */ + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "bytea"))); + } + } + + bc += VARHDRSZ; + + result = (bytea *) palloc(bc); + SET_VARSIZE(result, bc); + + tp = inputText; + rp = VARDATA(result); + while (*tp != '\0') + { + if (tp[0] != '\\') + *rp++ = *tp++; + else if ((tp[0] == '\\') && + (tp[1] >= '0' && tp[1] <= '3') && + (tp[2] >= '0' && tp[2] <= '7') && + (tp[3] >= '0' && tp[3] <= '7')) + { + bc = VAL(tp[1]); + bc <<= 3; + bc += VAL(tp[2]); + bc <<= 3; + *rp++ = bc + VAL(tp[3]); + + tp += 4; + } + else if ((tp[0] == '\\') && + (tp[1] == '\\')) + { + *rp++ = '\\'; + tp += 2; + } + else + { + /* + * We should never get here. The first pass should not allow it. + */ + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type %s", "bytea"))); + } + } + + PG_RETURN_BYTEA_P(result); +} + +/* + * byteaout - converts to printable representation of byte array + * + * In the traditional escaped format, non-printable characters are + * printed as '\nnn' (octal) and '\' as '\\'. + */ +Datum +byteaout(PG_FUNCTION_ARGS) +{ + bytea *vlena = PG_GETARG_BYTEA_PP(0); + char *result; + char *rp; + + if (bytea_output == BYTEA_OUTPUT_HEX) + { + /* Print hex format */ + rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); + *rp++ = '\\'; + *rp++ = 'x'; + rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); + } + else if (bytea_output == BYTEA_OUTPUT_ESCAPE) + { + /* Print traditional escaped format */ + char *vp; + uint64 len; + int i; + + len = 1; /* empty string has 1 char */ + vp = VARDATA_ANY(vlena); + for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) + { + if (*vp == '\\') + len += 2; + else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) + len += 4; + else + len++; + } + + /* + * In principle len can't overflow uint32 if the input fit in 1GB, but + * for safety let's check rather than relying on palloc's internal + * check. + */ + if (len > MaxAllocSize) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg_internal("result of bytea output conversion is too large"))); + rp = result = (char *) palloc(len); + + vp = VARDATA_ANY(vlena); + for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) + { + if (*vp == '\\') + { + *rp++ = '\\'; + *rp++ = '\\'; + } + else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) + { + int val; /* holds unprintable chars */ + + val = *vp; + rp[0] = '\\'; + rp[3] = DIG(val & 07); + val >>= 3; + rp[2] = DIG(val & 07); + val >>= 3; + rp[1] = DIG(val & 03); + rp += 4; + } + else + *rp++ = *vp; + } + } + else + { + elog(ERROR, "unrecognized \"bytea_output\" setting: %d", + bytea_output); + rp = result = NULL; /* keep compiler quiet */ + } + *rp = '\0'; + PG_RETURN_CSTRING(result); +} + +/* + * bytearecv - converts external binary format to bytea + */ +Datum +bytearecv(PG_FUNCTION_ARGS) +{ + StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); + bytea *result; + int nbytes; + + nbytes = buf->len - buf->cursor; + result = (bytea *) palloc(nbytes + VARHDRSZ); + SET_VARSIZE(result, nbytes + VARHDRSZ); + pq_copymsgbytes(buf, VARDATA(result), nbytes); + PG_RETURN_BYTEA_P(result); +} + +/* + * byteasend - converts bytea to binary format + * + * This is a special case: just copy the input... + */ +Datum +byteasend(PG_FUNCTION_ARGS) +{ + bytea *vlena = PG_GETARG_BYTEA_P_COPY(0); + + PG_RETURN_BYTEA_P(vlena); +} + +Datum +bytea_string_agg_transfn(PG_FUNCTION_ARGS) +{ + StringInfo state; + + state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); + + /* Append the value unless null, preceding it with the delimiter. */ + if (!PG_ARGISNULL(1)) + { + bytea *value = PG_GETARG_BYTEA_PP(1); + bool isfirst = false; + + /* + * You might think we can just throw away the first delimiter, however + * we must keep it as we may be a parallel worker doing partial + * aggregation building a state to send to the main process. We need + * to keep the delimiter of every aggregation so that the combine + * function can properly join up the strings of two separately + * partially aggregated results. The first delimiter is only stripped + * off in the final function. To know how much to strip off the front + * of the string, we store the length of the first delimiter in the + * StringInfo's cursor field, which we don't otherwise need here. + */ + if (state == NULL) + { + MemoryContext aggcontext; + MemoryContext oldcontext; + + if (!AggCheckCallContext(fcinfo, &aggcontext)) + { + /* cannot be called directly because of internal-type argument */ + elog(ERROR, "bytea_string_agg_transfn called in non-aggregate context"); + } + + /* + * Create state in aggregate context. It'll stay there across + * subsequent calls. + */ + oldcontext = MemoryContextSwitchTo(aggcontext); + state = makeStringInfo(); + MemoryContextSwitchTo(oldcontext); + + isfirst = true; + } + + if (!PG_ARGISNULL(2)) + { + bytea *delim = PG_GETARG_BYTEA_PP(2); + + appendBinaryStringInfo(state, VARDATA_ANY(delim), + VARSIZE_ANY_EXHDR(delim)); + if (isfirst) + state->cursor = VARSIZE_ANY_EXHDR(delim); + } + + appendBinaryStringInfo(state, VARDATA_ANY(value), + VARSIZE_ANY_EXHDR(value)); + } + + /* + * The transition type for string_agg() is declared to be "internal", + * which is a pass-by-value type the same size as a pointer. + */ + if (state) + PG_RETURN_POINTER(state); + PG_RETURN_NULL(); +} + +Datum +bytea_string_agg_finalfn(PG_FUNCTION_ARGS) +{ + StringInfo state; + + /* cannot be called directly because of internal-type argument */ + Assert(AggCheckCallContext(fcinfo, NULL)); + + state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); + + if (state != NULL) + { + /* As per comment in transfn, strip data before the cursor position */ + bytea *result; + int strippedlen = state->len - state->cursor; + + result = (bytea *) palloc(strippedlen + VARHDRSZ); + SET_VARSIZE(result, strippedlen + VARHDRSZ); + memcpy(VARDATA(result), &state->data[state->cursor], strippedlen); + PG_RETURN_BYTEA_P(result); + } + else + PG_RETURN_NULL(); +} + +/*------------------------------------------------------------- + * byteaoctetlen + * + * get the number of bytes contained in an instance of type 'bytea' + *------------------------------------------------------------- + */ +Datum +byteaoctetlen(PG_FUNCTION_ARGS) +{ + Datum str = PG_GETARG_DATUM(0); + + /* We need not detoast the input at all */ + PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); +} + +/* + * byteacat - + * takes two bytea* and returns a bytea* that is the concatenation of + * the two. + * + * Cloned from textcat and modified as required. + */ +Datum +byteacat(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + + PG_RETURN_BYTEA_P(bytea_catenate(t1, t2)); +} + +/* + * byteaoverlay + * Replace specified substring of first string with second + * + * The SQL standard defines OVERLAY() in terms of substring and concatenation. + * This code is a direct implementation of what the standard says. + */ +Datum +byteaoverlay(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int sp = PG_GETARG_INT32(2); /* substring start position */ + int sl = PG_GETARG_INT32(3); /* substring length */ + + PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); +} + +Datum +byteaoverlay_no_len(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int sp = PG_GETARG_INT32(2); /* substring start position */ + int sl; + + sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */ + PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); +} + +/* + * bytea_substr() + * Return a substring starting at the specified position. + * Cloned from text_substr and modified as required. + * + * Input: + * - string + * - starting position (is one-based) + * - string length (optional) + * + * If the starting position is zero or less, then return from the start of the string + * adjusting the length to be consistent with the "negative start" per SQL. + * If the length is less than zero, an ERROR is thrown. If no third argument + * (length) is provided, the length to the end of the string is assumed. + */ +Datum +bytea_substr(PG_FUNCTION_ARGS) +{ + PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), + PG_GETARG_INT32(1), + PG_GETARG_INT32(2), + false)); +} + +/* + * bytea_substr_no_len - + * Wrapper to avoid opr_sanity failure due to + * one function accepting a different number of args. + */ +Datum +bytea_substr_no_len(PG_FUNCTION_ARGS) +{ + PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), + PG_GETARG_INT32(1), + -1, + true)); +} + +/* + * bit_count + */ +Datum +bytea_bit_count(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + + PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1))); +} + +/* + * byteapos - + * Return the position of the specified substring. + * Implements the SQL POSITION() function. + * Cloned from textpos and modified as required. + */ +Datum +byteapos(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + bytea *t2 = PG_GETARG_BYTEA_PP(1); + int pos; + int px, + p; + int len1, + len2; + char *p1, + *p2; + + len1 = VARSIZE_ANY_EXHDR(t1); + len2 = VARSIZE_ANY_EXHDR(t2); + + if (len2 <= 0) + PG_RETURN_INT32(1); /* result for empty pattern */ + + p1 = VARDATA_ANY(t1); + p2 = VARDATA_ANY(t2); + + pos = 0; + px = (len1 - len2); + for (p = 0; p <= px; p++) + { + if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0)) + { + pos = p + 1; + break; + }; + p1++; + }; + + PG_RETURN_INT32(pos); +} + +/*------------------------------------------------------------- + * byteaGetByte + * + * this routine treats "bytea" as an array of bytes. + * It returns the Nth byte (a number between 0 and 255). + *------------------------------------------------------------- + */ +Datum +byteaGetByte(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int32 n = PG_GETARG_INT32(1); + int len; + int byte; + + len = VARSIZE_ANY_EXHDR(v); + + if (n < 0 || n >= len) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %d out of valid range, 0..%d", + n, len - 1))); + + byte = ((unsigned char *) VARDATA_ANY(v))[n]; + + PG_RETURN_INT32(byte); +} + +/*------------------------------------------------------------- + * byteaGetBit + * + * This routine treats a "bytea" type like an array of bits. + * It returns the value of the Nth bit (0 or 1). + * + *------------------------------------------------------------- + */ +Datum +byteaGetBit(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int64 n = PG_GETARG_INT64(1); + int byteNo, + bitNo; + int len; + int byte; + + len = VARSIZE_ANY_EXHDR(v); + + if (n < 0 || n >= (int64) len * 8) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, + n, (int64) len * 8 - 1))); + + /* n/8 is now known < len, so safe to cast to int */ + byteNo = (int) (n / 8); + bitNo = (int) (n % 8); + + byte = ((unsigned char *) VARDATA_ANY(v))[byteNo]; + + if (byte & (1 << bitNo)) + PG_RETURN_INT32(1); + else + PG_RETURN_INT32(0); +} + +/*------------------------------------------------------------- + * byteaSetByte + * + * Given an instance of type 'bytea' creates a new one with + * the Nth byte set to the given value. + * + *------------------------------------------------------------- + */ +Datum +byteaSetByte(PG_FUNCTION_ARGS) +{ + bytea *res = PG_GETARG_BYTEA_P_COPY(0); + int32 n = PG_GETARG_INT32(1); + int32 newByte = PG_GETARG_INT32(2); + int len; + + len = VARSIZE(res) - VARHDRSZ; + + if (n < 0 || n >= len) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %d out of valid range, 0..%d", + n, len - 1))); + + /* + * Now set the byte. + */ + ((unsigned char *) VARDATA(res))[n] = newByte; + + PG_RETURN_BYTEA_P(res); +} + +/*------------------------------------------------------------- + * byteaSetBit + * + * Given an instance of type 'bytea' creates a new one with + * the Nth bit set to the given value. + * + *------------------------------------------------------------- + */ +Datum +byteaSetBit(PG_FUNCTION_ARGS) +{ + bytea *res = PG_GETARG_BYTEA_P_COPY(0); + int64 n = PG_GETARG_INT64(1); + int32 newBit = PG_GETARG_INT32(2); + int len; + int oldByte, + newByte; + int byteNo, + bitNo; + + len = VARSIZE(res) - VARHDRSZ; + + if (n < 0 || n >= (int64) len * 8) + ereport(ERROR, + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), + errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, + n, (int64) len * 8 - 1))); + + /* n/8 is now known < len, so safe to cast to int */ + byteNo = (int) (n / 8); + bitNo = (int) (n % 8); + + /* + * sanity check! + */ + if (newBit != 0 && newBit != 1) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("new bit must be 0 or 1"))); + + /* + * Update the byte. + */ + oldByte = ((unsigned char *) VARDATA(res))[byteNo]; + + if (newBit == 0) + newByte = oldByte & (~(1 << bitNo)); + else + newByte = oldByte | (1 << bitNo); + + ((unsigned char *) VARDATA(res))[byteNo] = newByte; + + PG_RETURN_BYTEA_P(res); +} + +/* + * Return reversed bytea + */ +Datum +bytea_reverse(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + const char *p = VARDATA_ANY(v); + int len = VARSIZE_ANY_EXHDR(v); + const char *endp = p + len; + bytea *result = palloc(len + VARHDRSZ); + char *dst = (char *) VARDATA(result) + len; + + SET_VARSIZE(result, len + VARHDRSZ); + + while (p < endp) + *(--dst) = *p++; + + PG_RETURN_BYTEA_P(result); +} + + +/***************************************************************************** + * Comparison Functions used for bytea + * + * Note: btree indexes need these routines not to leak memory; therefore, + * be careful to free working copies of toasted datums. Most places don't + * need to be so careful. + *****************************************************************************/ + +Datum +byteaeq(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + bool result; + Size len1, + len2; + + /* + * We can use a fast path for unequal lengths, which might save us from + * having to detoast one or both values. + */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = false; + else + { + bytea *barg1 = DatumGetByteaPP(arg1); + bytea *barg2 = DatumGetByteaPP(arg2); + + result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), + len1 - VARHDRSZ) == 0); + + PG_FREE_IF_COPY(barg1, 0); + PG_FREE_IF_COPY(barg2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +byteane(PG_FUNCTION_ARGS) +{ + Datum arg1 = PG_GETARG_DATUM(0); + Datum arg2 = PG_GETARG_DATUM(1); + bool result; + Size len1, + len2; + + /* + * We can use a fast path for unequal lengths, which might save us from + * having to detoast one or both values. + */ + len1 = toast_raw_datum_size(arg1); + len2 = toast_raw_datum_size(arg2); + if (len1 != len2) + result = true; + else + { + bytea *barg1 = DatumGetByteaPP(arg1); + bytea *barg2 = DatumGetByteaPP(arg2); + + result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), + len1 - VARHDRSZ) != 0); + + PG_FREE_IF_COPY(barg1, 0); + PG_FREE_IF_COPY(barg2, 1); + } + + PG_RETURN_BOOL(result); +} + +Datum +bytealt(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2))); +} + +Datum +byteale(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2))); +} + +Datum +byteagt(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2))); +} + +Datum +byteage(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2))); +} + +Datum +byteacmp(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + if ((cmp == 0) && (len1 != len2)) + cmp = (len1 < len2) ? -1 : 1; + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(cmp); +} + +Datum +bytea_larger(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + bytea *result; + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2); + + PG_RETURN_BYTEA_P(result); +} + +Datum +bytea_smaller(PG_FUNCTION_ARGS) +{ + bytea *arg1 = PG_GETARG_BYTEA_PP(0); + bytea *arg2 = PG_GETARG_BYTEA_PP(1); + bytea *result; + int len1, + len2; + int cmp; + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); + result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2); + + PG_RETURN_BYTEA_P(result); +} + +Datum +bytea_sortsupport(PG_FUNCTION_ARGS) +{ + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); + + /* Use generic string SortSupport, forcing "C" collation */ + varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID); + + MemoryContextSwitchTo(oldcontext); + + PG_RETURN_VOID(); +} + +/* Cast bytea -> int2 */ +Datum +bytea_int2(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint16 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT16(result); +} + +/* Cast bytea -> int4 */ +Datum +bytea_int4(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint32 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT32(result); +} + +/* Cast bytea -> int8 */ +Datum +bytea_int8(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + uint64 result; + + /* Check that the byte array is not too long */ + if (len > sizeof(result)) + ereport(ERROR, + errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range")); + + /* Convert it to an integer; most significant bytes come first */ + result = 0; + for (int i = 0; i < len; i++) + { + result <<= BITS_PER_BYTE; + result |= ((unsigned char *) VARDATA_ANY(v))[i]; + } + + PG_RETURN_INT64(result); +} + +/* Cast int2 -> bytea; can just use int2send() */ +Datum +int2_bytea(PG_FUNCTION_ARGS) +{ + return int2send(fcinfo); +} + +/* Cast int4 -> bytea; can just use int4send() */ +Datum +int4_bytea(PG_FUNCTION_ARGS) +{ + return int4send(fcinfo); +} + +/* Cast int8 -> bytea; can just use int8send() */ +Datum +int8_bytea(PG_FUNCTION_ARGS) +{ + return int8send(fcinfo); +} diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c index 4227ab1a72b..344f58b92f7 100644 --- a/src/backend/utils/adt/date.c +++ b/src/backend/utils/adt/date.c @@ -1363,10 +1363,35 @@ timestamp_date(PG_FUNCTION_ARGS) { Timestamp timestamp = PG_GETARG_TIMESTAMP(0); DateADT result; + + result = timestamp2date_opt_overflow(timestamp, NULL); + PG_RETURN_DATEADT(result); +} + +/* + * Convert timestamp to date. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the timestamp is finite but out of the valid range for date, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate date infinity. + * + * Note: given the ranges of the types, overflow is only possible at + * the minimum end of the range, but we don't assume that in this code. + */ +DateADT +timestamp2date_opt_overflow(Timestamp timestamp, int *overflow) +{ + DateADT result; struct pg_tm tt, *tm = &tt; fsec_t fsec; + if (overflow) + *overflow = 0; + if (TIMESTAMP_IS_NOBEGIN(timestamp)) DATE_NOBEGIN(result); else if (TIMESTAMP_IS_NOEND(timestamp)) @@ -1374,14 +1399,30 @@ timestamp_date(PG_FUNCTION_ARGS) else { if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + DATE_NOBEGIN(result); + } + else + { + *overflow = 1; /* not actually reachable */ + DATE_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE; } - PG_RETURN_DATEADT(result); + return result; } @@ -1408,11 +1449,36 @@ timestamptz_date(PG_FUNCTION_ARGS) { TimestampTz timestamp = PG_GETARG_TIMESTAMP(0); DateADT result; + + result = timestamptz2date_opt_overflow(timestamp, NULL); + PG_RETURN_DATEADT(result); +} + +/* + * Convert timestamptz to date. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the timestamptz is finite but out of the valid range for date, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate date infinity. + * + * Note: given the ranges of the types, overflow is only possible at + * the minimum end of the range, but we don't assume that in this code. + */ +DateADT +timestamptz2date_opt_overflow(TimestampTz timestamp, int *overflow) +{ + DateADT result; struct pg_tm tt, *tm = &tt; fsec_t fsec; int tz; + if (overflow) + *overflow = 0; + if (TIMESTAMP_IS_NOBEGIN(timestamp)) DATE_NOBEGIN(result); else if (TIMESTAMP_IS_NOEND(timestamp)) @@ -1420,14 +1486,30 @@ timestamptz_date(PG_FUNCTION_ARGS) else { if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + DATE_NOBEGIN(result); + } + else + { + *overflow = 1; /* not actually reachable */ + DATE_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE; } - PG_RETURN_DATEADT(result); + return result; } diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c index ba66a9c4ce6..7b97d2be6ca 100644 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -4067,8 +4067,9 @@ float84ge(PG_FUNCTION_ARGS) * with the specified characteristics. An operand smaller than the * lower bound is assigned to bucket 0. An operand greater than or equal * to the upper bound is assigned to an additional bucket (with number - * count+1). We don't allow "NaN" for any of the float8 inputs, and we - * don't allow either of the histogram bounds to be +/- infinity. + * count+1). We don't allow the histogram bounds to be NaN or +/- infinity, + * but we do allow those values for the operand (taking NaN to be larger + * than any other value, as we do in comparisons). */ Datum width_bucket_float8(PG_FUNCTION_ARGS) @@ -4084,12 +4085,11 @@ width_bucket_float8(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), errmsg("count must be greater than zero"))); - if (isnan(operand) || isnan(bound1) || isnan(bound2)) + if (isnan(bound1) || isnan(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), - errmsg("operand, lower bound, and upper bound cannot be NaN"))); + errmsg("lower and upper bounds cannot be NaN"))); - /* Note that we allow "operand" to be infinite */ if (isinf(bound1) || isinf(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), @@ -4097,15 +4097,15 @@ width_bucket_float8(PG_FUNCTION_ARGS) if (bound1 < bound2) { - if (operand < bound1) - result = 0; - else if (operand >= bound2) + if (isnan(operand) || operand >= bound2) { if (pg_add_s32_overflow(count, 1, &result)) ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("integer out of range"))); } + else if (operand < bound1) + result = 0; else { if (!isinf(bound2 - bound1)) @@ -4135,7 +4135,7 @@ width_bucket_float8(PG_FUNCTION_ARGS) } else if (bound1 > bound2) { - if (operand > bound1) + if (isnan(operand) || operand > bound1) result = 0; else if (operand <= bound2) { diff --git a/src/backend/utils/adt/inet_net_pton.c b/src/backend/utils/adt/inet_net_pton.c index ef2236d9f04..3b0db2a3799 100644 --- a/src/backend/utils/adt/inet_net_pton.c +++ b/src/backend/utils/adt/inet_net_pton.c @@ -115,8 +115,7 @@ inet_cidr_pton_ipv4(const char *src, u_char *dst, size_t size) src++; /* skip x or X. */ while ((ch = *src++) != '\0' && isxdigit((unsigned char) ch)) { - if (isupper((unsigned char) ch)) - ch = tolower((unsigned char) ch); + ch = pg_ascii_tolower((unsigned char) ch); n = strchr(xdigits, ch) - xdigits; assert(n >= 0 && n <= 15); if (dirty == 0) diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 7f4cf614585..4216ac17f43 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -98,7 +98,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale) else if (locale->is_default) return pg_tolower(c); else - return tolower_l(c, locale->info.lt); + return char_tolower(c, locale); } @@ -209,7 +209,17 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) * way. */ - if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU)) + if (locale->ctype_is_c || + (char_tolower_enabled(locale) && + pg_database_encoding_max_length() == 1)) + { + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + s = VARDATA_ANY(str); + slen = VARSIZE_ANY_EXHDR(str); + return SB_IMatchText(s, slen, p, plen, locale); + } + else { pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation, PointerGetDatum(pat))); @@ -224,14 +234,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation) else return MB_MatchText(s, slen, p, plen, 0); } - else - { - p = VARDATA_ANY(pat); - plen = VARSIZE_ANY_EXHDR(pat); - s = VARDATA_ANY(str); - slen = VARSIZE_ANY_EXHDR(str); - return SB_IMatchText(s, slen, p, plen, locale); - } } /* diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 8fdc677371f..999f23f86d5 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -1495,13 +1495,8 @@ pattern_char_isalpha(char c, bool is_multibyte, { if (locale->ctype_is_c) return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); - else if (is_multibyte && IS_HIGHBIT_SET(c)) - return true; - else if (locale->provider != COLLPROVIDER_LIBC) - return IS_HIGHBIT_SET(c) || - (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); else - return isalpha_l((unsigned char) c, locale->info.lt); + return char_is_cased(c, locale); } diff --git a/src/backend/utils/adt/meson.build b/src/backend/utils/adt/meson.build index 244f48f4fd7..ed9bbd7b926 100644 --- a/src/backend/utils/adt/meson.build +++ b/src/backend/utils/adt/meson.build @@ -12,6 +12,7 @@ backend_sources += files( 'arrayutils.c', 'ascii.c', 'bool.c', + 'bytea.c', 'cash.c', 'char.c', 'cryptohashfuncs.c', diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c index 58ad1a65ef7..c9233565d57 100644 --- a/src/backend/utils/adt/numeric.c +++ b/src/backend/utils/adt/numeric.c @@ -1960,8 +1960,9 @@ generate_series_numeric_support(PG_FUNCTION_ARGS) * with the specified characteristics. An operand smaller than the * lower bound is assigned to bucket 0. An operand greater than or equal * to the upper bound is assigned to an additional bucket (with number - * count+1). We don't allow "NaN" for any of the numeric inputs, and we - * don't allow either of the histogram bounds to be +/- infinity. + * count+1). We don't allow the histogram bounds to be NaN or +/- infinity, + * but we do allow those values for the operand (taking NaN to be larger + * than any other value, as we do in comparisons). */ Datum width_bucket_numeric(PG_FUNCTION_ARGS) @@ -1979,17 +1980,13 @@ width_bucket_numeric(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), errmsg("count must be greater than zero"))); - if (NUMERIC_IS_SPECIAL(operand) || - NUMERIC_IS_SPECIAL(bound1) || - NUMERIC_IS_SPECIAL(bound2)) + if (NUMERIC_IS_SPECIAL(bound1) || NUMERIC_IS_SPECIAL(bound2)) { - if (NUMERIC_IS_NAN(operand) || - NUMERIC_IS_NAN(bound1) || - NUMERIC_IS_NAN(bound2)) + if (NUMERIC_IS_NAN(bound1) || NUMERIC_IS_NAN(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), - errmsg("operand, lower bound, and upper bound cannot be NaN"))); - /* We allow "operand" to be infinite; cmp_numerics will cope */ + errmsg("lower and upper bounds cannot be NaN"))); + if (NUMERIC_IS_INF(bound1) || NUMERIC_IS_INF(bound2)) ereport(ERROR, (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION), diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index bf1afb24d7d..97c2ac1faf9 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -79,31 +79,6 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context); extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); extern char *get_collation_actual_version_libc(const char *collcollate); -extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - -extern size_t strlower_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_icu(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - -extern size_t strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - /* GUC settings */ char *locale_messages; char *locale_monetary; @@ -1092,6 +1067,9 @@ create_pg_locale(Oid collid, MemoryContext context) Assert((result->collate_is_c && result->collate == NULL) || (!result->collate_is_c && result->collate != NULL)); + Assert((result->ctype_is_c && result->ctype == NULL) || + (!result->ctype_is_c && result->ctype != NULL)); + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion, &isnull); if (!isnull) @@ -1256,77 +1234,31 @@ size_t pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strlower_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strlower_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return strlower_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strlower(dst, dstsize, src, srclen, locale); } size_t pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strtitle_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strtitle_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return strtitle_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strtitle(dst, dstsize, src, srclen, locale); } size_t pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strupper_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strupper_icu(dst, dstsize, src, srclen, locale); -#endif - else if (locale->provider == COLLPROVIDER_LIBC) - return strupper_libc(dst, dstsize, src, srclen, locale); - else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strupper(dst, dstsize, src, srclen, locale); } size_t pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen, pg_locale_t locale) { - if (locale->provider == COLLPROVIDER_BUILTIN) - return strfold_builtin(dst, dstsize, src, srclen, locale); -#ifdef USE_ICU - else if (locale->provider == COLLPROVIDER_ICU) - return strfold_icu(dst, dstsize, src, srclen, locale); -#endif - /* for libc, just use strlower */ - else if (locale->provider == COLLPROVIDER_LIBC) - return strlower_libc(dst, dstsize, src, srclen, locale); + if (locale->ctype->strfold) + return locale->ctype->strfold(dst, dstsize, src, srclen, locale); else - /* shouldn't happen */ - PGLOCALE_SUPPORT_ERROR(locale->provider); - - return 0; /* keep compiler quiet */ + return locale->ctype->strlower(dst, dstsize, src, srclen, locale); } /* @@ -1464,6 +1396,41 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src, } /* + * char_is_cased() + * + * Fuzzy test of whether the given char is case-varying or not. The argument + * is a single byte, so in a multibyte encoding, just assume any non-ASCII + * char is case-varying. + */ +bool +char_is_cased(char ch, pg_locale_t locale) +{ + return locale->ctype->char_is_cased(ch, locale); +} + +/* + * char_tolower_enabled() + * + * Does the provider support char_tolower()? + */ +bool +char_tolower_enabled(pg_locale_t locale) +{ + return (locale->ctype->char_tolower != NULL); +} + +/* + * char_tolower() + * + * Convert char (single-byte encoding) to lowercase. + */ +char +char_tolower(unsigned char ch, pg_locale_t locale) +{ + return locale->ctype->char_tolower(ch, locale); +} + +/* * Return required encoding ID for the given locale, or -1 if any encoding is * valid for the locale. */ diff --git a/src/backend/utils/adt/pg_locale_builtin.c b/src/backend/utils/adt/pg_locale_builtin.c index ce4914a76a1..0c9fbdb40f2 100644 --- a/src/backend/utils/adt/pg_locale_builtin.c +++ b/src/backend/utils/adt/pg_locale_builtin.c @@ -24,15 +24,6 @@ extern pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context); extern char *get_collation_actual_version_builtin(const char *collcollate); -extern size_t strlower_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_builtin(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); - struct WordBoundaryState { @@ -76,7 +67,7 @@ initcap_wbnext(void *state) return wbstate->len; } -size_t +static size_t strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -84,7 +75,7 @@ strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, locale->info.builtin.casemap_full); } -size_t +static size_t strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -102,7 +93,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, initcap_wbnext, &wbstate); } -size_t +static size_t strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -110,7 +101,7 @@ strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, locale->info.builtin.casemap_full); } -size_t +static size_t strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -118,6 +109,98 @@ strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen, locale->info.builtin.casemap_full); } +static bool +wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isdigit(wc, !locale->info.builtin.casemap_full); +} + +static bool +wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isalpha(wc); +} + +static bool +wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isalnum(wc, !locale->info.builtin.casemap_full); +} + +static bool +wc_isupper_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isupper(wc); +} + +static bool +wc_islower_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_islower(wc); +} + +static bool +wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isgraph(wc); +} + +static bool +wc_isprint_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isprint(wc); +} + +static bool +wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_ispunct(wc, !locale->info.builtin.casemap_full); +} + +static bool +wc_isspace_builtin(pg_wchar wc, pg_locale_t locale) +{ + return pg_u_isspace(wc); +} + +static bool +char_is_cased_builtin(char ch, pg_locale_t locale) +{ + return IS_HIGHBIT_SET(ch) || + (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); +} + +static pg_wchar +wc_toupper_builtin(pg_wchar wc, pg_locale_t locale) +{ + return unicode_uppercase_simple(wc); +} + +static pg_wchar +wc_tolower_builtin(pg_wchar wc, pg_locale_t locale) +{ + return unicode_lowercase_simple(wc); +} + +static const struct ctype_methods ctype_methods_builtin = { + .strlower = strlower_builtin, + .strtitle = strtitle_builtin, + .strupper = strupper_builtin, + .strfold = strfold_builtin, + .wc_isdigit = wc_isdigit_builtin, + .wc_isalpha = wc_isalpha_builtin, + .wc_isalnum = wc_isalnum_builtin, + .wc_isupper = wc_isupper_builtin, + .wc_islower = wc_islower_builtin, + .wc_isgraph = wc_isgraph_builtin, + .wc_isprint = wc_isprint_builtin, + .wc_ispunct = wc_ispunct_builtin, + .wc_isspace = wc_isspace_builtin, + .char_is_cased = char_is_cased_builtin, + .wc_tolower = wc_tolower_builtin, + .wc_toupper = wc_toupper_builtin, +}; + pg_locale_t create_pg_locale_builtin(Oid collid, MemoryContext context) { @@ -157,10 +240,11 @@ create_pg_locale_builtin(Oid collid, MemoryContext context) result->info.builtin.locale = MemoryContextStrdup(context, locstr); result->info.builtin.casemap_full = (strcmp(locstr, "PG_UNICODE_FAST") == 0); - result->provider = COLLPROVIDER_BUILTIN; result->deterministic = true; result->collate_is_c = true; result->ctype_is_c = (strcmp(locstr, "C") == 0); + if (!result->ctype_is_c) + result->ctype = &ctype_methods_builtin; return result; } diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c index a32c32a0744..96741e08269 100644 --- a/src/backend/utils/adt/pg_locale_icu.c +++ b/src/backend/utils/adt/pg_locale_icu.c @@ -48,19 +48,22 @@ #define TEXTBUFLEN 1024 extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context); -extern size_t strlower_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strfold_icu(char *dest, size_t destsize, const char *src, - ssize_t srclen, pg_locale_t locale); #ifdef USE_ICU extern UCollator *pg_ucol_open(const char *loc_str); +static size_t strlower_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static size_t strtitle_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static size_t strupper_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static size_t strfold_icu(char *dest, size_t destsize, const char *src, + ssize_t srclen, pg_locale_t locale); +static int strncoll_icu(const char *arg1, ssize_t len1, + const char *arg2, ssize_t len2, + pg_locale_t locale); static size_t strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale); @@ -118,6 +121,25 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity, const char *locale, UErrorCode *pErrorCode); +static bool +char_is_cased_icu(char ch, pg_locale_t locale) +{ + return IS_HIGHBIT_SET(ch) || + (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); +} + +static pg_wchar +toupper_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_toupper(wc); +} + +static pg_wchar +tolower_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_tolower(wc); +} + static const struct collate_methods collate_methods_icu = { .strncoll = strncoll_icu, .strnxfrm = strnxfrm_icu, @@ -136,6 +158,78 @@ static const struct collate_methods collate_methods_icu_utf8 = { .strxfrm_is_safe = true, }; +static bool +wc_isdigit_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isdigit(wc); +} + +static bool +wc_isalpha_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isalpha(wc); +} + +static bool +wc_isalnum_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isalnum(wc); +} + +static bool +wc_isupper_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isupper(wc); +} + +static bool +wc_islower_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_islower(wc); +} + +static bool +wc_isgraph_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isgraph(wc); +} + +static bool +wc_isprint_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isprint(wc); +} + +static bool +wc_ispunct_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_ispunct(wc); +} + +static bool +wc_isspace_icu(pg_wchar wc, pg_locale_t locale) +{ + return u_isspace(wc); +} + +static const struct ctype_methods ctype_methods_icu = { + .strlower = strlower_icu, + .strtitle = strtitle_icu, + .strupper = strupper_icu, + .strfold = strfold_icu, + .wc_isdigit = wc_isdigit_icu, + .wc_isalpha = wc_isalpha_icu, + .wc_isalnum = wc_isalnum_icu, + .wc_isupper = wc_isupper_icu, + .wc_islower = wc_islower_icu, + .wc_isgraph = wc_isgraph_icu, + .wc_isprint = wc_isprint_icu, + .wc_ispunct = wc_ispunct_icu, + .wc_isspace = wc_isspace_icu, + .char_is_cased = char_is_cased_icu, + .wc_toupper = toupper_icu, + .wc_tolower = tolower_icu, +}; #endif pg_locale_t @@ -198,7 +292,6 @@ create_pg_locale_icu(Oid collid, MemoryContext context) result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); result->info.icu.locale = MemoryContextStrdup(context, iculocstr); result->info.icu.ucol = collator; - result->provider = COLLPROVIDER_ICU; result->deterministic = deterministic; result->collate_is_c = false; result->ctype_is_c = false; @@ -206,6 +299,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context) result->collate = &collate_methods_icu_utf8; else result->collate = &collate_methods_icu; + result->ctype = &ctype_methods_icu; return result; #else @@ -379,7 +473,7 @@ make_icu_collator(const char *iculocstr, const char *icurules) } } -size_t +static size_t strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -399,7 +493,7 @@ strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_len; } -size_t +static size_t strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -419,7 +513,7 @@ strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_len; } -size_t +static size_t strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -439,7 +533,7 @@ strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, return result_len; } -size_t +static size_t strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) { @@ -474,8 +568,6 @@ strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2 int result; UErrorCode status; - Assert(locale->provider == COLLPROVIDER_ICU); - Assert(GetDatabaseEncoding() == PG_UTF8); status = U_ZERO_ERROR; @@ -503,8 +595,6 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t uchar_bsize; Size result_bsize; - Assert(locale->provider == COLLPROVIDER_ICU); - init_icu_converter(); ulen = uchar_length(icu_converter, src, srclen); @@ -549,8 +639,6 @@ strnxfrm_prefix_icu_utf8(char *dest, size_t destsize, uint32_t state[2]; UErrorCode status; - Assert(locale->provider == COLLPROVIDER_ICU); - Assert(GetDatabaseEncoding() == PG_UTF8); uiter_setUTF8(&iter, src, srclen); @@ -749,8 +837,6 @@ strncoll_icu(const char *arg1, ssize_t len1, *uchar2; int result; - Assert(locale->provider == COLLPROVIDER_ICU); - /* if encoding is UTF8, use more efficient strncoll_icu_utf8 */ #ifdef HAVE_UCOL_STRCOLLUTF8 Assert(GetDatabaseEncoding() != PG_UTF8); @@ -799,8 +885,6 @@ strnxfrm_prefix_icu(char *dest, size_t destsize, size_t uchar_bsize; Size result_bsize; - Assert(locale->provider == COLLPROVIDER_ICU); - /* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */ Assert(GetDatabaseEncoding() != PG_UTF8); diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c index 199857e22db..e9f9fc1e369 100644 --- a/src/backend/utils/adt/pg_locale_libc.c +++ b/src/backend/utils/adt/pg_locale_libc.c @@ -34,6 +34,46 @@ #endif /* + * For the libc provider, to provide as much functionality as possible on a + * variety of platforms without going so far as to implement everything from + * scratch, we use several implementation strategies depending on the + * situation: + * + * 1. In C/POSIX collations, we use hard-wired code. We can't depend on + * the <ctype.h> functions since those will obey LC_CTYPE. Note that these + * collations don't give a fig about multibyte characters. + * + * 2. When working in UTF8 encoding, we use the <wctype.h> functions. + * This assumes that every platform uses Unicode codepoints directly + * as the wchar_t representation of Unicode. (XXX: ICU makes this assumption + * even for non-UTF8 encodings, which may be a problem.) On some platforms + * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF. + * + * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar + * values up to 255, and punt for values above that. This is 100% correct + * only in single-byte encodings such as LATINn. However, non-Unicode + * multibyte encodings are mostly Far Eastern character sets for which the + * properties being tested here aren't very relevant for higher code values + * anyway. The difficulty with using the <wctype.h> functions with + * non-Unicode multibyte encodings is that we can have no certainty that + * the platform's wchar_t representation matches what we do in pg_wchar + * conversions. + * + * As a special case, in the "default" collation, (2) and (3) force ASCII + * letters to follow ASCII upcase/downcase rules, while in a non-default + * collation we just let the library functions do what they will. The case + * where this matters is treatment of I/i in Turkish, and the behavior is + * meant to match the upper()/lower() SQL functions. + * + * We store the active collation setting in static variables. In principle + * it could be passed down to here via the regex library's "struct vars" data + * structure; but that would require somewhat invasive changes in the regex + * library, and right now there's no real benefit to be gained from that. + * + * NB: the coding here assumes pg_wchar is an unsigned type. + */ + +/* * Size of stack buffer to use for string transformations, used to avoid heap * allocations in typical cases. This should be large enough that most strings * will fit, but small enough that we feel comfortable putting it on the @@ -43,13 +83,6 @@ extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context); -extern size_t strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); -extern size_t strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale); - static int strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, pg_locale_t locale); @@ -85,6 +118,251 @@ static size_t strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale); +static bool +wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isdigit_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isalpha_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isalnum_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isupper_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return islower_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isgraph_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isprint_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return ispunct_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + return isspace_l((unsigned char) wc, locale->info.lt); +} + +static bool +wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswdigit_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswalpha_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswalnum_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswupper_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswlower_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswgraph_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswprint_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswpunct_l((wint_t) wc, locale->info.lt); +} + +static bool +wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + return iswspace_l((wint_t) wc, locale->info.lt); +} + +static char +char_tolower_libc(unsigned char ch, pg_locale_t locale) +{ + Assert(pg_database_encoding_max_length() == 1); + return tolower_l(ch, locale->info.lt); +} + +static bool +char_is_cased_libc(char ch, pg_locale_t locale) +{ + bool is_multibyte = pg_database_encoding_max_length() > 1; + + if (is_multibyte && IS_HIGHBIT_SET(ch)) + return true; + else + return isalpha_l((unsigned char) ch, locale->info.lt); +} + +static pg_wchar +toupper_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() != PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_toupper((unsigned char) wc); + if (wc <= (pg_wchar) UCHAR_MAX) + return toupper_l((unsigned char) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +toupper_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() == PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_toupper((unsigned char) wc); + if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF) + return towupper_l((wint_t) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +tolower_libc_sb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() != PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_tolower((unsigned char) wc); + if (wc <= (pg_wchar) UCHAR_MAX) + return tolower_l((unsigned char) wc, locale->info.lt); + else + return wc; +} + +static pg_wchar +tolower_libc_mb(pg_wchar wc, pg_locale_t locale) +{ + Assert(GetDatabaseEncoding() == PG_UTF8); + + /* force C behavior for ASCII characters, per comments above */ + if (locale->is_default && wc <= (pg_wchar) 127) + return pg_ascii_tolower((unsigned char) wc); + if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF) + return towlower_l((wint_t) wc, locale->info.lt); + else + return wc; +} + +static const struct ctype_methods ctype_methods_libc_sb = { + .strlower = strlower_libc_sb, + .strtitle = strtitle_libc_sb, + .strupper = strupper_libc_sb, + .wc_isdigit = wc_isdigit_libc_sb, + .wc_isalpha = wc_isalpha_libc_sb, + .wc_isalnum = wc_isalnum_libc_sb, + .wc_isupper = wc_isupper_libc_sb, + .wc_islower = wc_islower_libc_sb, + .wc_isgraph = wc_isgraph_libc_sb, + .wc_isprint = wc_isprint_libc_sb, + .wc_ispunct = wc_ispunct_libc_sb, + .wc_isspace = wc_isspace_libc_sb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_sb, + .wc_tolower = tolower_libc_sb, + .max_chr = UCHAR_MAX, +}; + +/* + * Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but + * single-byte semantics for pattern matching. + */ +static const struct ctype_methods ctype_methods_libc_other_mb = { + .strlower = strlower_libc_mb, + .strtitle = strtitle_libc_mb, + .strupper = strupper_libc_mb, + .wc_isdigit = wc_isdigit_libc_sb, + .wc_isalpha = wc_isalpha_libc_sb, + .wc_isalnum = wc_isalnum_libc_sb, + .wc_isupper = wc_isupper_libc_sb, + .wc_islower = wc_islower_libc_sb, + .wc_isgraph = wc_isgraph_libc_sb, + .wc_isprint = wc_isprint_libc_sb, + .wc_ispunct = wc_ispunct_libc_sb, + .wc_isspace = wc_isspace_libc_sb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_sb, + .wc_tolower = tolower_libc_sb, + .max_chr = UCHAR_MAX, +}; + +static const struct ctype_methods ctype_methods_libc_utf8 = { + .strlower = strlower_libc_mb, + .strtitle = strtitle_libc_mb, + .strupper = strupper_libc_mb, + .wc_isdigit = wc_isdigit_libc_mb, + .wc_isalpha = wc_isalpha_libc_mb, + .wc_isalnum = wc_isalnum_libc_mb, + .wc_isupper = wc_isupper_libc_mb, + .wc_islower = wc_islower_libc_mb, + .wc_isgraph = wc_isgraph_libc_mb, + .wc_isprint = wc_isprint_libc_mb, + .wc_ispunct = wc_ispunct_libc_mb, + .wc_isspace = wc_isspace_libc_mb, + .char_is_cased = char_is_cased_libc, + .char_tolower = char_tolower_libc, + .wc_toupper = toupper_libc_mb, + .wc_tolower = tolower_libc_mb, +}; + static const struct collate_methods collate_methods_libc = { .strncoll = strncoll_libc, .strnxfrm = strnxfrm_libc, @@ -119,36 +397,6 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = { }; #endif -size_t -strlower_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strlower_libc_mb(dst, dstsize, src, srclen, locale); - else - return strlower_libc_sb(dst, dstsize, src, srclen, locale); -} - -size_t -strtitle_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strtitle_libc_mb(dst, dstsize, src, srclen, locale); - else - return strtitle_libc_sb(dst, dstsize, src, srclen, locale); -} - -size_t -strupper_libc(char *dst, size_t dstsize, const char *src, - ssize_t srclen, pg_locale_t locale) -{ - if (pg_database_encoding_max_length() > 1) - return strupper_libc_mb(dst, dstsize, src, srclen, locale); - else - return strupper_libc_sb(dst, dstsize, src, srclen, locale); -} - static size_t strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen, pg_locale_t locale) @@ -465,7 +713,6 @@ create_pg_locale_libc(Oid collid, MemoryContext context) loc = make_libc_collator(collate, ctype); result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); - result->provider = COLLPROVIDER_LIBC; result->deterministic = true; result->collate_is_c = (strcmp(collate, "C") == 0) || (strcmp(collate, "POSIX") == 0); @@ -481,6 +728,15 @@ create_pg_locale_libc(Oid collid, MemoryContext context) #endif result->collate = &collate_methods_libc; } + if (!result->ctype_is_c) + { + if (GetDatabaseEncoding() == PG_UTF8) + result->ctype = &ctype_methods_libc_utf8; + else if (pg_database_encoding_max_length() > 1) + result->ctype = &ctype_methods_libc_other_mb; + else + result->ctype = &ctype_methods_libc_sb; + } return result; } @@ -576,8 +832,6 @@ strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2, const char *arg2n; int result; - Assert(locale->provider == COLLPROVIDER_LIBC); - if (bufsize1 + bufsize2 > TEXTBUFLEN) buf = palloc(bufsize1 + bufsize2); @@ -632,8 +886,6 @@ strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen, size_t bufsize = srclen + 1; size_t result; - Assert(locale->provider == COLLPROVIDER_LIBC); - if (srclen == -1) return strxfrm_l(dest, src, destsize, locale->info.lt); @@ -742,7 +994,6 @@ strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2, int r; int result; - Assert(locale->provider == COLLPROVIDER_LIBC); Assert(GetDatabaseEncoding() == PG_UTF8); if (len1 == -1) diff --git a/src/backend/utils/adt/regproc.c b/src/backend/utils/adt/regproc.c index 5ee608a2b39..b8bbe95e82e 100644 --- a/src/backend/utils/adt/regproc.c +++ b/src/backend/utils/adt/regproc.c @@ -30,6 +30,7 @@ #include "catalog/pg_ts_config.h" #include "catalog/pg_ts_dict.h" #include "catalog/pg_type.h" +#include "commands/dbcommands.h" #include "lib/stringinfo.h" #include "mb/pg_wchar.h" #include "miscadmin.h" @@ -1764,6 +1765,123 @@ regnamespacesend(PG_FUNCTION_ARGS) } /* + * regdatabasein - converts database name to database OID + * + * We also accept a numeric OID, for symmetry with the output routine. + * + * '-' signifies unknown (OID 0). In all other cases, the input must + * match an existing pg_database entry. + */ +Datum +regdatabasein(PG_FUNCTION_ARGS) +{ + char *db_name_or_oid = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; + Oid result; + List *names; + + /* Handle "-" or numeric OID */ + if (parseDashOrOid(db_name_or_oid, &result, escontext)) + PG_RETURN_OID(result); + + /* The rest of this wouldn't work in bootstrap mode */ + if (IsBootstrapProcessingMode()) + elog(ERROR, "regdatabase values must be OIDs in bootstrap mode"); + + /* Normal case: see if the name matches any pg_database entry. */ + names = stringToQualifiedNameList(db_name_or_oid, escontext); + if (names == NIL) + PG_RETURN_NULL(); + + if (list_length(names) != 1) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_INVALID_NAME), + errmsg("invalid name syntax"))); + + result = get_database_oid(strVal(linitial(names)), true); + + if (!OidIsValid(result)) + ereturn(escontext, (Datum) 0, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("database \"%s\" does not exist", + strVal(linitial(names))))); + + PG_RETURN_OID(result); +} + +/* + * to_regdatabase - converts database name to database OID + * + * If the name is not found, we return NULL. + */ +Datum +to_regdatabase(PG_FUNCTION_ARGS) +{ + char *db_name = text_to_cstring(PG_GETARG_TEXT_PP(0)); + Datum result; + ErrorSaveContext escontext = {T_ErrorSaveContext}; + + if (!DirectInputFunctionCallSafe(regdatabasein, db_name, + InvalidOid, -1, + (Node *) &escontext, + &result)) + PG_RETURN_NULL(); + PG_RETURN_DATUM(result); +} + +/* + * regdatabaseout - converts database OID to database name + */ +Datum +regdatabaseout(PG_FUNCTION_ARGS) +{ + Oid dboid = PG_GETARG_OID(0); + char *result; + + if (dboid == InvalidOid) + { + result = pstrdup("-"); + PG_RETURN_CSTRING(result); + } + + result = get_database_name(dboid); + + if (result) + { + /* pstrdup is not really necessary, but it avoids a compiler warning */ + result = pstrdup(quote_identifier(result)); + } + else + { + /* If OID doesn't match any database, return it numerically */ + result = (char *) palloc(NAMEDATALEN); + snprintf(result, NAMEDATALEN, "%u", dboid); + } + + PG_RETURN_CSTRING(result); +} + +/* + * regdatabaserecv - converts external binary format to regdatabase + */ +Datum +regdatabaserecv(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidrecv, so share code */ + return oidrecv(fcinfo); +} + +/* + * regdatabasesend - converts regdatabase to binary format + */ +Datum +regdatabasesend(PG_FUNCTION_ARGS) +{ + /* Exactly the same as oidsend, so share code */ + return oidsend(fcinfo); +} + +/* * text_regclass: convert text to regclass * * This could be replaced by CoerceViaIO, except that we need to treat diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c index 1e0f2de0336..ce6a626eba2 100644 --- a/src/backend/utils/adt/selfuncs.c +++ b/src/backend/utils/adt/selfuncs.c @@ -4619,6 +4619,7 @@ convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue, case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: + case REGDATABASEOID: *scaledvalue = convert_numeric_to_scalar(value, valuetypid, &failure); *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid, @@ -4751,6 +4752,7 @@ convert_numeric_to_scalar(Datum value, Oid typid, bool *failure) case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: + case REGDATABASEOID: /* we can treat OIDs as integers... */ return (double) DatumGetObjectId(value); } diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 347089b7626..0a5848a4ab2 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -6477,7 +6477,7 @@ timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow) if (TIMESTAMP_NOT_FINITE(timestamp)) return timestamp; - /* We don't expect this to fail, but check it pro forma */ + /* timestamp2tm should not fail on valid timestamps, but cope */ if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) == 0) { tz = DetermineTimeZoneOffset(tm, session_timezone); @@ -6485,23 +6485,22 @@ timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow) result = dt2local(timestamp, -tz); if (IS_VALID_TIMESTAMP(result)) - { return result; + } + + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + TIMESTAMP_NOBEGIN(result); } - else if (overflow) + else { - if (result < MIN_TIMESTAMP) - { - *overflow = -1; - TIMESTAMP_NOBEGIN(result); - } - else - { - *overflow = 1; - TIMESTAMP_NOEND(result); - } - return result; + *overflow = 1; + TIMESTAMP_NOEND(result); } + return result; } ereport(ERROR, @@ -6531,27 +6530,81 @@ timestamptz_timestamp(PG_FUNCTION_ARGS) PG_RETURN_TIMESTAMP(timestamptz2timestamp(timestamp)); } +/* + * Convert timestamptz to timestamp, throwing error for overflow. + */ static Timestamp timestamptz2timestamp(TimestampTz timestamp) { + return timestamptz2timestamp_opt_overflow(timestamp, NULL); +} + +/* + * Convert timestamp with time zone to timestamp. + * + * On successful conversion, *overflow is set to zero if it's not NULL. + * + * If the timestamptz is finite but out of the valid range for timestamp, then: + * if overflow is NULL, we throw an out-of-range error. + * if overflow is not NULL, we store +1 or -1 there to indicate the sign + * of the overflow, and return the appropriate timestamp infinity. + */ +Timestamp +timestamptz2timestamp_opt_overflow(TimestampTz timestamp, int *overflow) +{ Timestamp result; struct pg_tm tt, *tm = &tt; fsec_t fsec; int tz; + if (overflow) + *overflow = 0; + if (TIMESTAMP_NOT_FINITE(timestamp)) result = timestamp; else { if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + TIMESTAMP_NOBEGIN(result); + } + else + { + *overflow = 1; + TIMESTAMP_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } if (tm2timestamp(tm, fsec, NULL, &result) != 0) + { + if (overflow) + { + if (timestamp < 0) + { + *overflow = -1; + TIMESTAMP_NOBEGIN(result); + } + else + { + *overflow = 1; + TIMESTAMP_NOEND(result); + } + return result; + } ereport(ERROR, (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE), errmsg("timestamp out of range"))); + } } return result; } diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 3e4d5568bde..ffae8c23abf 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -35,7 +35,6 @@ #include "port/pg_bswap.h" #include "regex/regex.h" #include "utils/builtins.h" -#include "utils/bytea.h" #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/memutils.h" @@ -43,10 +42,6 @@ #include "utils/sortsupport.h" #include "utils/varlena.h" - -/* GUC variable */ -int bytea_output = BYTEA_OUTPUT_HEX; - typedef struct varlena VarString; /* @@ -148,12 +143,6 @@ static int text_position_get_match_pos(TextPositionState *state); static void text_position_cleanup(TextPositionState *state); static void check_collation_set(Oid collid); static int text_cmp(text *arg1, text *arg2, Oid collid); -static bytea *bytea_catenate(bytea *t1, bytea *t2); -static bytea *bytea_substring(Datum str, - int S, - int L, - bool length_not_specified); -static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl); static void appendStringInfoText(StringInfo str, const text *t); static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate); static void split_text_accum_result(SplitTextOutputData *tstate, @@ -279,307 +268,6 @@ text_to_cstring_buffer(const text *src, char *dst, size_t dst_len) * USER I/O ROUTINES * *****************************************************************************/ - -#define VAL(CH) ((CH) - '0') -#define DIG(VAL) ((VAL) + '0') - -/* - * byteain - converts from printable representation of byte array - * - * Non-printable characters must be passed as '\nnn' (octal) and are - * converted to internal form. '\' must be passed as '\\'. - * ereport(ERROR, ...) if bad form. - * - * BUGS: - * The input is scanned twice. - * The error checking of input is minimal. - */ -Datum -byteain(PG_FUNCTION_ARGS) -{ - char *inputText = PG_GETARG_CSTRING(0); - Node *escontext = fcinfo->context; - char *tp; - char *rp; - int bc; - bytea *result; - - /* Recognize hex input */ - if (inputText[0] == '\\' && inputText[1] == 'x') - { - size_t len = strlen(inputText); - - bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */ - result = palloc(bc); - bc = hex_decode_safe(inputText + 2, len - 2, VARDATA(result), - escontext); - SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */ - - PG_RETURN_BYTEA_P(result); - } - - /* Else, it's the traditional escaped style */ - for (bc = 0, tp = inputText; *tp != '\0'; bc++) - { - if (tp[0] != '\\') - tp++; - else if ((tp[0] == '\\') && - (tp[1] >= '0' && tp[1] <= '3') && - (tp[2] >= '0' && tp[2] <= '7') && - (tp[3] >= '0' && tp[3] <= '7')) - tp += 4; - else if ((tp[0] == '\\') && - (tp[1] == '\\')) - tp += 2; - else - { - /* - * one backslash, not followed by another or ### valid octal - */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "bytea"))); - } - } - - bc += VARHDRSZ; - - result = (bytea *) palloc(bc); - SET_VARSIZE(result, bc); - - tp = inputText; - rp = VARDATA(result); - while (*tp != '\0') - { - if (tp[0] != '\\') - *rp++ = *tp++; - else if ((tp[0] == '\\') && - (tp[1] >= '0' && tp[1] <= '3') && - (tp[2] >= '0' && tp[2] <= '7') && - (tp[3] >= '0' && tp[3] <= '7')) - { - bc = VAL(tp[1]); - bc <<= 3; - bc += VAL(tp[2]); - bc <<= 3; - *rp++ = bc + VAL(tp[3]); - - tp += 4; - } - else if ((tp[0] == '\\') && - (tp[1] == '\\')) - { - *rp++ = '\\'; - tp += 2; - } - else - { - /* - * We should never get here. The first pass should not allow it. - */ - ereturn(escontext, (Datum) 0, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "bytea"))); - } - } - - PG_RETURN_BYTEA_P(result); -} - -/* - * byteaout - converts to printable representation of byte array - * - * In the traditional escaped format, non-printable characters are - * printed as '\nnn' (octal) and '\' as '\\'. - */ -Datum -byteaout(PG_FUNCTION_ARGS) -{ - bytea *vlena = PG_GETARG_BYTEA_PP(0); - char *result; - char *rp; - - if (bytea_output == BYTEA_OUTPUT_HEX) - { - /* Print hex format */ - rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1); - *rp++ = '\\'; - *rp++ = 'x'; - rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp); - } - else if (bytea_output == BYTEA_OUTPUT_ESCAPE) - { - /* Print traditional escaped format */ - char *vp; - uint64 len; - int i; - - len = 1; /* empty string has 1 char */ - vp = VARDATA_ANY(vlena); - for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) - { - if (*vp == '\\') - len += 2; - else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) - len += 4; - else - len++; - } - - /* - * In principle len can't overflow uint32 if the input fit in 1GB, but - * for safety let's check rather than relying on palloc's internal - * check. - */ - if (len > MaxAllocSize) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg_internal("result of bytea output conversion is too large"))); - rp = result = (char *) palloc(len); - - vp = VARDATA_ANY(vlena); - for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++) - { - if (*vp == '\\') - { - *rp++ = '\\'; - *rp++ = '\\'; - } - else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e) - { - int val; /* holds unprintable chars */ - - val = *vp; - rp[0] = '\\'; - rp[3] = DIG(val & 07); - val >>= 3; - rp[2] = DIG(val & 07); - val >>= 3; - rp[1] = DIG(val & 03); - rp += 4; - } - else - *rp++ = *vp; - } - } - else - { - elog(ERROR, "unrecognized \"bytea_output\" setting: %d", - bytea_output); - rp = result = NULL; /* keep compiler quiet */ - } - *rp = '\0'; - PG_RETURN_CSTRING(result); -} - -/* - * bytearecv - converts external binary format to bytea - */ -Datum -bytearecv(PG_FUNCTION_ARGS) -{ - StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); - bytea *result; - int nbytes; - - nbytes = buf->len - buf->cursor; - result = (bytea *) palloc(nbytes + VARHDRSZ); - SET_VARSIZE(result, nbytes + VARHDRSZ); - pq_copymsgbytes(buf, VARDATA(result), nbytes); - PG_RETURN_BYTEA_P(result); -} - -/* - * byteasend - converts bytea to binary format - * - * This is a special case: just copy the input... - */ -Datum -byteasend(PG_FUNCTION_ARGS) -{ - bytea *vlena = PG_GETARG_BYTEA_P_COPY(0); - - PG_RETURN_BYTEA_P(vlena); -} - -Datum -bytea_string_agg_transfn(PG_FUNCTION_ARGS) -{ - StringInfo state; - - state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); - - /* Append the value unless null, preceding it with the delimiter. */ - if (!PG_ARGISNULL(1)) - { - bytea *value = PG_GETARG_BYTEA_PP(1); - bool isfirst = false; - - /* - * You might think we can just throw away the first delimiter, however - * we must keep it as we may be a parallel worker doing partial - * aggregation building a state to send to the main process. We need - * to keep the delimiter of every aggregation so that the combine - * function can properly join up the strings of two separately - * partially aggregated results. The first delimiter is only stripped - * off in the final function. To know how much to strip off the front - * of the string, we store the length of the first delimiter in the - * StringInfo's cursor field, which we don't otherwise need here. - */ - if (state == NULL) - { - state = makeStringAggState(fcinfo); - isfirst = true; - } - - if (!PG_ARGISNULL(2)) - { - bytea *delim = PG_GETARG_BYTEA_PP(2); - - appendBinaryStringInfo(state, VARDATA_ANY(delim), - VARSIZE_ANY_EXHDR(delim)); - if (isfirst) - state->cursor = VARSIZE_ANY_EXHDR(delim); - } - - appendBinaryStringInfo(state, VARDATA_ANY(value), - VARSIZE_ANY_EXHDR(value)); - } - - /* - * The transition type for string_agg() is declared to be "internal", - * which is a pass-by-value type the same size as a pointer. - */ - if (state) - PG_RETURN_POINTER(state); - PG_RETURN_NULL(); -} - -Datum -bytea_string_agg_finalfn(PG_FUNCTION_ARGS) -{ - StringInfo state; - - /* cannot be called directly because of internal-type argument */ - Assert(AggCheckCallContext(fcinfo, NULL)); - - state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0); - - if (state != NULL) - { - /* As per comment in transfn, strip data before the cursor position */ - bytea *result; - int strippedlen = state->len - state->cursor; - - result = (bytea *) palloc(strippedlen + VARHDRSZ); - SET_VARSIZE(result, strippedlen + VARHDRSZ); - memcpy(VARDATA(result), &state->data[state->cursor], strippedlen); - PG_RETURN_BYTEA_P(result); - } - else - PG_RETURN_NULL(); -} - /* * textin - converts cstring to internal representation */ @@ -2959,467 +2647,6 @@ bttext_pattern_sortsupport(PG_FUNCTION_ARGS) } -/*------------------------------------------------------------- - * byteaoctetlen - * - * get the number of bytes contained in an instance of type 'bytea' - *------------------------------------------------------------- - */ -Datum -byteaoctetlen(PG_FUNCTION_ARGS) -{ - Datum str = PG_GETARG_DATUM(0); - - /* We need not detoast the input at all */ - PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ); -} - -/* - * byteacat - - * takes two bytea* and returns a bytea* that is the concatenation of - * the two. - * - * Cloned from textcat and modified as required. - */ -Datum -byteacat(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - - PG_RETURN_BYTEA_P(bytea_catenate(t1, t2)); -} - -/* - * bytea_catenate - * Guts of byteacat(), broken out so it can be used by other functions - * - * Arguments can be in short-header form, but not compressed or out-of-line - */ -static bytea * -bytea_catenate(bytea *t1, bytea *t2) -{ - bytea *result; - int len1, - len2, - len; - char *ptr; - - len1 = VARSIZE_ANY_EXHDR(t1); - len2 = VARSIZE_ANY_EXHDR(t2); - - /* paranoia ... probably should throw error instead? */ - if (len1 < 0) - len1 = 0; - if (len2 < 0) - len2 = 0; - - len = len1 + len2 + VARHDRSZ; - result = (bytea *) palloc(len); - - /* Set size of result string... */ - SET_VARSIZE(result, len); - - /* Fill data field of result string... */ - ptr = VARDATA(result); - if (len1 > 0) - memcpy(ptr, VARDATA_ANY(t1), len1); - if (len2 > 0) - memcpy(ptr + len1, VARDATA_ANY(t2), len2); - - return result; -} - -#define PG_STR_GET_BYTEA(str_) \ - DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_))) - -/* - * bytea_substr() - * Return a substring starting at the specified position. - * Cloned from text_substr and modified as required. - * - * Input: - * - string - * - starting position (is one-based) - * - string length (optional) - * - * If the starting position is zero or less, then return from the start of the string - * adjusting the length to be consistent with the "negative start" per SQL. - * If the length is less than zero, an ERROR is thrown. If no third argument - * (length) is provided, the length to the end of the string is assumed. - */ -Datum -bytea_substr(PG_FUNCTION_ARGS) -{ - PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), - PG_GETARG_INT32(1), - PG_GETARG_INT32(2), - false)); -} - -/* - * bytea_substr_no_len - - * Wrapper to avoid opr_sanity failure due to - * one function accepting a different number of args. - */ -Datum -bytea_substr_no_len(PG_FUNCTION_ARGS) -{ - PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0), - PG_GETARG_INT32(1), - -1, - true)); -} - -static bytea * -bytea_substring(Datum str, - int S, - int L, - bool length_not_specified) -{ - int32 S1; /* adjusted start position */ - int32 L1; /* adjusted substring length */ - int32 E; /* end position */ - - /* - * The logic here should generally match text_substring(). - */ - S1 = Max(S, 1); - - if (length_not_specified) - { - /* - * Not passed a length - DatumGetByteaPSlice() grabs everything to the - * end of the string if we pass it a negative value for length. - */ - L1 = -1; - } - else if (L < 0) - { - /* SQL99 says to throw an error for E < S, i.e., negative length */ - ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), - errmsg("negative substring length not allowed"))); - L1 = -1; /* silence stupider compilers */ - } - else if (pg_add_s32_overflow(S, L, &E)) - { - /* - * L could be large enough for S + L to overflow, in which case the - * substring must run to end of string. - */ - L1 = -1; - } - else - { - /* - * A zero or negative value for the end position can happen if the - * start was negative or one. SQL99 says to return a zero-length - * string. - */ - if (E < 1) - return PG_STR_GET_BYTEA(""); - - L1 = E - S1; - } - - /* - * If the start position is past the end of the string, SQL99 says to - * return a zero-length string -- DatumGetByteaPSlice() will do that for - * us. We need only convert S1 to zero-based starting position. - */ - return DatumGetByteaPSlice(str, S1 - 1, L1); -} - -/* - * byteaoverlay - * Replace specified substring of first string with second - * - * The SQL standard defines OVERLAY() in terms of substring and concatenation. - * This code is a direct implementation of what the standard says. - */ -Datum -byteaoverlay(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int sp = PG_GETARG_INT32(2); /* substring start position */ - int sl = PG_GETARG_INT32(3); /* substring length */ - - PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); -} - -Datum -byteaoverlay_no_len(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int sp = PG_GETARG_INT32(2); /* substring start position */ - int sl; - - sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */ - PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl)); -} - -static bytea * -bytea_overlay(bytea *t1, bytea *t2, int sp, int sl) -{ - bytea *result; - bytea *s1; - bytea *s2; - int sp_pl_sl; - - /* - * Check for possible integer-overflow cases. For negative sp, throw a - * "substring length" error because that's what should be expected - * according to the spec's definition of OVERLAY(). - */ - if (sp <= 0) - ereport(ERROR, - (errcode(ERRCODE_SUBSTRING_ERROR), - errmsg("negative substring length not allowed"))); - if (pg_add_s32_overflow(sp, sl, &sp_pl_sl)) - ereport(ERROR, - (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("integer out of range"))); - - s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false); - s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true); - result = bytea_catenate(s1, t2); - result = bytea_catenate(result, s2); - - return result; -} - -/* - * bit_count - */ -Datum -bytea_bit_count(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - - PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1))); -} - -/* - * byteapos - - * Return the position of the specified substring. - * Implements the SQL POSITION() function. - * Cloned from textpos and modified as required. - */ -Datum -byteapos(PG_FUNCTION_ARGS) -{ - bytea *t1 = PG_GETARG_BYTEA_PP(0); - bytea *t2 = PG_GETARG_BYTEA_PP(1); - int pos; - int px, - p; - int len1, - len2; - char *p1, - *p2; - - len1 = VARSIZE_ANY_EXHDR(t1); - len2 = VARSIZE_ANY_EXHDR(t2); - - if (len2 <= 0) - PG_RETURN_INT32(1); /* result for empty pattern */ - - p1 = VARDATA_ANY(t1); - p2 = VARDATA_ANY(t2); - - pos = 0; - px = (len1 - len2); - for (p = 0; p <= px; p++) - { - if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0)) - { - pos = p + 1; - break; - }; - p1++; - }; - - PG_RETURN_INT32(pos); -} - -/*------------------------------------------------------------- - * byteaGetByte - * - * this routine treats "bytea" as an array of bytes. - * It returns the Nth byte (a number between 0 and 255). - *------------------------------------------------------------- - */ -Datum -byteaGetByte(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int32 n = PG_GETARG_INT32(1); - int len; - int byte; - - len = VARSIZE_ANY_EXHDR(v); - - if (n < 0 || n >= len) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %d out of valid range, 0..%d", - n, len - 1))); - - byte = ((unsigned char *) VARDATA_ANY(v))[n]; - - PG_RETURN_INT32(byte); -} - -/*------------------------------------------------------------- - * byteaGetBit - * - * This routine treats a "bytea" type like an array of bits. - * It returns the value of the Nth bit (0 or 1). - * - *------------------------------------------------------------- - */ -Datum -byteaGetBit(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int64 n = PG_GETARG_INT64(1); - int byteNo, - bitNo; - int len; - int byte; - - len = VARSIZE_ANY_EXHDR(v); - - if (n < 0 || n >= (int64) len * 8) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, - n, (int64) len * 8 - 1))); - - /* n/8 is now known < len, so safe to cast to int */ - byteNo = (int) (n / 8); - bitNo = (int) (n % 8); - - byte = ((unsigned char *) VARDATA_ANY(v))[byteNo]; - - if (byte & (1 << bitNo)) - PG_RETURN_INT32(1); - else - PG_RETURN_INT32(0); -} - -/*------------------------------------------------------------- - * byteaSetByte - * - * Given an instance of type 'bytea' creates a new one with - * the Nth byte set to the given value. - * - *------------------------------------------------------------- - */ -Datum -byteaSetByte(PG_FUNCTION_ARGS) -{ - bytea *res = PG_GETARG_BYTEA_P_COPY(0); - int32 n = PG_GETARG_INT32(1); - int32 newByte = PG_GETARG_INT32(2); - int len; - - len = VARSIZE(res) - VARHDRSZ; - - if (n < 0 || n >= len) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %d out of valid range, 0..%d", - n, len - 1))); - - /* - * Now set the byte. - */ - ((unsigned char *) VARDATA(res))[n] = newByte; - - PG_RETURN_BYTEA_P(res); -} - -/*------------------------------------------------------------- - * byteaSetBit - * - * Given an instance of type 'bytea' creates a new one with - * the Nth bit set to the given value. - * - *------------------------------------------------------------- - */ -Datum -byteaSetBit(PG_FUNCTION_ARGS) -{ - bytea *res = PG_GETARG_BYTEA_P_COPY(0); - int64 n = PG_GETARG_INT64(1); - int32 newBit = PG_GETARG_INT32(2); - int len; - int oldByte, - newByte; - int byteNo, - bitNo; - - len = VARSIZE(res) - VARHDRSZ; - - if (n < 0 || n >= (int64) len * 8) - ereport(ERROR, - (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), - errmsg("index %" PRId64 " out of valid range, 0..%" PRId64, - n, (int64) len * 8 - 1))); - - /* n/8 is now known < len, so safe to cast to int */ - byteNo = (int) (n / 8); - bitNo = (int) (n % 8); - - /* - * sanity check! - */ - if (newBit != 0 && newBit != 1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("new bit must be 0 or 1"))); - - /* - * Update the byte. - */ - oldByte = ((unsigned char *) VARDATA(res))[byteNo]; - - if (newBit == 0) - newByte = oldByte & (~(1 << bitNo)); - else - newByte = oldByte | (1 << bitNo); - - ((unsigned char *) VARDATA(res))[byteNo] = newByte; - - PG_RETURN_BYTEA_P(res); -} - -/* - * Return reversed bytea - */ -Datum -bytea_reverse(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - const char *p = VARDATA_ANY(v); - int len = VARSIZE_ANY_EXHDR(v); - const char *endp = p + len; - bytea *result = palloc(len + VARHDRSZ); - char *dst = (char *) VARDATA(result) + len; - - SET_VARSIZE(result, len + VARHDRSZ); - - while (p < endp) - *(--dst) = *p++; - - PG_RETURN_BYTEA_P(result); -} - - /* text_name() * Converts a text type to a Name type. */ @@ -3849,331 +3076,6 @@ SplitGUCList(char *rawstring, char separator, return true; } - -/***************************************************************************** - * Comparison Functions used for bytea - * - * Note: btree indexes need these routines not to leak memory; therefore, - * be careful to free working copies of toasted datums. Most places don't - * need to be so careful. - *****************************************************************************/ - -Datum -byteaeq(PG_FUNCTION_ARGS) -{ - Datum arg1 = PG_GETARG_DATUM(0); - Datum arg2 = PG_GETARG_DATUM(1); - bool result; - Size len1, - len2; - - /* - * We can use a fast path for unequal lengths, which might save us from - * having to detoast one or both values. - */ - len1 = toast_raw_datum_size(arg1); - len2 = toast_raw_datum_size(arg2); - if (len1 != len2) - result = false; - else - { - bytea *barg1 = DatumGetByteaPP(arg1); - bytea *barg2 = DatumGetByteaPP(arg2); - - result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), - len1 - VARHDRSZ) == 0); - - PG_FREE_IF_COPY(barg1, 0); - PG_FREE_IF_COPY(barg2, 1); - } - - PG_RETURN_BOOL(result); -} - -Datum -byteane(PG_FUNCTION_ARGS) -{ - Datum arg1 = PG_GETARG_DATUM(0); - Datum arg2 = PG_GETARG_DATUM(1); - bool result; - Size len1, - len2; - - /* - * We can use a fast path for unequal lengths, which might save us from - * having to detoast one or both values. - */ - len1 = toast_raw_datum_size(arg1); - len2 = toast_raw_datum_size(arg2); - if (len1 != len2) - result = true; - else - { - bytea *barg1 = DatumGetByteaPP(arg1); - bytea *barg2 = DatumGetByteaPP(arg2); - - result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2), - len1 - VARHDRSZ) != 0); - - PG_FREE_IF_COPY(barg1, 0); - PG_FREE_IF_COPY(barg2, 1); - } - - PG_RETURN_BOOL(result); -} - -Datum -bytealt(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2))); -} - -Datum -byteale(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2))); -} - -Datum -byteagt(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2))); -} - -Datum -byteage(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2))); -} - -Datum -byteacmp(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - if ((cmp == 0) && (len1 != len2)) - cmp = (len1 < len2) ? -1 : 1; - - PG_FREE_IF_COPY(arg1, 0); - PG_FREE_IF_COPY(arg2, 1); - - PG_RETURN_INT32(cmp); -} - -Datum -bytea_larger(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - bytea *result; - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - result = ((cmp > 0) || ((cmp == 0) && (len1 > len2)) ? arg1 : arg2); - - PG_RETURN_BYTEA_P(result); -} - -Datum -bytea_smaller(PG_FUNCTION_ARGS) -{ - bytea *arg1 = PG_GETARG_BYTEA_PP(0); - bytea *arg2 = PG_GETARG_BYTEA_PP(1); - bytea *result; - int len1, - len2; - int cmp; - - len1 = VARSIZE_ANY_EXHDR(arg1); - len2 = VARSIZE_ANY_EXHDR(arg2); - - cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2)); - result = ((cmp < 0) || ((cmp == 0) && (len1 < len2)) ? arg1 : arg2); - - PG_RETURN_BYTEA_P(result); -} - -Datum -bytea_sortsupport(PG_FUNCTION_ARGS) -{ - SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); - MemoryContext oldcontext; - - oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); - - /* Use generic string SortSupport, forcing "C" collation */ - varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID); - - MemoryContextSwitchTo(oldcontext); - - PG_RETURN_VOID(); -} - -/* Cast bytea -> int2 */ -Datum -bytea_int2(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint16 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("smallint out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT16(result); -} - -/* Cast bytea -> int4 */ -Datum -bytea_int4(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint32 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("integer out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT32(result); -} - -/* Cast bytea -> int8 */ -Datum -bytea_int8(PG_FUNCTION_ARGS) -{ - bytea *v = PG_GETARG_BYTEA_PP(0); - int len = VARSIZE_ANY_EXHDR(v); - uint64 result; - - /* Check that the byte array is not too long */ - if (len > sizeof(result)) - ereport(ERROR, - errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), - errmsg("bigint out of range")); - - /* Convert it to an integer; most significant bytes come first */ - result = 0; - for (int i = 0; i < len; i++) - { - result <<= BITS_PER_BYTE; - result |= ((unsigned char *) VARDATA_ANY(v))[i]; - } - - PG_RETURN_INT64(result); -} - -/* Cast int2 -> bytea; can just use int2send() */ -Datum -int2_bytea(PG_FUNCTION_ARGS) -{ - return int2send(fcinfo); -} - -/* Cast int4 -> bytea; can just use int4send() */ -Datum -int4_bytea(PG_FUNCTION_ARGS) -{ - return int4send(fcinfo); -} - -/* Cast int8 -> bytea; can just use int8send() */ -Datum -int8_bytea(PG_FUNCTION_ARGS) -{ - return int8send(fcinfo); -} - /* * appendStringInfoText * diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index a4150bff2ea..2bd39b6ac4b 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -529,14 +529,36 @@ xmltext(PG_FUNCTION_ARGS) #ifdef USE_LIBXML text *arg = PG_GETARG_TEXT_PP(0); text *result; - xmlChar *xmlbuf = NULL; + volatile xmlChar *xmlbuf = NULL; + PgXmlErrorContext *xmlerrcxt; + + /* Otherwise, we gotta spin up some error handling. */ + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); - xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg)); + PG_TRY(); + { + xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg)); - Assert(xmlbuf); + if (xmlbuf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlChar"); + + result = cstring_to_text_with_len((const char *) xmlbuf, + xmlStrlen((const xmlChar *) xmlbuf)); + } + PG_CATCH(); + { + if (xmlbuf) + xmlFree((xmlChar *) xmlbuf); + + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); + } + PG_END_TRY(); + + xmlFree((xmlChar *) xmlbuf); + pg_xml_done(xmlerrcxt, false); - result = cstring_to_text_with_len((const char *) xmlbuf, xmlStrlen(xmlbuf)); - xmlFree(xmlbuf); PG_RETURN_XML_P(result); #else NO_XML_SUPPORT(); @@ -770,7 +792,10 @@ xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent) if (oldroot != NULL) xmlFreeNode(oldroot); - xmlAddChildList(root, content_nodes); + if (xmlAddChildList(root, content_nodes) == NULL || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not append xml node list"); /* * We use this node to insert newlines in the dump. Note: in at @@ -931,7 +956,10 @@ xmlelement(XmlExpr *xexpr, xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate xmlTextWriter"); - xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name); + if (xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not start xml element"); forboth(arg, named_arg_strings, narg, xexpr->arg_names) { @@ -939,19 +967,30 @@ xmlelement(XmlExpr *xexpr, char *argname = strVal(lfirst(narg)); if (str) - xmlTextWriterWriteAttribute(writer, - (xmlChar *) argname, - (xmlChar *) str); + { + if (xmlTextWriterWriteAttribute(writer, + (xmlChar *) argname, + (xmlChar *) str) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not write xml attribute"); + } } foreach(arg, arg_strings) { char *str = (char *) lfirst(arg); - xmlTextWriterWriteRaw(writer, (xmlChar *) str); + if (xmlTextWriterWriteRaw(writer, (xmlChar *) str) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not write raw xml text"); } - xmlTextWriterEndElement(writer); + if (xmlTextWriterEndElement(writer) < 0 || + xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, + "could not end xml element"); /* we MUST do this now to flush data out to the buffer ... */ xmlFreeTextWriter(writer); @@ -4220,20 +4259,27 @@ xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt) } else { - xmlChar *str; + volatile xmlChar *str = NULL; - str = xmlXPathCastNodeToString(cur); PG_TRY(); { + char *escaped; + + str = xmlXPathCastNodeToString(cur); + if (str == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate xmlChar"); + /* Here we rely on XML having the same representation as TEXT */ - char *escaped = escape_xml((char *) str); + escaped = escape_xml((char *) str); result = (xmltype *) cstring_to_text(escaped); pfree(escaped); } PG_FINALLY(); { - xmlFree(str); + if (str) + xmlFree((xmlChar *) str); } PG_END_TRY(); } diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c index 657648996c2..d1b25214376 100644 --- a/src/backend/utils/cache/catcache.c +++ b/src/backend/utils/cache/catcache.c @@ -317,6 +317,7 @@ GetCCHashEqFuncs(Oid keytype, CCHashFN *hashfunc, RegProcedure *eqfunc, CCFastEq case REGDICTIONARYOID: case REGROLEOID: case REGNAMESPACEOID: + case REGDATABASEOID: *hashfunc = int4hashfast; *fasteqfunc = int4eqfast; *eqfunc = F_OIDEQ; diff --git a/src/backend/utils/misc/injection_point.c b/src/backend/utils/misc/injection_point.c index f58ebc8ee52..83b887b6978 100644 --- a/src/backend/utils/misc/injection_point.c +++ b/src/backend/utils/misc/injection_point.c @@ -584,3 +584,49 @@ IsInjectionPointAttached(const char *name) return false; /* silence compiler */ #endif } + +/* + * Retrieve a list of all the injection points currently attached. + * + * This list is palloc'd in the current memory context. + */ +List * +InjectionPointList(void) +{ +#ifdef USE_INJECTION_POINTS + List *inj_points = NIL; + uint32 max_inuse; + + LWLockAcquire(InjectionPointLock, LW_SHARED); + + max_inuse = pg_atomic_read_u32(&ActiveInjectionPoints->max_inuse); + + for (uint32 idx = 0; idx < max_inuse; idx++) + { + InjectionPointEntry *entry; + InjectionPointData *inj_point; + uint64 generation; + + entry = &ActiveInjectionPoints->entries[idx]; + generation = pg_atomic_read_u64(&entry->generation); + + /* skip free slots */ + if (generation % 2 == 0) + continue; + + inj_point = (InjectionPointData *) palloc0(sizeof(InjectionPointData)); + inj_point->name = pstrdup(entry->name); + inj_point->library = pstrdup(entry->library); + inj_point->function = pstrdup(entry->function); + inj_points = lappend(inj_points, inj_point); + } + + LWLockRelease(InjectionPointLock); + + return inj_points; + +#else + elog(ERROR, "Injection points are not supported by this build"); + return NIL; /* keep compiler quiet */ +#endif +} diff --git a/src/backend/utils/mmgr/dsa.c b/src/backend/utils/mmgr/dsa.c index 17d4f7a7a06..be43e9351c3 100644 --- a/src/backend/utils/mmgr/dsa.c +++ b/src/backend/utils/mmgr/dsa.c @@ -532,6 +532,21 @@ dsa_attach(dsa_handle handle) } /* + * Returns whether the area with the given handle was already attached by the + * current process. The area must have been created with dsa_create (not + * dsa_create_in_place). + */ +bool +dsa_is_attached(dsa_handle handle) +{ + /* + * An area handle is really a DSM segment handle for the first segment, so + * we can just search for that. + */ + return dsm_find_mapping(handle) != NULL; +} + +/* * Attach to an area that was created with dsa_create_in_place. The caller * must somehow know the location in memory that was used when the area was * created, though it may be mapped at a different virtual address in this |