diff options
Diffstat (limited to 'src/backend/utils/adt/numutils.c')
-rw-r--r-- | src/backend/utils/adt/numutils.c | 306 |
1 files changed, 279 insertions, 27 deletions
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c index 471fbb7ee63..d07a5602076 100644 --- a/src/backend/utils/adt/numutils.c +++ b/src/backend/utils/adt/numutils.c @@ -97,9 +97,18 @@ static const int8 hexlookup[128] = { }; /* - * Convert input string to a signed 16 bit integer. + * Convert input string to a signed 16 bit integer. Input strings may be + * expressed in base-10, hexadecimal, octal, or binary format, all of which + * can be prefixed by an optional sign character, either '+' (the default) or + * '-' for negative numbers. Hex strings are recognized by the digits being + * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O + * prefix. The binary representation is recognized by the 0b or 0B prefix. * - * Allows any number of leading or trailing whitespace characters. + * Allows any number of leading or trailing whitespace characters. Digits may + * optionally be separated by a single underscore character. These can only + * come between digits and not before or after the digits. Underscores have + * no effect on the return value and are supported only to assist in improving + * the human readability of the input strings. * * pg_strtoint16() will throw ereport() upon bad input format or overflow; * while pg_strtoint16_safe() instead returns such complaints in *escontext, @@ -122,9 +131,84 @@ pg_strtoint16_safe(const char *s, Node *escontext) const char *firstdigit; uint16 tmp = 0; bool neg = false; + unsigned char digit; + + /* + * The majority of cases are likely to be base-10 digits without any + * underscore separator characters. We'll first try to parse the string + * with the assumption that's the case and only fall back on a slower + * implementation which handles hex, octal and binary strings and + * underscores if the fastpath version cannot parse the string. 
+ */ + + /* leave it up to the slow path to look for leading spaces */ + + if (*ptr == '-') + { + ptr++; + neg = true; + } + + /* a leading '+' is uncommon so leave that for the slow path */ + + /* process the first digit */ + digit = (*ptr - '0'); + + /* + * Exploit unsigned arithmetic to save having to check both the upper and + * lower bounds of the digit. + */ + if (likely(digit < 10)) + { + ptr++; + tmp = digit; + } + else + { + /* we need at least one digit */ + goto slow; + } + + /* process remaining digits */ + for (;;) + { + digit = (*ptr - '0'); + + if (digit >= 10) + break; + + ptr++; + + if (unlikely(tmp > -(PG_INT16_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + digit; + } + + /* when the string does not end in a digit, let the slow path handle it */ + if (unlikely(*ptr != '\0')) + goto slow; + + if (neg) + { + /* check the negative equivalent will fit without overflowing */ + if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1)) + goto out_of_range; + return -((int16) tmp); + } + + if (unlikely(tmp > PG_INT16_MAX)) + goto out_of_range; + + return (int16) tmp; + +slow: + tmp = 0; + ptr = s; + /* no need to reset neg */ /* skip leading spaces */ - while (likely(*ptr) && isspace((unsigned char) *ptr)) + while (isspace((unsigned char) *ptr)) ptr++; /* handle sign */ @@ -141,7 +225,7 @@ pg_strtoint16_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr) + for (;;) { if (isxdigit((unsigned char) *ptr)) { @@ -165,7 +249,7 @@ pg_strtoint16_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr) + for (;;) { if (*ptr >= '0' && *ptr <= '7') { @@ -189,7 +273,7 @@ pg_strtoint16_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr) + for (;;) { if (*ptr >= '0' && *ptr <= '1') { @@ -213,9 +297,9 @@ pg_strtoint16_safe(const char *s, Node *escontext) { firstdigit = ptr; - while (*ptr) + for (;;) { - if (isdigit((unsigned char) *ptr)) + if (*ptr >= '0' && *ptr <= '9') { if (unlikely(tmp > 
-(PG_INT16_MIN / 10))) goto out_of_range; @@ -242,7 +326,7 @@ pg_strtoint16_safe(const char *s, Node *escontext) goto invalid_syntax; /* allow trailing whitespace, but not other trailing chars */ - while (*ptr != '\0' && isspace((unsigned char) *ptr)) + while (isspace((unsigned char) *ptr)) ptr++; if (unlikely(*ptr != '\0')) @@ -275,9 +359,18 @@ invalid_syntax: } /* - * Convert input string to a signed 32 bit integer. + * Convert input string to a signed 32 bit integer. Input strings may be + * expressed in base-10, hexadecimal, octal, or binary format, all of which + * can be prefixed by an optional sign character, either '+' (the default) or + * '-' for negative numbers. Hex strings are recognized by the digits being + * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O + * prefix. The binary representation is recognized by the 0b or 0B prefix. * - * Allows any number of leading or trailing whitespace characters. + * Allows any number of leading or trailing whitespace characters. Digits may + * optionally be separated by a single underscore character. These can only + * come between digits and not before or after the digits. Underscores have + * no effect on the return value and are supported only to assist in improving + * the human readability of the input strings. * * pg_strtoint32() will throw ereport() upon bad input format or overflow; * while pg_strtoint32_safe() instead returns such complaints in *escontext, @@ -300,9 +393,84 @@ pg_strtoint32_safe(const char *s, Node *escontext) const char *firstdigit; uint32 tmp = 0; bool neg = false; + unsigned char digit; + + /* + * The majority of cases are likely to be base-10 digits without any + * underscore separator characters. We'll first try to parse the string + * with the assumption that's the case and only fall back on a slower + * implementation which handles hex, octal and binary strings and + * underscores if the fastpath version cannot parse the string. 
+ */ + + /* leave it up to the slow path to look for leading spaces */ + + if (*ptr == '-') + { + ptr++; + neg = true; + } + + /* a leading '+' is uncommon so leave that for the slow path */ + + /* process the first digit */ + digit = (*ptr - '0'); + + /* + * Exploit unsigned arithmetic to save having to check both the upper and + * lower bounds of the digit. + */ + if (likely(digit < 10)) + { + ptr++; + tmp = digit; + } + else + { + /* we need at least one digit */ + goto slow; + } + + /* process remaining digits */ + for (;;) + { + digit = (*ptr - '0'); + + if (digit >= 10) + break; + + ptr++; + + if (unlikely(tmp > -(PG_INT32_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + digit; + } + + /* when the string does not end in a digit, let the slow path handle it */ + if (unlikely(*ptr != '\0')) + goto slow; + + if (neg) + { + /* check the negative equivalent will fit without overflowing */ + if (unlikely(tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1)) + goto out_of_range; + return -((int32) tmp); + } + + if (unlikely(tmp > PG_INT32_MAX)) + goto out_of_range; + + return (int32) tmp; + +slow: + tmp = 0; + ptr = s; + /* no need to reset neg */ /* skip leading spaces */ - while (likely(*ptr) && isspace((unsigned char) *ptr)) + while (isspace((unsigned char) *ptr)) ptr++; /* handle sign */ @@ -319,7 +487,7 @@ pg_strtoint32_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr) + for (;;) { if (isxdigit((unsigned char) *ptr)) { @@ -343,7 +511,7 @@ pg_strtoint32_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr) + for (;;) { if (*ptr >= '0' && *ptr <= '7') { @@ -367,7 +535,7 @@ pg_strtoint32_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr) + for (;;) { if (*ptr >= '0' && *ptr <= '1') { @@ -391,9 +559,9 @@ pg_strtoint32_safe(const char *s, Node *escontext) { firstdigit = ptr; - while (*ptr) + for (;;) { - if (isdigit((unsigned char) *ptr)) + if (*ptr >= '0' && *ptr <= '9') { if (unlikely(tmp > 
-(PG_INT32_MIN / 10))) goto out_of_range; @@ -420,7 +588,7 @@ pg_strtoint32_safe(const char *s, Node *escontext) goto invalid_syntax; /* allow trailing whitespace, but not other trailing chars */ - while (*ptr != '\0' && isspace((unsigned char) *ptr)) + while (isspace((unsigned char) *ptr)) ptr++; if (unlikely(*ptr != '\0')) @@ -453,9 +621,18 @@ invalid_syntax: } /* - * Convert input string to a signed 64 bit integer. + * Convert input string to a signed 64 bit integer. Input strings may be + * expressed in base-10, hexadecimal, octal, or binary format, all of which + * can be prefixed by an optional sign character, either '+' (the default) or + * '-' for negative numbers. Hex strings are recognized by the digits being + * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O + * prefix. The binary representation is recognized by the 0b or 0B prefix. * - * Allows any number of leading or trailing whitespace characters. + * Allows any number of leading or trailing whitespace characters. Digits may + * optionally be separated by a single underscore character. These can only + * come between digits and not before or after the digits. Underscores have + * no effect on the return value and are supported only to assist in improving + * the human readability of the input strings. * * pg_strtoint64() will throw ereport() upon bad input format or overflow; * while pg_strtoint64_safe() instead returns such complaints in *escontext, @@ -478,9 +655,84 @@ pg_strtoint64_safe(const char *s, Node *escontext) const char *firstdigit; uint64 tmp = 0; bool neg = false; + unsigned char digit; + + /* + * The majority of cases are likely to be base-10 digits without any + * underscore separator characters. We'll first try to parse the string + * with the assumption that's the case and only fall back on a slower + * implementation which handles hex, octal and binary strings and + * underscores if the fastpath version cannot parse the string. 
+ */ + + /* leave it up to the slow path to look for leading spaces */ + + if (*ptr == '-') + { + ptr++; + neg = true; + } + + /* a leading '+' is uncommon so leave that for the slow path */ + + /* process the first digit */ + digit = (*ptr - '0'); + + /* + * Exploit unsigned arithmetic to save having to check both the upper and + * lower bounds of the digit. + */ + if (likely(digit < 10)) + { + ptr++; + tmp = digit; + } + else + { + /* we need at least one digit */ + goto slow; + } + + /* process remaining digits */ + for (;;) + { + digit = (*ptr - '0'); + + if (digit >= 10) + break; + + ptr++; + + if (unlikely(tmp > -(PG_INT64_MIN / 10))) + goto out_of_range; + + tmp = tmp * 10 + digit; + } + + /* when the string does not end in a digit, let the slow path handle it */ + if (unlikely(*ptr != '\0')) + goto slow; + + if (neg) + { + /* check the negative equivalent will fit without overflowing */ + if (unlikely(tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1)) + goto out_of_range; + return -((int64) tmp); + } + + if (unlikely(tmp > PG_INT64_MAX)) + goto out_of_range; + + return (int64) tmp; + +slow: + tmp = 0; + ptr = s; + /* no need to reset neg */ /* skip leading spaces */ - while (*ptr && isspace((unsigned char) *ptr)) + while (isspace((unsigned char) *ptr)) ptr++; /* handle sign */ @@ -497,7 +749,7 @@ pg_strtoint64_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr) + for (;;) { if (isxdigit((unsigned char) *ptr)) { @@ -521,7 +773,7 @@ pg_strtoint64_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr) + for (;;) { if (*ptr >= '0' && *ptr <= '7') { @@ -545,7 +797,7 @@ pg_strtoint64_safe(const char *s, Node *escontext) { firstdigit = ptr += 2; - while (*ptr) + for (;;) { if (*ptr >= '0' && *ptr <= '1') { @@ -569,9 +821,9 @@ pg_strtoint64_safe(const char *s, Node *escontext) { firstdigit = ptr; - while (*ptr) + for (;;) { - if (isdigit((unsigned char) *ptr)) + if (*ptr >= '0' && *ptr <= '9') { if (unlikely(tmp > 
-(PG_INT64_MIN / 10))) goto out_of_range; @@ -598,7 +850,7 @@ pg_strtoint64_safe(const char *s, Node *escontext) goto invalid_syntax; /* allow trailing whitespace, but not other trailing chars */ - while (*ptr != '\0' && isspace((unsigned char) *ptr)) + while (isspace((unsigned char) *ptr)) ptr++; if (unlikely(*ptr != '\0')) |