1 files changed, 279 insertions, 27 deletions
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 471fbb7ee63..d07a5602076 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -97,9 +97,18 @@ static const int8 hexlookup[128] = {
 };
 
 /*
- * Convert input string to a signed 16 bit integer.
+ * Convert input string to a signed 16 bit integer.  Input strings may be
+ * expressed in base-10, hexadecimal, octal, or binary format, all of which
+ * can be prefixed by an optional sign character, either '+' (the default) or
+ * '-' for negative numbers.  Hex strings are recognized by the digits being
+ * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
+ * prefix.  The binary representation is recognized by the 0b or 0B prefix.
  *
- * Allows any number of leading or trailing whitespace characters.
+ * Allows any number of leading or trailing whitespace characters.  Digits may
+ * optionally be separated by a single underscore character.  These can only
+ * come between digits and not before or after the digits.  Underscores have
+ * no effect on the return value and are supported only to assist in improving
+ * the human readability of the input strings.
  *
  * pg_strtoint16() will throw ereport() upon bad input format or overflow;
  * while pg_strtoint16_safe() instead returns such complaints in *escontext,
@@ -122,9 +131,84 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	const char *firstdigit;
 	uint16		tmp = 0;
 	bool		neg = false;
+	unsigned char digit;
+
+	/*
+	 * The majority of cases are likely to be base-10 digits without any
+	 * underscore separator characters.  We'll first try to parse the string
+	 * with the assumption that's the case and only fallback on a slower
+	 * implementation which handles hex, octal and binary strings and
+	 * underscores if the fastpath version cannot parse the string.
+	 */
+
+	/* leave it up to the slow path to look for leading spaces */
+
+	if (*ptr == '-')
+	{
+		ptr++;
+		neg = true;
+	}
+
+	/* a leading '+' is uncommon so leave that for the slow path */
+
+	/* process the first digit */
+	digit = (*ptr - '0');
+
+	/*
+	 * Exploit unsigned arithmetic to save having to check both the upper and
+	 * lower bounds of the digit.
+	 */
+	if (likely(digit < 10))
+	{
+		ptr++;
+		tmp = digit;
+	}
+	else
+	{
+		/* we need at least one digit */
+		goto slow;
+	}
+
+	/* process remaining digits */
+	for (;;)
+	{
+		digit = (*ptr - '0');
+
+		if (digit >= 10)
+			break;
+
+		ptr++;
+
+		if (unlikely(tmp > -(PG_INT16_MIN / 10)))
+			goto out_of_range;
+
+		tmp = tmp * 10 + digit;
+	}
+
+	/* when the string does not end in a digit, let the slow path handle it */
+	if (unlikely(*ptr != '\0'))
+		goto slow;
+
+	if (neg)
+	{
+		/* check the negative equivalent will fit without overflowing */
+		if (unlikely(tmp > (uint16) (-(PG_INT16_MIN + 1)) + 1))
+			goto out_of_range;
+		return -((int16) tmp);
+	}
+
+	if (unlikely(tmp > PG_INT16_MAX))
+		goto out_of_range;
+
+	return (int16) tmp;
+
+slow:
+	tmp = 0;
+	ptr = s;
+	/* no need to reset neg */
 
 	/* skip leading spaces */
-	while (likely(*ptr) && isspace((unsigned char) *ptr))
+	while (isspace((unsigned char) *ptr))
 		ptr++;
 
 	/* handle sign */
@@ -141,7 +225,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (isxdigit((unsigned char) *ptr))
 			{
@@ -165,7 +249,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (*ptr >= '0' && *ptr <= '7')
 			{
@@ -189,7 +273,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (*ptr >= '0' && *ptr <= '1')
 			{
@@ -213,9 +297,9 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr;
 
-		while (*ptr)
+		for (;;)
 		{
-			if (isdigit((unsigned char) *ptr))
+			if (*ptr >= '0' && *ptr <= '9')
 			{
 				if (unlikely(tmp > -(PG_INT16_MIN / 10)))
 					goto out_of_range;
@@ -242,7 +326,7 @@ pg_strtoint16_safe(const char *s, Node *escontext)
 		goto invalid_syntax;
 
 	/* allow trailing whitespace, but not other trailing chars */
-	while (*ptr != '\0' && isspace((unsigned char) *ptr))
+	while (isspace((unsigned char) *ptr))
 		ptr++;
 
 	if (unlikely(*ptr != '\0'))
@@ -275,9 +359,18 @@ invalid_syntax:
 }
 
 /*
- * Convert input string to a signed 32 bit integer.
+ * Convert input string to a signed 32 bit integer.  Input strings may be
+ * expressed in base-10, hexadecimal, octal, or binary format, all of which
+ * can be prefixed by an optional sign character, either '+' (the default) or
+ * '-' for negative numbers.  Hex strings are recognized by the digits being
+ * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
+ * prefix.  The binary representation is recognized by the 0b or 0B prefix.
  *
- * Allows any number of leading or trailing whitespace characters.
+ * Allows any number of leading or trailing whitespace characters.  Digits may
+ * optionally be separated by a single underscore character.  These can only
+ * come between digits and not before or after the digits.  Underscores have
+ * no effect on the return value and are supported only to assist in improving
+ * the human readability of the input strings.
  *
  * pg_strtoint32() will throw ereport() upon bad input format or overflow;
  * while pg_strtoint32_safe() instead returns such complaints in *escontext,
@@ -300,9 +393,84 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 	const char *firstdigit;
 	uint32		tmp = 0;
 	bool		neg = false;
+	unsigned char digit;
+
+	/*
+	 * The majority of cases are likely to be base-10 digits without any
+	 * underscore separator characters.  We'll first try to parse the string
+	 * with the assumption that's the case and only fallback on a slower
+	 * implementation which handles hex, octal and binary strings and
+	 * underscores if the fastpath version cannot parse the string.
+	 */
+
+	/* leave it up to the slow path to look for leading spaces */
+
+	if (*ptr == '-')
+	{
+		ptr++;
+		neg = true;
+	}
+
+	/* a leading '+' is uncommon so leave that for the slow path */
+
+	/* process the first digit */
+	digit = (*ptr - '0');
+
+	/*
+	 * Exploit unsigned arithmetic to save having to check both the upper and
+	 * lower bounds of the digit.
+	 */
+	if (likely(digit < 10))
+	{
+		ptr++;
+		tmp = digit;
+	}
+	else
+	{
+		/* we need at least one digit */
+		goto slow;
+	}
+
+	/* process remaining digits */
+	for (;;)
+	{
+		digit = (*ptr - '0');
+
+		if (digit >= 10)
+			break;
+
+		ptr++;
+
+		if (unlikely(tmp > -(PG_INT32_MIN / 10)))
+			goto out_of_range;
+
+		tmp = tmp * 10 + digit;
+	}
+
+	/* when the string does not end in a digit, let the slow path handle it */
+	if (unlikely(*ptr != '\0'))
+		goto slow;
+
+	if (neg)
+	{
+		/* check the negative equivalent will fit without overflowing */
+		if (unlikely(tmp > (uint32) (-(PG_INT32_MIN + 1)) + 1))
+			goto out_of_range;
+		return -((int32) tmp);
+	}
+
+	if (unlikely(tmp > PG_INT32_MAX))
+		goto out_of_range;
+
+	return (int32) tmp;
+
+slow:
+	tmp = 0;
+	ptr = s;
+	/* no need to reset neg */
 
 	/* skip leading spaces */
-	while (likely(*ptr) && isspace((unsigned char) *ptr))
+	while (isspace((unsigned char) *ptr))
 		ptr++;
 
 	/* handle sign */
@@ -319,7 +487,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (isxdigit((unsigned char) *ptr))
 			{
@@ -343,7 +511,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (*ptr >= '0' && *ptr <= '7')
 			{
@@ -367,7 +535,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (*ptr >= '0' && *ptr <= '1')
 			{
@@ -391,9 +559,9 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr;
 
-		while (*ptr)
+		for (;;)
 		{
-			if (isdigit((unsigned char) *ptr))
+			if (*ptr >= '0' && *ptr <= '9')
 			{
 				if (unlikely(tmp > -(PG_INT32_MIN / 10)))
 					goto out_of_range;
@@ -420,7 +588,7 @@ pg_strtoint32_safe(const char *s, Node *escontext)
 		goto invalid_syntax;
 
 	/* allow trailing whitespace, but not other trailing chars */
-	while (*ptr != '\0' && isspace((unsigned char) *ptr))
+	while (isspace((unsigned char) *ptr))
 		ptr++;
 
 	if (unlikely(*ptr != '\0'))
@@ -453,9 +621,18 @@ invalid_syntax:
 }
 
 /*
- * Convert input string to a signed 64 bit integer.
+ * Convert input string to a signed 64 bit integer.  Input strings may be
+ * expressed in base-10, hexadecimal, octal, or binary format, all of which
+ * can be prefixed by an optional sign character, either '+' (the default) or
+ * '-' for negative numbers.  Hex strings are recognized by the digits being
+ * prefixed by 0x or 0X while octal strings are recognized by the 0o or 0O
+ * prefix.  The binary representation is recognized by the 0b or 0B prefix.
  *
- * Allows any number of leading or trailing whitespace characters.
+ * Allows any number of leading or trailing whitespace characters.  Digits may
+ * optionally be separated by a single underscore character.  These can only
+ * come between digits and not before or after the digits.  Underscores have
+ * no effect on the return value and are supported only to assist in improving
+ * the human readability of the input strings.
  *
  * pg_strtoint64() will throw ereport() upon bad input format or overflow;
  * while pg_strtoint64_safe() instead returns such complaints in *escontext,
@@ -478,9 +655,84 @@ pg_strtoint64_safe(const char *s, Node *escontext)
 	const char *firstdigit;
 	uint64		tmp = 0;
 	bool		neg = false;
+	unsigned char digit;
+
+	/*
+	 * The majority of cases are likely to be base-10 digits without any
+	 * underscore separator characters.  We'll first try to parse the string
+	 * with the assumption that's the case and only fallback on a slower
+	 * implementation which handles hex, octal and binary strings and
+	 * underscores if the fastpath version cannot parse the string.
+	 */
+
+	/* leave it up to the slow path to look for leading spaces */
+
+	if (*ptr == '-')
+	{
+		ptr++;
+		neg = true;
+	}
+
+	/* a leading '+' is uncommon so leave that for the slow path */
+
+	/* process the first digit */
+	digit = (*ptr - '0');
+
+	/*
+	 * Exploit unsigned arithmetic to save having to check both the upper and
+	 * lower bounds of the digit.
+	 */
+	if (likely(digit < 10))
+	{
+		ptr++;
+		tmp = digit;
+	}
+	else
+	{
+		/* we need at least one digit */
+		goto slow;
+	}
+
+	/* process remaining digits */
+	for (;;)
+	{
+		digit = (*ptr - '0');
+
+		if (digit >= 10)
+			break;
+
+		ptr++;
+
+		if (unlikely(tmp > -(PG_INT64_MIN / 10)))
+			goto out_of_range;
+
+		tmp = tmp * 10 + digit;
+	}
+
+	/* when the string does not end in a digit, let the slow path handle it */
+	if (unlikely(*ptr != '\0'))
+		goto slow;
+
+	if (neg)
+	{
+		/* check the negative equivalent will fit without overflowing */
+		if (unlikely(tmp > (uint64) (-(PG_INT64_MIN + 1)) + 1))
+			goto out_of_range;
+		return -((int64) tmp);
+	}
+
+	if (unlikely(tmp > PG_INT64_MAX))
+		goto out_of_range;
+
+	return (int64) tmp;
+
+slow:
+	tmp = 0;
+	ptr = s;
+	/* no need to reset neg */
 
 	/* skip leading spaces */
-	while (*ptr && isspace((unsigned char) *ptr))
+	while (isspace((unsigned char) *ptr))
 		ptr++;
 
 	/* handle sign */
@@ -497,7 +749,7 @@ pg_strtoint64_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (isxdigit((unsigned char) *ptr))
 			{
@@ -521,7 +773,7 @@ pg_strtoint64_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (*ptr >= '0' && *ptr <= '7')
 			{
@@ -545,7 +797,7 @@ pg_strtoint64_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr += 2;
 
-		while (*ptr)
+		for (;;)
 		{
 			if (*ptr >= '0' && *ptr <= '1')
 			{
@@ -569,9 +821,9 @@ pg_strtoint64_safe(const char *s, Node *escontext)
 	{
 		firstdigit = ptr;
 
-		while (*ptr)
+		for (;;)
 		{
-			if (isdigit((unsigned char) *ptr))
+			if (*ptr >= '0' && *ptr <= '9')
 			{
 				if (unlikely(tmp > -(PG_INT64_MIN / 10)))
 					goto out_of_range;
@@ -598,7 +850,7 @@ pg_strtoint64_safe(const char *s, Node *escontext)
 		goto invalid_syntax;
 
 	/* allow trailing whitespace, but not other trailing chars */
-	while (*ptr != '\0' && isspace((unsigned char) *ptr))
+	while (isspace((unsigned char) *ptr))
 		ptr++;
 
 	if (unlikely(*ptr != '\0'))