Modify wchar conversion routines to not fetch the next byte past the end

of a counted input string. Marinos Yannikos' recent crash report turns out to be due to applying pg_ascii2wchar_with_len to a TEXT object that is smack up against the end of memory. This is the second just-barely-reproducible bug report I have seen that traces to some bit of code fetching one more byte than it is allowed to. Let's be more careful out there, boys and girls. While at it, I changed the code to not risk a similar crash when there is a truncated multibyte character at the end of an input string. The output in this case might not be the most reasonable output possible; if anyone wants to improve it further, step right up...
author: Tom Lane <tgl@sss.pgh.pa.us> 2001-03-08 00:24:34 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2001-03-08 00:24:34 +0000
commit: 572fda27118f7e54c7e4ebb75b48d33896e5f776 (patch)
tree: 744a314265185c7d65b9e7ebfca3c28c608409d7 /src/backend/utils
parent: b109b03feaec96aab6c635137b5a668389df8d31 (diff)
download: postgresql-572fda27118f7e54c7e4ebb75b48d33896e5f776.tar.gz
postgresql-572fda27118f7e54c7e4ebb75b48d33896e5f776.zip
2 files changed, 33 insertions, 35 deletions
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 0d3d8cb69bd..2abae59d62b 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -3,7 +3,7 @@
  * client encoding and server internal encoding.
  * (currently mule internal code (mic) is used)
  * Tatsuo Ishii
- * $Id: mbutils.c,v 1.15 2001/02/10 02:31:27 tgl Exp $
+ * $Id: mbutils.c,v 1.16 2001/03/08 00:24:34 tgl Exp $
  */
 #include "postgres.h"
 
@@ -230,7 +230,7 @@ pg_mbstrlen_with_len(const unsigned char *mbstr, int limit)
 	int			len = 0;
 	int			l;
 
-	while (*mbstr && limit > 0)
+	while (limit > 0 && *mbstr)
 	{
 		l = pg_mblen(mbstr);
 		limit -= l;
@@ -252,7 +252,7 @@ pg_mbcliplen(const unsigned char *mbstr, int len, int limit)
 	int			clen = 0;
 	int			l;
 
-	while (*mbstr && len > 0)
+	while (len > 0 && *mbstr)
 	{
 		l = pg_mblen(mbstr);
 		if ((clen + l) > limit)
@@ -267,7 +267,7 @@ pg_mbcliplen(const unsigned char *mbstr, int len, int limit)
 }
 
 /*
- * fuctions for utils/init
+ * functions for utils/init
  */
 static int	DatabaseEncoding = MULTIBYTE;
 
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
index a4bf1131ad2..6d10cad020a 100644
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
@@ -1,7 +1,7 @@
 /*
  * conversion functions between pg_wchar and multi-byte streams.
  * Tatsuo Ishii
- * $Id: wchar.c,v 1.15 2001/02/11 01:59:22 ishii Exp $
+ * $Id: wchar.c,v 1.16 2001/03/08 00:24:34 tgl Exp $
  *
  * WIN1250 client encoding updated by Pavel Behal
  *
@@ -27,7 +27,7 @@ static int pg_ascii2wchar_with_len
 {
 	int cnt = 0;
 
-	while (*from && len > 0)
+	while (len > 0 && *from)
 	{
 		*to++ = *from++;
 		len--;
@@ -52,23 +52,22 @@ static int pg_euc2wchar_with_len
 {
 	int cnt = 0;
 
-	while (*from && len > 0)
+	while (len > 0 && *from)
 	{
-		if (*from == SS2)
+		if (*from == SS2 && len >= 2)
 		{
 			from++;
-			len--;
 			*to = 0xff & *from++;
-			len--;
+			len -= 2;
 		}
-		else if (*from == SS3)
+		else if (*from == SS3 && len >= 3)
 		{
 			from++;
 			*to = *from++ << 8;
 			*to |= 0x3f & *from++;
 			len -= 3;
 		}
-		else if (*from & 0x80)
+		else if ((*from & 0x80) && len >= 2)
 		{
 			*to = *from++ << 8;
 			*to |= *from++;
@@ -140,24 +139,23 @@ static int pg_euccn2wchar_with_len
 {
 	int cnt = 0;
 
-	while (*from && len > 0)
+	while (len > 0 && *from)
 	{
-		if (*from == SS2)
+		if (*from == SS2 && len >= 3)
 		{
 			from++;
-			len--;
 			*to = 0x3f00 & (*from++ << 8);
 			*to = *from++;
-			len -= 2;
+			len -= 3;
 		}
-		else if (*from == SS3)
+		else if (*from == SS3 && len >= 3)
 		{
 			from++;
 			*to = *from++ << 8;
 			*to |= 0x3f & *from++;
 			len -= 3;
 		}
-		else if (*from & 0x80)
+		else if ((*from & 0x80) && len >= 2)
 		{
 			*to = *from++ << 8;
 			*to |= *from++;
@@ -195,25 +193,24 @@ static int pg_euctw2wchar_with_len
 {
 	int cnt = 0;
 
-	while (*from && len > 0)
+	while (len > 0 && *from)
 	{
-		if (*from == SS2)
+		if (*from == SS2 && len >= 4)
 		{
 			from++;
-			len--;
 			*to = *from++ << 16;
 			*to |= *from++ << 8;
 			*to |= *from++;
-			len -= 3;
+			len -= 4;
 		}
-		else if (*from == SS3)
+		else if (*from == SS3 && len >= 3)
 		{
 			from++;
 			*to = *from++ << 8;
 			*to |= 0x3f & *from++;
 			len -= 3;
 		}
-		else if (*from & 0x80)
+		else if ((*from & 0x80) && len >= 2)
 		{
 			*to = *from++ << 8;
 			*to |= *from++;
@@ -261,30 +258,30 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar * to, int len)
 				c3;
 	int cnt = 0;
 
-	while (*from && len > 0)
+	while (len > 0 && *from)
 	{
 		if ((*from & 0x80) == 0)
 		{
 			*to = *from++;
 			len--;
 		}
-		else if ((*from & 0xe0) == 0xc0)
+		else if ((*from & 0xe0) == 0xc0 && len >= 2)
 		{
 			c1 = *from++ & 0x1f;
 			c2 = *from++ & 0x3f;
-			len -= 2;
 			*to = c1 << 6;
 			*to |= c2;
+			len -= 2;
 		}
-		else if ((*from & 0xe0) == 0xe0)
+		else if ((*from & 0xe0) == 0xe0 && len >= 3)
 		{
 			c1 = *from++ & 0x0f;
 			c2 = *from++ & 0x3f;
 			c3 = *from++ & 0x3f;
-			len -= 3;
 			*to = c1 << 12;
 			*to |= c2 << 6;
 			*to |= c3;
+			len -= 3;
 		}
 		else
 		{
@@ -326,29 +323,29 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar * to, int len)
 {
 	int cnt = 0;
 
-	while (*from && len > 0)
+	while (len > 0 && *from)
 	{
-		if (IS_LC1(*from))
+		if (IS_LC1(*from) && len >= 2)
 		{
 			*to = *from++ << 16;
 			*to |= *from++;
 			len -= 2;
 		}
-		else if (IS_LCPRV1(*from))
+		else if (IS_LCPRV1(*from) && len >= 3)
 		{
 			from++;
 			*to = *from++ << 16;
 			*to |= *from++;
 			len -= 3;
 		}
-		else if (IS_LC2(*from))
+		else if (IS_LC2(*from) && len >= 3)
 		{
 			*to = *from++ << 16;
 			*to |= *from++ << 8;
 			*to |= *from++;
 			len -= 3;
 		}
-		else if (IS_LCPRV2(*from))
+		else if (IS_LCPRV2(*from) && len >= 4)
 		{
 			from++;
 			*to = *from++ << 16;
@@ -396,9 +393,10 @@ pg_latin12wchar_with_len(const unsigned char *from, pg_wchar * to, int len)
 {
 	int cnt = 0;
 
-	while (*from && len-- > 0)
+	while (len > 0 && *from)
 	{
 		*to++ = *from++;
+		len--;
 		cnt++;
 	}
 	*to = 0;
author	Tom Lane <tgl@sss.pgh.pa.us>	2001-03-08 00:24:34 +0000
committer	Tom Lane <tgl@sss.pgh.pa.us>	2001-03-08 00:24:34 +0000
commit	572fda27118f7e54c7e4ebb75b48d33896e5f776 (patch)
tree	744a314265185c7d65b9e7ebfca3c28c608409d7 /src/backend/utils
parent	b109b03feaec96aab6c635137b5a668389df8d31 (diff)
download	postgresql-572fda27118f7e54c7e4ebb75b48d33896e5f776.tar.gz postgresql-572fda27118f7e54c7e4ebb75b48d33896e5f776.zip