aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/encode.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2020-06-29 11:41:19 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2020-06-29 11:41:19 -0400
commit: 16e3ad5d143795b05a21dc887c2ab384cce4bcb8 (patch)
tree: b78276a44499996048b25b151d88d8b792b70cc4 /src/backend/utils/adt/encode.c
parent: 78c887679d7632c1211f85eb95723f3226bf1b46 (diff)
download: postgresql-16e3ad5d143795b05a21dc887c2ab384cce4bcb8.tar.gz
download: postgresql-16e3ad5d143795b05a21dc887c2ab384cce4bcb8.zip
Avoid using %c printf format for potentially non-ASCII characters.
Since %c only passes a C "char" to printf, it's incapable of dealing with multibyte characters. Passing just the first byte of such a character leads to an output string that is visibly not correctly encoded, resulting in undesirable behavior such as encoding conversion failures while sending error messages to clients.

We've lived with this issue for a long time because it was inconvenient to avoid in a portable fashion. However, now that we always use our own snprintf code, it's reasonable to use the %.*s format to print just one possibly-multibyte character in a string. (We previously avoided that obvious-looking answer in order to work around glibc's bug #6530, cf commits 54cd4f045 and ed437e2b2.)

Hence, run around and fix a bunch of places that used %c to report a character found in a user-supplied string. For simplicity, I did not touch places that were emitting non-user-facing debug messages, or reporting catalog data that should always be ASCII. (It's also unclear how useful this approach could be in frontend code, where it's less certain that we know what encoding we're dealing with.)

In passing, improve a couple of poorly-written error messages in pageinspect/heapfuncs.c.

This is a longstanding issue, but I'm hesitant to back-patch because of the impact on translatable message strings. In any case this fix would not work reliably before v12.

Tom Lane and Quan Zongliang

Discussion: https://postgr.es/m/a120087c-4c88-d9d4-1ec5-808d7a7f133d@gmail.com
Diffstat (limited to 'src/backend/utils/adt/encode.c')
-rw-r--r-- src/backend/utils/adt/encode.c | 20
1 files changed, 13 insertions, 7 deletions
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index 61d318d93ca..a609d49c12c 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -15,6 +15,7 @@
#include <ctype.h>
+#include "mb/pg_wchar.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
@@ -171,17 +172,19 @@ hex_encode(const char *src, size_t len, char *dst)
}
static inline char
-get_hex(char c)
+get_hex(const char *cp)
{
+ unsigned char c = (unsigned char) *cp;
int res = -1;
- if (c > 0 && c < 127)
- res = hexlookup[(unsigned char) c];
+ if (c < 127)
+ res = hexlookup[c];
if (res < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("invalid hexadecimal digit: \"%c\"", c)));
+ errmsg("invalid hexadecimal digit: \"%.*s\"",
+ pg_mblen(cp), cp)));
return (char) res;
}
@@ -205,13 +208,15 @@ hex_decode(const char *src, size_t len, char *dst)
s++;
continue;
}
- v1 = get_hex(*s++) << 4;
+ v1 = get_hex(s) << 4;
+ s++;
if (s >= srcend)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid hexadecimal data: odd number of digits")));
- v2 = get_hex(*s++);
+ v2 = get_hex(s);
+ s++;
*p++ = v1 | v2;
}
@@ -338,7 +343,8 @@ pg_base64_decode(const char *src, size_t len, char *dst)
if (b < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
- errmsg("invalid symbol \"%c\" while decoding base64 sequence", (int) c)));
+ errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence",
+ pg_mblen(s - 1), s - 1)));
}
/* add it to buffer */
buf = (buf << 6) + b;