diff options
Diffstat (limited to 'src/pl/plperl/plperl_helpers.h')
-rw-r--r-- | src/pl/plperl/plperl_helpers.h | 61 |
1 files changed, 45 insertions, 16 deletions
diff --git a/src/pl/plperl/plperl_helpers.h b/src/pl/plperl/plperl_helpers.h
index 1b6648be1d1..ed99194ed1e 100644
--- a/src/pl/plperl/plperl_helpers.h
+++ b/src/pl/plperl/plperl_helpers.h
@@ -3,21 +3,29 @@
 
 /*
  * convert from utf8 to database encoding
+ *
+ * Returns a palloc'ed copy of the original string
  */
 static inline char *
-utf_u2e(const char *utf8_str, size_t len)
+utf_u2e(char *utf8_str, size_t len)
 {
 	int enc = GetDatabaseEncoding();
-
-	char *ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str, len, PG_UTF8, enc);
+	char *ret;
 
 	/*
-	 * when we are a PG_UTF8 or SQL_ASCII database pg_do_encoding_conversion()
-	 * will not do any conversion or verification. we need to do it manually
-	 * instead.
+	 * When we are in a PG_UTF8 or SQL_ASCII database
+	 * pg_do_encoding_conversion() will not do any conversion (which is good)
+	 * or verification (not so much), so we need to run the verification step
+	 * separately.
 	 */
 	if (enc == PG_UTF8 || enc == PG_SQL_ASCII)
-		pg_verify_mbstr_len(PG_UTF8, utf8_str, len, false);
+	{
+		pg_verify_mbstr_len(enc, utf8_str, len, false);
+		ret = utf8_str;
+	}
+	else
+		ret = (char *) pg_do_encoding_conversion((unsigned char *) utf8_str,
+												 len, PG_UTF8, enc);
 
 	if (ret == utf8_str)
 		ret = pstrdup(ret);
@@ -27,11 +35,15 @@ utf_u2e(const char *utf8_str, size_t len)
 
 /*
  * convert from database encoding to utf8
+ *
+ * Returns a palloc'ed copy of the original string
  */
 static inline char *
 utf_e2u(const char *str)
 {
-	char *ret = (char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str), GetDatabaseEncoding(), PG_UTF8);
+	char *ret =
+		(char *) pg_do_encoding_conversion((unsigned char *) str, strlen(str),
+										   GetDatabaseEncoding(), PG_UTF8);
 
 	if (ret == str)
 		ret = pstrdup(ret);
@@ -41,6 +53,8 @@ utf_e2u(const char *str)
 
 /*
  * Convert an SV to a char * in the current database encoding
+ *
+ * Returns a palloc'ed copy of the original string
  */
 static inline char *
 sv2cstr(SV *sv)
@@ -51,7 +65,9 @@ sv2cstr(SV *sv)
 
 	/*
 	 * get a utf8 encoded char * out of perl. *note* it may not be valid utf8!
-	 *
+	 */
+
+	/*
 	 * SvPVutf8() croaks nastily on certain things, like typeglobs and
 	 * readonly objects such as $^V. That's a perl bug - it's not supposed to
 	 * happen. To avoid crashing the backend, we make a copy of the sv before
@@ -63,18 +79,27 @@ sv2cstr(SV *sv)
 		(SvTYPE(sv) > SVt_PVLV && SvTYPE(sv) != SVt_PVFM))
 		sv = newSVsv(sv);
 	else
-
+	{
 		/*
 		 * increase the reference count so we can just SvREFCNT_dec() it when
 		 * we are done
 		 */
 		SvREFCNT_inc_simple_void(sv);
+	}
 
-	val = SvPVutf8(sv, len);
+	/*
+	 * Request the string from Perl, in UTF-8 encoding; but if we're in a
+	 * SQL_ASCII database, just request the byte soup without trying to make it
+	 * UTF8, because that might fail.
+	 */
+	if (GetDatabaseEncoding() == PG_SQL_ASCII)
+		val = SvPV(sv, len);
+	else
+		val = SvPVutf8(sv, len);
 
 	/*
-	 * we use perl's length in the event we had an embedded null byte to
-	 * ensure we error out properly
+	 * Now convert to database encoding. We use perl's length in the event we
+	 * had an embedded null byte to ensure we error out properly.
 	 */
 	res = utf_u2e(val, len);
 
@@ -88,16 +113,20 @@ sv2cstr(SV *sv)
  * Create a new SV from a string assumed to be in the current database's
  * encoding.
  */
-
 static inline SV *
 cstr2sv(const char *str)
 {
 	SV *sv;
-	char *utf8_str = utf_e2u(str);
+	char *utf8_str;
+
+	/* no conversion when SQL_ASCII */
+	if (GetDatabaseEncoding() == PG_SQL_ASCII)
+		return newSVpv(str, 0);
+
+	utf8_str = utf_e2u(str);
 
 	sv = newSVpv(utf8_str, 0);
 	SvUTF8_on(sv);
-
 	pfree(utf8_str);
 
 	return sv;