diff options
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- | src/backend/utils/adt/formatting.c | 107 |
1 files changed, 67 insertions, 40 deletions
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 79df80704d7..8736ada4be2 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1922,6 +1922,47 @@ str_toupper(const char *buff, size_t nbytes, Oid collid) return result; } +struct WordBoundaryState +{ + const char *str; + size_t len; + size_t offset; + bool init; + bool prev_alnum; +}; + +/* + * Simple word boundary iterator that draws boundaries each time the result of + * pg_u_isalnum() changes. + */ +static size_t +initcap_wbnext(void *state) +{ + struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state; + + while (wbstate->offset < wbstate->len && + wbstate->str[wbstate->offset] != '\0') + { + pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str + + wbstate->offset); + bool curr_alnum = pg_u_isalnum(u, true); + + if (!wbstate->init || curr_alnum != wbstate->prev_alnum) + { + size_t prev_offset = wbstate->offset; + + wbstate->init = true; + wbstate->offset += unicode_utf8len(u); + wbstate->prev_alnum = curr_alnum; + return prev_offset; + } + + wbstate->offset += unicode_utf8len(u); + } + + return wbstate->len; +} + /* * collation-aware, wide-character-aware initcap function * @@ -1980,56 +2021,42 @@ str_initcap(const char *buff, size_t nbytes, Oid collid) #endif if (mylocale && mylocale->provider == COLLPROVIDER_BUILTIN) { - const unsigned char *src = (unsigned char *) buff; + const char *src = buff; size_t srclen = nbytes; - unsigned char *dst; size_t dstsize; - int srcoff = 0; - int dstoff = 0; + char *dst; + size_t needed; + struct WordBoundaryState wbstate = { + .str = src, + .len = srclen, + .offset = 0, + .init = false, + .prev_alnum = false, + }; Assert(GetDatabaseEncoding() == PG_UTF8); - /* overflow paranoia */ - if ((srclen + 1) > (INT_MAX / MAX_MULTIBYTE_CHAR_LEN)) - ereport(ERROR, - (errcode(ERRCODE_OUT_OF_MEMORY), - errmsg("out of memory"))); - - /* result is at most srclen codepoints plus terminating NUL */ - dstsize = srclen * MAX_MULTIBYTE_CHAR_LEN + 1; - dst = (unsigned char *) palloc(dstsize); + /* first try buffer of equal size plus terminating NUL */ + dstsize = srclen + 1; + dst = palloc(dstsize); - while (srcoff < nbytes) + needed = unicode_strtitle(dst, dstsize, src, srclen, + initcap_wbnext, &wbstate); + if (needed + 1 > dstsize) { - pg_wchar u1 = utf8_to_unicode(src + srcoff); - pg_wchar u2; - int u1len = unicode_utf8len(u1); - int u2len; - - if (wasalnum) - u2 = unicode_lowercase_simple(u1); - else - u2 = unicode_uppercase_simple(u1); + /* reset iterator */ + wbstate.offset = 0; + wbstate.init = false; - u2len = unicode_utf8len(u2); - - Assert(dstoff + u2len + 1 <= dstsize); - - wasalnum = pg_u_isalnum(u2, true); - - unicode_to_utf8(u2, dst + dstoff); - srcoff += u1len; - dstoff += u2len; + /* grow buffer if needed and retry */ + dstsize = needed + 1; + dst = repalloc(dst, dstsize); + needed = unicode_strtitle(dst, dstsize, src, srclen, + initcap_wbnext, &wbstate); + Assert(needed + 1 == dstsize); } - Assert(dstoff + 1 <= dstsize); - *(dst + dstoff) = '\0'; - dstoff++; - - /* allocate result buffer of the right size and free workspace */ - result = palloc(dstoff); - memcpy(result, dst, dstoff); - pfree(dst); + result = dst; } else { |