aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/common/unicode_norm.c 47
1 files changed, 22 insertions, 25 deletions
diff --git a/src/common/unicode_norm.c b/src/common/unicode_norm.c
index d46e33d322c..fd7bdef2928 100644
--- a/src/common/unicode_norm.c
+++ b/src/common/unicode_norm.c
@@ -105,6 +105,23 @@ get_code_entry(pg_wchar code)
#endif
}
+/*
+ * Get the combining class of the given codepoint (0 if it has no entry).
+ */
+static uint8
+get_canonical_class(pg_wchar code)
+{
+ const pg_unicode_decomposition *entry = get_code_entry(code);
+
+ /*
+ * If no entry is found, the character is either a Hangul character
+ * or a character with a class of 0 and no decompositions.
+ */
+ if (!entry)
+ return 0;
+ else
+ return entry->comb_class;
+}
/*
* Given a decomposition entry looked up earlier, get the decomposed
@@ -430,16 +447,8 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
pg_wchar prev = decomp_chars[count - 1];
pg_wchar next = decomp_chars[count];
pg_wchar tmp;
- const pg_unicode_decomposition *prevEntry = get_code_entry(prev);
- const pg_unicode_decomposition *nextEntry = get_code_entry(next);
-
- /*
- * If no entries are found, the character used is either an Hangul
- * character or a character with a class of 0 and no decompositions,
- * so move to next result.
- */
- if (prevEntry == NULL || nextEntry == NULL)
- continue;
+ const uint8 prevClass = get_canonical_class(prev);
+ const uint8 nextClass = get_canonical_class(next);
/*
* Per Unicode (https://www.unicode.org/reports/tr15/tr15-18.html)
@@ -449,10 +458,10 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
* combining class for the second, and the second is not a starter. A
* character is a starter if its combining class is 0.
*/
- if (nextEntry->comb_class == 0x0 || prevEntry->comb_class == 0x0)
+ if (prevClass == 0 || nextClass == 0)
continue;
- if (prevEntry->comb_class <= nextEntry->comb_class)
+ if (prevClass <= nextClass)
continue;
/* exchange can happen */
@@ -489,8 +498,7 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
for (count = 1; count < decomp_size; count++)
{
pg_wchar ch = decomp_chars[count];
- const pg_unicode_decomposition *ch_entry = get_code_entry(ch);
- int ch_class = (ch_entry == NULL) ? 0 : ch_entry->comb_class;
+ int ch_class = get_canonical_class(ch);
pg_wchar composite;
if (last_class < ch_class &&
@@ -527,17 +535,6 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
/* We only need this in the backend. */
#ifndef FRONTEND
-static uint8
-get_canonical_class(pg_wchar ch)
-{
- const pg_unicode_decomposition *entry = get_code_entry(ch);
-
- if (!entry)
- return 0;
- else
- return entry->comb_class;
-}
-
static const pg_unicode_normprops *
qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo)
{