/*-------------------------------------------------------------------------
 * category_test.c
 *		Program to test Unicode general category functions.
 *
 * Portions Copyright (c) 2017-2024, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/common/unicode/category_test.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres_fe.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef USE_ICU
#include <unicode/uchar.h>
#endif
#include "common/unicode_category.h"
#include "common/unicode_version.h"

#ifdef USE_ICU
/*
 * Parse a "major.minor" Unicode version string into a single integer
 * (major * 100 + minor) so that two versions can be compared with ordinary
 * integer comparison.
 *
 * Anything following the minor number is ignored.  If the string does not
 * start with two dot-separated integers, or the minor number does not fit in
 * two decimal digits, an error is reported on stderr and the program exits
 * with status 1.  The check is done at run time rather than with Assert() so
 * that builds without assertions never read unparsed (uninitialized) values.
 */
static int
parse_unicode_version(const char *version)
{
	int			major = 0;
	int			minor = 0;

	if (sscanf(version, "%d.%d", &major, &minor) != 2 ||
		major < 0 || minor < 0 || minor >= 100)
	{
		fprintf(stderr,
				"category_test: could not parse Unicode version \"%s\"\n",
				version);
		exit(1);
	}

	return major * 100 + minor;
}
#endif

/*
 * Exhaustively test that the Unicode category for each codepoint matches that
 * returned by ICU.
 *
 * Every codepoint from 0 through 0x10ffff is checked.  The two category
 * values are compared directly, so the test relies on the Postgres category
 * numbering matching the values returned by u_charType().
 *
 * Exits with status 1 on the first mismatch that cannot be explained by a
 * Unicode version difference, and with status 0 otherwise.  When built
 * without ICU the test prints a notice and exits with status 0.
 */
int
main(int argc, char **argv)
{
#ifdef USE_ICU
	int			pg_unicode_version = parse_unicode_version(PG_UNICODE_VERSION);
	int			icu_unicode_version = parse_unicode_version(U_UNICODE_VERSION);
	int			pg_skipped_codepoints = 0;
	int			icu_skipped_codepoints = 0;

	printf("category_test: Postgres Unicode version:\t%s\n", PG_UNICODE_VERSION);
	printf("category_test: ICU Unicode version:\t\t%s\n", U_UNICODE_VERSION);

	for (UChar32 code = 0; code <= 0x10ffff; code++)
	{
		uint8_t		pg_category = unicode_category(code);
		uint8_t		icu_category = u_charType(code);

		if (pg_category == icu_category)
			continue;

		/*
		 * A version mismatch means that some assigned codepoints in the
		 * newer version may be unassigned in the older version. That's OK,
		 * though the test will not cover those codepoints marked unassigned
		 * in the older version (that is, it will no longer be an exhaustive
		 * test).
		 */
		if (pg_category == PG_U_UNASSIGNED &&
			pg_unicode_version < icu_unicode_version)
			pg_skipped_codepoints++;
		else if (icu_category == PG_U_UNASSIGNED &&
				 icu_unicode_version < pg_unicode_version)
			icu_skipped_codepoints++;
		else
		{
			/* %x expects an unsigned argument; UChar32 is signed */
			printf("category_test: FAILURE for codepoint 0x%06x\n",
				   (unsigned int) code);
			printf("category_test: Postgres category: %02d %s %s\n",
				   pg_category,
				   unicode_category_abbrev(pg_category),
				   unicode_category_string(pg_category));
			printf("category_test: ICU category: %02d %s %s\n",
				   icu_category,
				   unicode_category_abbrev(icu_category),
				   unicode_category_string(icu_category));
			printf("\n");
			exit(1);
		}
	}

	if (pg_skipped_codepoints > 0)
		printf("category_test: skipped %d codepoints unassigned in Postgres due to Unicode version mismatch\n",
			   pg_skipped_codepoints);
	if (icu_skipped_codepoints > 0)
		printf("category_test: skipped %d codepoints unassigned in ICU due to Unicode version mismatch\n",
			   icu_skipped_codepoints);

	printf("category_test: success\n");
	exit(0);
#else
	printf("category_test: ICU support required for test; skipping\n");
	exit(0);
#endif
}