aboutsummaryrefslogtreecommitdiff
path: root/src/backend/snowball/libstemmer/stem_UTF_8_irish.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2025-02-18 21:13:46 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us> 2025-02-18 21:13:54 -0500
commit: b464e51ab32fbf09cf5d9c911a8e26f491ad1f44 (patch)
tree: 0fa15dd6477c1995919f63efe9e38b652a3ab25b /src/backend/snowball/libstemmer/stem_UTF_8_irish.c
parent: 71d02dc478d574c75bd0af82cec774c7b9059a61 (diff)
download: postgresql-b464e51ab32fbf09cf5d9c911a8e26f491ad1f44.tar.gz
download: postgresql-b464e51ab32fbf09cf5d9c911a8e26f491ad1f44.zip
Update to latest Snowball sources.
It's been some time since we did this, partly because the upstream snowball project hasn't formally tagged a new release since 2021. The main motivation for doing it now is to absorb a bug fix (their commit e322673a841d9abd69994ae8cd20e191090b6ef4), which prevents a null pointer dereference crash if SN_create_env() gets a malloc failure at just the wrong point. We'll patch the back branches with only that change, but we might as well do the full sync dance on HEAD.

Aside from a bunch of mostly-minor tweaks to existing stemmers, this update adds a new stemmer for Estonian. It also removes the existing stemmer for Romanian using ISO-8859-2 encoding. Upstream apparently concluded that ISO-8859-2 doesn't provide an adequate representation of some Romanian characters, and the UTF-8 implementation should be used instead.

While at it, update the README's instructions for doing a sync, which have not been adjusted during the addition of meson tooling.

Thanks to Maksim Korotkov for discovering the null-pointer bug and submitting the fix to upstream snowball.

Reported-by: Maksim Korotkov <m.korotkov@postgrespro.ru>
Discussion: https://postgr.es/m/1d1a46-67ab1000-21-80c451@83151435
Diffstat (limited to 'src/backend/snowball/libstemmer/stem_UTF_8_irish.c')
-rw-r--r-- src/backend/snowball/libstemmer/stem_UTF_8_irish.c | 31
1 files changed, 16 insertions, 15 deletions
diff --git a/src/backend/snowball/libstemmer/stem_UTF_8_irish.c b/src/backend/snowball/libstemmer/stem_UTF_8_irish.c
index 9410819dd98..c79b9ee57e6 100644
--- a/src/backend/snowball/libstemmer/stem_UTF_8_irish.c
+++ b/src/backend/snowball/libstemmer/stem_UTF_8_irish.c
@@ -225,24 +225,28 @@ static int r_mark_regions(struct SN_env * z) {
z->I[1] = z->l;
z->I[0] = z->l;
{ int c1 = z->c;
- {
+
+ {
int ret = out_grouping_U(z, g_v, 97, 250, 1);
if (ret < 0) goto lab0;
z->c += ret;
}
z->I[2] = z->c;
- {
+
+ {
int ret = in_grouping_U(z, g_v, 97, 250, 1);
if (ret < 0) goto lab0;
z->c += ret;
}
z->I[1] = z->c;
- {
+
+ {
int ret = out_grouping_U(z, g_v, 97, 250, 1);
if (ret < 0) goto lab0;
z->c += ret;
}
- {
+
+ {
int ret = in_grouping_U(z, g_v, 97, 250, 1);
if (ret < 0) goto lab0;
z->c += ret;
@@ -258,7 +262,7 @@ static int r_initial_morph(struct SN_env * z) {
int among_var;
z->bra = z->c;
among_var = find_among(z, a_0, 24);
- if (!(among_var)) return 0;
+ if (!among_var) return 0;
z->ket = z->c;
switch (among_var) {
case 1:
@@ -316,25 +320,22 @@ static int r_initial_morph(struct SN_env * z) {
}
static int r_RV(struct SN_env * z) {
- if (!(z->I[2] <= z->c)) return 0;
- return 1;
+ return z->I[2] <= z->c;
}
static int r_R1(struct SN_env * z) {
- if (!(z->I[1] <= z->c)) return 0;
- return 1;
+ return z->I[1] <= z->c;
}
static int r_R2(struct SN_env * z) {
- if (!(z->I[0] <= z->c)) return 0;
- return 1;
+ return z->I[0] <= z->c;
}
static int r_noun_sfx(struct SN_env * z) {
int among_var;
z->ket = z->c;
among_var = find_among_b(z, a_1, 16);
- if (!(among_var)) return 0;
+ if (!among_var) return 0;
z->bra = z->c;
switch (among_var) {
case 1:
@@ -361,7 +362,7 @@ static int r_deriv(struct SN_env * z) {
int among_var;
z->ket = z->c;
among_var = find_among_b(z, a_2, 25);
- if (!(among_var)) return 0;
+ if (!among_var) return 0;
z->bra = z->c;
switch (among_var) {
case 1:
@@ -406,7 +407,7 @@ static int r_verb_sfx(struct SN_env * z) {
z->ket = z->c;
if (z->c - 2 <= z->lb || z->p[z->c - 1] >> 5 != 3 || !((282896 >> (z->p[z->c - 1] & 0x1f)) & 1)) return 0;
among_var = find_among_b(z, a_3, 12);
- if (!(among_var)) return 0;
+ if (!among_var) return 0;
z->bra = z->c;
switch (among_var) {
case 1:
@@ -436,7 +437,7 @@ extern int irish_UTF_8_stem(struct SN_env * z) {
}
z->c = c1;
}
-
+
{ int ret = r_mark_regions(z);
if (ret < 0) return ret;
}