diff options
Diffstat (limited to 'src/backend/snowball/libstemmer/utilities.c')
-rw-r--r-- | src/backend/snowball/libstemmer/utilities.c | 68 |
1 files changed, 39 insertions, 29 deletions
diff --git a/src/backend/snowball/libstemmer/utilities.c b/src/backend/snowball/libstemmer/utilities.c index 681bca09c2f..1ecd2410fe7 100644 --- a/src/backend/snowball/libstemmer/utilities.c +++ b/src/backend/snowball/libstemmer/utilities.c @@ -18,38 +18,48 @@ extern void lose_s(symbol * p) { } /* - new_p = skip_utf8(p, c, lb, l, n); skips n characters forwards from p + c - if n +ve, or n characters backwards from p + c - 1 if n -ve. new_p is the new - position, or -1 on failure. + new_p = skip_utf8(p, c, l, n); skips n characters forwards from p + c. + new_p is the new position, or -1 on failure. -- used to implement hop and next in the utf8 case. */ -extern int skip_utf8(const symbol * p, int c, int lb, int l, int n) { +extern int skip_utf8(const symbol * p, int c, int limit, int n) { int b; - if (n >= 0) { - for (; n > 0; n--) { - if (c >= l) return -1; - b = p[c++]; - if (b >= 0xC0) { /* 1100 0000 */ - while (c < l) { - b = p[c]; - if (b >= 0xC0 || b < 0x80) break; - /* break unless b is 10------ */ - c++; - } + if (n < 0) return -1; + for (; n > 0; n--) { + if (c >= limit) return -1; + b = p[c++]; + if (b >= 0xC0) { /* 1100 0000 */ + while (c < limit) { + b = p[c]; + if (b >= 0xC0 || b < 0x80) break; + /* break unless b is 10------ */ + c++; } } - } else { - for (; n < 0; n++) { - if (c <= lb) return -1; - b = p[--c]; - if (b >= 0x80) { /* 1000 0000 */ - while (c > lb) { - b = p[c]; - if (b >= 0xC0) break; /* 1100 0000 */ - c--; - } + } + return c; +} + +/* + new_p = skip_b_utf8(p, c, lb, n); skips n characters backwards from p + c - 1 + new_p is the new position, or -1 on failure. + + -- used to implement hop and next in the utf8 case. +*/ + +extern int skip_b_utf8(const symbol * p, int c, int limit, int n) { + int b; + if (n < 0) return -1; + for (; n > 0; n--) { + if (c <= limit) return -1; + b = p[--c]; + if (b >= 0x80) { /* 1000 0000 */ + while (c > limit) { + b = p[c]; + if (b >= 0xC0) break; /* 1100 0000 */ + c--; } } } @@ -76,7 +86,7 @@ static int get_utf8(const symbol * p, int c, int l, int * slot) { *slot = (b0 & 0xF) << 12 | b1 << 6 | b2; return 3; } - *slot = (b0 & 0xE) << 18 | b1 << 12 | b2 << 6 | (p[c] & 0x3F); + *slot = (b0 & 0x7) << 18 | b1 << 12 | b2 << 6 | (p[c] & 0x3F); return 4; } @@ -100,7 +110,7 @@ static int get_b_utf8(const symbol * p, int c, int lb, int * slot) { *slot = (b & 0xF) << 12 | a; return 3; } - *slot = (p[--c] & 0xE) << 18 | (b & 0x3F) << 12 | a; + *slot = (p[--c] & 0x7) << 18 | (b & 0x3F) << 12 | a; return 4; } @@ -226,7 +236,7 @@ extern int find_among(struct SN_env * z, const struct among * v, int v_size) { int j = v_size; int c = z->c; int l = z->l; - symbol * q = z->p + c; + const symbol * q = z->p + c; const struct among * w; @@ -291,7 +301,7 @@ extern int find_among_b(struct SN_env * z, const struct among * v, int v_size) { int j = v_size; int c = z->c; int lb = z->lb; - symbol * q = z->p + c - 1; + const symbol * q = z->p + c - 1; const struct among * w; |