From d074b4e50d11768ab6da696b13d40ec05e4823fb Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Wed, 31 Jul 2013 11:31:22 -0400
Subject: Fix regexp_matches() handling of zero-length matches.

We'd find the same match twice if it was of zero length and not immediately
adjacent to the previous match. replace_text_regexp() got similar cases
right, so adjust this search logic to match that. Note that even though
the regexp_split_to_xxx() functions share this code, they did not display
equivalent misbehavior, because the second match would be considered
degenerate and ignored.

Jeevan Chalke, with some cosmetic changes by me.
---
 src/backend/utils/adt/regexp.c  | 13 ++++++-------
 src/backend/utils/adt/varlena.c |  5 ++++-
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'src/backend/utils/adt')

diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 6a89fabf4bf..ee37dfe991a 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -957,14 +957,13 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
 			break;
 
 		/*
-		 * Advance search position. Normally we start just after the end of
-		 * the previous match, but always advance at least one character (the
-		 * special case can occur if the pattern matches zero characters just
-		 * after the prior match or at the end of the string).
+		 * Advance search position. Normally we start the next search at the
+		 * end of the previous match; but if the match was of zero length, we
+		 * have to advance by one character, or we'd just find the same match
+		 * again.
 		 */
-		if (start_search < pmatch[0].rm_eo)
-			start_search = pmatch[0].rm_eo;
-		else
+		start_search = prev_match_end;
+		if (pmatch[0].rm_so == pmatch[0].rm_eo)
 			start_search++;
 		if (start_search > wide_len)
 			break;
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 4aa344896f9..5e2c2ddc532 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -3083,7 +3083,10 @@ replace_text_regexp(text *src_text, void *regexp,
 			break;
 
 		/*
-		 * Search from next character when the matching text is zero width.
+		 * Advance search position. Normally we start the next search at the
+		 * end of the previous match; but if the match was of zero length, we
+		 * have to advance by one character, or we'd just find the same match
+		 * again.
 		 */
 		search_start = data_pos;
 		if (pmatch[0].rm_so == pmatch[0].rm_eo)
-- 
cgit v1.2.3