about | summary | refs | log | tree | commit | diff
path: root/src/backend/utils/adt/regexp.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us>, 2007-09-21 22:52:52 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us>, 2007-09-21 22:52:52 +0000
commit: 7583f9a7cab95e067581606d86b4962525fa81f5 (patch)
tree: 2fa2bfbd6c1c97c89b224a0a2c03d46745b538b0 /src/backend/utils/adt/regexp.c
parent: cc59049daf78c3d351c1ec78fb319b5fdeb20d53 (diff)
download: postgresql-7583f9a7cab95e067581606d86b4962525fa81f5.tar.gz
postgresql-7583f9a7cab95e067581606d86b4962525fa81f5.zip
Fix regex, LIKE, and some other second-rank text-manipulation functions
to not cause needless copying of text datums that have 1-byte headers. Greg Stark, in response to performance gripe from Guillaume Smet and ITAGAKI Takahiro.
Diffstat (limited to 'src/backend/utils/adt/regexp.c')
-rw-r--r-- src/backend/utils/adt/regexp.c | 129
1 file changed, 66 insertions, 63 deletions
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index 55901585a09..2ce25bb2e4c 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.73 2007/08/11 19:16:41 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.74 2007/09/21 22:52:52 tgl Exp $
*
* Alistair Crooks added the code for the regex caching
* agc - cached the regular expressions used - there's a good chance
@@ -35,8 +35,8 @@
#include "utils/builtins.h"
#include "utils/guc.h"
-#define PG_GETARG_TEXT_P_IF_EXISTS(_n) \
- (PG_NARGS() > (_n) ? PG_GETARG_TEXT_P(_n) : NULL)
+#define PG_GETARG_TEXT_PP_IF_EXISTS(_n) \
+ (PG_NARGS() > (_n) ? PG_GETARG_TEXT_PP(_n) : NULL)
/* GUC-settable flavor parameter */
@@ -97,7 +97,8 @@ typedef struct regexp_matches_ctx
/* this structure describes one cached regular expression */
typedef struct cached_re_str
{
- text *cre_pat; /* original RE (untoasted TEXT form) */
+ char *cre_pat; /* original RE (not null terminated!) */
+ int cre_pat_len; /* length of original RE, in bytes */
int cre_flags; /* compile flags: extended,icase etc */
regex_t cre_re; /* the compiled regular expression */
} cached_re_str;
@@ -122,7 +123,7 @@ static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
*
* Returns regex_t *
*
- * text_re --- the pattern, expressed as an *untoasted* TEXT object
+ * text_re --- the pattern, expressed as a TEXT object
* cflags --- compile options for the pattern
*
* Pattern is given in the database encoding. We internally convert to
@@ -131,7 +132,8 @@ static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
static regex_t *
RE_compile_and_cache(text *text_re, int cflags)
{
- int text_re_len = VARSIZE(text_re);
+ int text_re_len = VARSIZE_ANY_EXHDR(text_re);
+ char *text_re_val = VARDATA_ANY(text_re);
pg_wchar *pattern;
int pattern_len;
int i;
@@ -146,9 +148,9 @@ RE_compile_and_cache(text *text_re, int cflags)
*/
for (i = 0; i < num_res; i++)
{
- if (VARSIZE(re_array[i].cre_pat) == text_re_len &&
- memcmp(re_array[i].cre_pat, text_re, text_re_len) == 0 &&
- re_array[i].cre_flags == cflags)
+ if (re_array[i].cre_pat_len == text_re_len &&
+ re_array[i].cre_flags == cflags &&
+ memcmp(re_array[i].cre_pat, text_re_val, text_re_len) == 0)
{
/*
* Found a match; move it to front if not there already.
@@ -170,10 +172,10 @@ RE_compile_and_cache(text *text_re, int cflags)
*/
/* Convert pattern string to wide characters */
- pattern = (pg_wchar *) palloc((text_re_len - VARHDRSZ + 1) * sizeof(pg_wchar));
- pattern_len = pg_mb2wchar_with_len(VARDATA(text_re),
+ pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
+ pattern_len = pg_mb2wchar_with_len(text_re_val,
pattern,
- text_re_len - VARHDRSZ);
+ text_re_len);
regcomp_result = pg_regcomp(&re_temp.cre_re,
pattern,
@@ -204,7 +206,8 @@ RE_compile_and_cache(text *text_re, int cflags)
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
}
- memcpy(re_temp.cre_pat, text_re, text_re_len);
+ memcpy(re_temp.cre_pat, text_re_val, text_re_len);
+ re_temp.cre_pat_len = text_re_len;
re_temp.cre_flags = cflags;
/*
@@ -308,7 +311,7 @@ RE_execute(regex_t *re, char *dat, int dat_len,
*
* Returns TRUE on match, FALSE on no match
*
- * text_re --- the pattern, expressed as an *untoasted* TEXT object
+ * text_re --- the pattern, expressed as a TEXT object
* dat --- the data to match against (need not be null-terminated)
* dat_len --- the length of the data string
* cflags --- compile options for the pattern
@@ -334,7 +337,7 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len,
* parse_re_flags - parse the options argument of regexp_matches and friends
*
* flags --- output argument, filled with desired options
- * opts --- *untoasted* TEXT object, or NULL for defaults
+ * opts --- TEXT object, or NULL for defaults
*
* This accepts all the options allowed by any of the callers; callers that
* don't want some have to reject them after the fact.
@@ -348,8 +351,8 @@ parse_re_flags(pg_re_flags *flags, text *opts)
if (opts)
{
- char *opt_p = VARDATA(opts);
- int opt_len = VARSIZE(opts) - VARHDRSZ;
+ char *opt_p = VARDATA_ANY(opts);
+ int opt_len = VARSIZE_ANY_EXHDR(opts);
int i;
for (i = 0; i < opt_len; i++)
@@ -454,7 +457,7 @@ Datum
nameregexeq(PG_FUNCTION_ARGS)
{
Name n = PG_GETARG_NAME(0);
- text *p = PG_GETARG_TEXT_P(1);
+ text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(RE_compile_and_execute(p,
NameStr(*n),
@@ -467,7 +470,7 @@ Datum
nameregexne(PG_FUNCTION_ARGS)
{
Name n = PG_GETARG_NAME(0);
- text *p = PG_GETARG_TEXT_P(1);
+ text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(!RE_compile_and_execute(p,
NameStr(*n),
@@ -479,12 +482,12 @@ nameregexne(PG_FUNCTION_ARGS)
Datum
textregexeq(PG_FUNCTION_ARGS)
{
- text *s = PG_GETARG_TEXT_P(0);
- text *p = PG_GETARG_TEXT_P(1);
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(RE_compile_and_execute(p,
- VARDATA(s),
- VARSIZE(s) - VARHDRSZ,
+ VARDATA_ANY(s),
+ VARSIZE_ANY_EXHDR(s),
regex_flavor,
0, NULL));
}
@@ -492,12 +495,12 @@ textregexeq(PG_FUNCTION_ARGS)
Datum
textregexne(PG_FUNCTION_ARGS)
{
- text *s = PG_GETARG_TEXT_P(0);
- text *p = PG_GETARG_TEXT_P(1);
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(!RE_compile_and_execute(p,
- VARDATA(s),
- VARSIZE(s) - VARHDRSZ,
+ VARDATA_ANY(s),
+ VARSIZE_ANY_EXHDR(s),
regex_flavor,
0, NULL));
}
@@ -513,7 +516,7 @@ Datum
nameicregexeq(PG_FUNCTION_ARGS)
{
Name n = PG_GETARG_NAME(0);
- text *p = PG_GETARG_TEXT_P(1);
+ text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(RE_compile_and_execute(p,
NameStr(*n),
@@ -526,7 +529,7 @@ Datum
nameicregexne(PG_FUNCTION_ARGS)
{
Name n = PG_GETARG_NAME(0);
- text *p = PG_GETARG_TEXT_P(1);
+ text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(!RE_compile_and_execute(p,
NameStr(*n),
@@ -538,12 +541,12 @@ nameicregexne(PG_FUNCTION_ARGS)
Datum
texticregexeq(PG_FUNCTION_ARGS)
{
- text *s = PG_GETARG_TEXT_P(0);
- text *p = PG_GETARG_TEXT_P(1);
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(RE_compile_and_execute(p,
- VARDATA(s),
- VARSIZE(s) - VARHDRSZ,
+ VARDATA_ANY(s),
+ VARSIZE_ANY_EXHDR(s),
regex_flavor | REG_ICASE,
0, NULL));
}
@@ -551,12 +554,12 @@ texticregexeq(PG_FUNCTION_ARGS)
Datum
texticregexne(PG_FUNCTION_ARGS)
{
- text *s = PG_GETARG_TEXT_P(0);
- text *p = PG_GETARG_TEXT_P(1);
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
PG_RETURN_BOOL(!RE_compile_and_execute(p,
- VARDATA(s),
- VARSIZE(s) - VARHDRSZ,
+ VARDATA_ANY(s),
+ VARSIZE_ANY_EXHDR(s),
regex_flavor | REG_ICASE,
0, NULL));
}
@@ -569,8 +572,8 @@ texticregexne(PG_FUNCTION_ARGS)
Datum
textregexsubstr(PG_FUNCTION_ARGS)
{
- text *s = PG_GETARG_TEXT_P(0);
- text *p = PG_GETARG_TEXT_P(1);
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
bool match;
regmatch_t pmatch[2];
@@ -581,8 +584,8 @@ textregexsubstr(PG_FUNCTION_ARGS)
* return what the whole regexp matched.
*/
match = RE_compile_and_execute(p,
- VARDATA(s),
- VARSIZE(s) - VARHDRSZ,
+ VARDATA_ANY(s),
+ VARSIZE_ANY_EXHDR(s),
regex_flavor,
2, pmatch);
@@ -620,9 +623,9 @@ textregexsubstr(PG_FUNCTION_ARGS)
Datum
textregexreplace_noopt(PG_FUNCTION_ARGS)
{
- text *s = PG_GETARG_TEXT_P(0);
- text *p = PG_GETARG_TEXT_P(1);
- text *r = PG_GETARG_TEXT_P(2);
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+ text *r = PG_GETARG_TEXT_PP(2);
regex_t *re;
re = RE_compile_and_cache(p, regex_flavor);
@@ -637,10 +640,10 @@ textregexreplace_noopt(PG_FUNCTION_ARGS)
Datum
textregexreplace(PG_FUNCTION_ARGS)
{
- text *s = PG_GETARG_TEXT_P(0);
- text *p = PG_GETARG_TEXT_P(1);
- text *r = PG_GETARG_TEXT_P(2);
- text *opt = PG_GETARG_TEXT_P(3);
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+ text *r = PG_GETARG_TEXT_PP(2);
+ text *opt = PG_GETARG_TEXT_PP(3);
regex_t *re;
pg_re_flags flags;
@@ -673,9 +676,9 @@ similar_escape(PG_FUNCTION_ARGS)
/* This function is not strict, so must test explicitly */
if (PG_ARGISNULL(0))
PG_RETURN_NULL();
- pat_text = PG_GETARG_TEXT_P(0);
- p = VARDATA(pat_text);
- plen = (VARSIZE(pat_text) - VARHDRSZ);
+ pat_text = PG_GETARG_TEXT_PP(0);
+ p = VARDATA_ANY(pat_text);
+ plen = VARSIZE_ANY_EXHDR(pat_text);
if (PG_ARGISNULL(1))
{
/* No ESCAPE clause provided; default to backslash as escape */
@@ -684,9 +687,9 @@ similar_escape(PG_FUNCTION_ARGS)
}
else
{
- esc_text = PG_GETARG_TEXT_P(1);
- e = VARDATA(esc_text);
- elen = (VARSIZE(esc_text) - VARHDRSZ);
+ esc_text = PG_GETARG_TEXT_PP(1);
+ e = VARDATA_ANY(esc_text);
+ elen = VARSIZE_ANY_EXHDR(esc_text);
if (elen == 0)
e = NULL; /* no escape character */
else if (elen != 1)
@@ -785,8 +788,8 @@ regexp_matches(PG_FUNCTION_ARGS)
if (SRF_IS_FIRSTCALL())
{
- text *pattern = PG_GETARG_TEXT_P(1);
- text *flags = PG_GETARG_TEXT_P_IF_EXISTS(2);
+ text *pattern = PG_GETARG_TEXT_PP(1);
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
MemoryContext oldcontext;
funcctx = SRF_FIRSTCALL_INIT();
@@ -863,9 +866,9 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
matchctx->orig_str = orig_str;
/* convert string to pg_wchar form for matching */
- orig_len = VARSIZE(orig_str) - VARHDRSZ;
+ orig_len = VARSIZE_ANY_EXHDR(orig_str);
wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
- wide_len = pg_mb2wchar_with_len(VARDATA(orig_str), wide_str, orig_len);
+ wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
/* determine options */
parse_re_flags(&re_flags, flags);
@@ -1043,8 +1046,8 @@ regexp_split_to_table(PG_FUNCTION_ARGS)
if (SRF_IS_FIRSTCALL())
{
- text *pattern = PG_GETARG_TEXT_P(1);
- text *flags = PG_GETARG_TEXT_P_IF_EXISTS(2);
+ text *pattern = PG_GETARG_TEXT_PP(1);
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
MemoryContext oldcontext;
funcctx = SRF_FIRSTCALL_INIT();
@@ -1091,9 +1094,9 @@ Datum regexp_split_to_array(PG_FUNCTION_ARGS)
ArrayBuildState *astate = NULL;
regexp_matches_ctx *splitctx;
- splitctx = setup_regexp_matches(PG_GETARG_TEXT_P(0),
- PG_GETARG_TEXT_P(1),
- PG_GETARG_TEXT_P_IF_EXISTS(2),
+ splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
+ PG_GETARG_TEXT_PP(1),
+ PG_GETARG_TEXT_PP_IF_EXISTS(2),
true, false, true);
while (splitctx->next_match <= splitctx->nmatches)