aboutsummaryrefslogtreecommitdiff
path: root/ext/misc/regexp.c
diff options
context:
space:
mode:
author: drh <> 2022-07-18 13:55:21 +0000
committer: drh <> 2022-07-18 13:55:21 +0000
commit: 449b473ee6dacd93d31672396bdd4742dd916d35 (patch)
tree: eaabdbbefcc98ddf5dbcf48397b44b6f72776f64 /ext/misc/regexp.c
parent: 18934137ddfd765025e73b2eb1a1bb83ee5fc03b (diff)
download: sqlite-449b473ee6dacd93d31672396bdd4742dd916d35.tar.gz
sqlite-449b473ee6dacd93d31672396bdd4742dd916d35.zip
Enhance the REGEXP extension so that it will accept the start-of-input
mark ("^") in the middle of parentheses. [forum:/forumpost/0d6a9160f81ef1a8|Forum post 0d6a9160f81ef1a8]. FossilOrigin-Name: ed8a8ebd62a319b5dabbdf67ee27141153b9899d7c8f08eeb4bdf35271015c71
Diffstat (limited to 'ext/misc/regexp.c')
-rw-r--r-- ext/misc/regexp.c 14
1 files changed, 13 insertions, 1 deletions
diff --git a/ext/misc/regexp.c b/ext/misc/regexp.c
index 8a3e13115..7413ab80e 100644
--- a/ext/misc/regexp.c
+++ b/ext/misc/regexp.c
@@ -72,6 +72,7 @@ SQLITE_EXTENSION_INIT1
/* The end-of-input character */
#define RE_EOF 0 /* End of input */
+#define RE_START 0xfffffff /* Start of input - larger than any UTF-8 code point */
/* The NFA is implemented as a sequence of opcodes taken from the following
** set. Each opcode has a single integer argument.
@@ -93,6 +94,7 @@ SQLITE_EXTENSION_INIT1
#define RE_OP_SPACE 15 /* space: [ \t\n\r\v\f] */
#define RE_OP_NOTSPACE 16 /* Not a space */
#define RE_OP_BOUNDARY 17 /* Boundary between word and non-word */
+#define RE_OP_ATSTART 18 /* Currently at the start of the string */
#if defined(SQLITE_DEBUG)
/* Opcode names used for symbolic debugging */
@@ -115,6 +117,7 @@ static const char *ReOpName[] = {
"SPACE",
"NOTSPACE",
"BOUNDARY",
+ "ATSTART",
};
#endif /* SQLITE_DEBUG */
@@ -225,7 +228,7 @@ static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){
ReStateNumber *pToFree;
unsigned int i = 0;
unsigned int iSwap = 0;
- int c = RE_EOF+1;
+ int c = RE_START;
int cPrev = 0;
int rc = 0;
ReInput in;
@@ -244,6 +247,7 @@ static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){
in.i++;
}
if( in.i+pRe->nInit>in.mx ) return 0;
+ c = RE_START-1;
}
if( pRe->nState<=(sizeof(aSpace)/(sizeof(aSpace[0])*2)) ){
@@ -272,6 +276,10 @@ static int re_match(ReCompiled *pRe, const unsigned char *zIn, int nIn){
if( pRe->aArg[x]==c ) re_add_state(pNext, x+1);
break;
}
+ case RE_OP_ATSTART: {
+ if( cPrev==RE_START ) re_add_state(pThis, x+1);
+ break;
+ }
case RE_OP_ANY: {
if( c!=0 ) re_add_state(pNext, x+1);
break;
@@ -550,6 +558,10 @@ static const char *re_subcompile_string(ReCompiled *p){
re_append(p, RE_OP_MATCH, RE_EOF);
break;
}
+ case '^': {
+ re_append(p, RE_OP_ATSTART, 0);
+ break;
+ }
case '{': {
int m = 0, n = 0;
int sz, j;