diff options
author | drh <> | 2022-11-17 19:24:39 +0000 |
---|---|---|
committer | drh <> | 2022-11-17 19:24:39 +0000 |
commit | d8e48fffdff1ebfbf5e0b2ded9e12ce00b1ba427 (patch) | |
tree | 2df40032c792b09372302e6e4c65cb9df6410ba6 /ext/misc/regexp.c | |
parent | 3c1572ddb43ba7f4cbae57ee4b019a2cd45ee015 (diff) | |
download | sqlite-d8e48fffdff1ebfbf5e0b2ded9e12ce00b1ba427.tar.gz sqlite-d8e48fffdff1ebfbf5e0b2ded9e12ce00b1ba427.zip |
Fix corner cases in UTF8 handling in the REGEXP extension.
[forum:/forumpost/3ffe058b04|Forum post 3ffe058b04].
FossilOrigin-Name: abb18f61c5cec0f524acc41453b4c06b61c5af51ff46417588837fc0c3967288
Diffstat (limited to 'ext/misc/regexp.c')
-rw-r--r-- | ext/misc/regexp.c | 8 |
1 files changed, 4 insertions, 4 deletions
```diff
diff --git a/ext/misc/regexp.c b/ext/misc/regexp.c
index d0c8ee5cf..086ef564c 100644
--- a/ext/misc/regexp.c
+++ b/ext/misc/regexp.c
@@ -185,7 +185,7 @@ static unsigned re_next_char(ReInput *p){
       c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f);
       p->i += 2;
       if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd;
-    }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80
+    }else if( (c&0xf8)==0xf0 && p->i+2<p->mx && (p->z[p->i]&0xc0)==0x80
            && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){
       c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6)
                        | (p->z[p->i+2]&0x3f);
@@ -712,15 +712,15 @@ static const char *re_compile(ReCompiled **ppRe, const char *zIn, int noCase){
   ** one or more matching characters, enter those matching characters into
   ** zInit[]. The re_match() routine can then search ahead in the input
   ** string looking for the initial match without having to run the whole
-  ** regex engine over the string. Do not worry able trying to match
+  ** regex engine over the string. Do not worry about trying to match
   ** unicode characters beyond plane 0 - those are very rare and this is
   ** just an optimization. */
   if( pRe->aOp[0]==RE_OP_ANYSTAR && !noCase ){
     for(j=0, i=1; j<(int)sizeof(pRe->zInit)-2 && pRe->aOp[i]==RE_OP_MATCH; i++){
       unsigned x = pRe->aArg[i];
-      if( x<=127 ){
+      if( x<=0x7f ){
         pRe->zInit[j++] = (unsigned char)x;
-      }else if( x<=0xfff ){
+      }else if( x<=0x7ff ){
         pRe->zInit[j++] = (unsigned char)(0xc0 | (x>>6));
         pRe->zInit[j++] = 0x80 | (x&0x3f);
       }else if( x<=0xffff ){
```