From 6ad0e25fa41bf51f6ee5302ac62842cf5546223d Mon Sep 17 00:00:00 2001 From: drh Date: Thu, 19 Dec 2019 21:11:48 +0000 Subject: Fix the regexp extension so that it correctly translates all over-length 3-byte UTF8 sequences into 0xfffd. FossilOrigin-Name: 3d4c0bf8904135fa68c75801bfa738715cacc3b19dc8ad6ef550b11798d4b121 --- ext/misc/regexp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'ext/misc/regexp.c') diff --git a/ext/misc/regexp.c b/ext/misc/regexp.c index 3359109ab..a97290511 100644 --- a/ext/misc/regexp.c +++ b/ext/misc/regexp.c @@ -156,7 +156,7 @@ static unsigned re_next_char(ReInput *p){ && (p->z[p->i+1]&0xc0)==0x80 ){ c = (c&0x0f)<<12 | ((p->z[p->i]&0x3f)<<6) | (p->z[p->i+1]&0x3f); p->i += 2; - if( c<=0x3ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd; + if( c<=0x7ff || (c>=0xd800 && c<=0xdfff) ) c = 0xfffd; }else if( (c&0xf8)==0xf0 && p->i+3<p->mx && (p->z[p->i]&0xc0)==0x80 && (p->z[p->i+1]&0xc0)==0x80 && (p->z[p->i+2]&0xc0)==0x80 ){ c = (c&0x07)<<18 | ((p->z[p->i]&0x3f)<<12) | ((p->z[p->i+1]&0x3f)<<6) -- cgit v1.2.3