From 91ec00c25acd25bd9a6bc2345436370d23723653 Mon Sep 17 00:00:00 2001 From: drh <> Date: Wed, 6 Dec 2023 14:50:48 +0000 Subject: Increased rigor in comparisons between object labels in JSON. FossilOrigin-Name: 2bc86d145fccc07107b7753cb1a69122676d4096fe59c454497bd81a6142d45e --- src/json.c | 311 +++++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 218 insertions(+), 93 deletions(-) (limited to 'src/json.c') diff --git a/src/json.c b/src/json.c index 8165eb4f3..37a0ebfe1 100644 --- a/src/json.c +++ b/src/json.c @@ -2119,6 +2119,188 @@ static void jsonBlobEdit( if( nIns && aIns ) memcpy(&pParse->aBlob[iDel], aIns, nIns); } +/* +** Return the number of escaped newlines to be ignored. +** An escaped newline is a one of the following byte sequences: +** +** 0x5c 0x0a +** 0x5c 0x0d +** 0x5c 0x0d 0x0a +** 0x5c 0xe2 0x80 0xa8 +** 0x5c 0xe2 0x80 0xa9 +*/ +static u32 jsonBytesToBypass(const char *z, u32 n){ + u32 i = 0; + while( i+10 ); + assert( z[0]=='\\' ); + if( n<2 ){ + *piOut = 0xFFFD; + return n; + } + switch( (u8)z[1] ){ + case 'u': { + u32 v, vlo; + if( n<6 ){ + *piOut = 0xFFFD; + return n; + } + v = jsonHexToInt4(&z[2]); + if( (v & 0xfc00)==0xd800 + && n>=12 + && z[6]=='\\' + && z[7]=='u' + && ((vlo = jsonHexToInt4(&z[8]))&0xfc00)==0xdc00 + ){ + *piOut = ((v&0x3ff)<<10) + (vlo&0x3ff) + 0x10000; + return 12; + }else{ + *piOut = v; + return 6; + } + } + case 'b': { *piOut = '\b'; return 2; } + case 'f': { *piOut = '\f'; return 2; } + case 'n': { *piOut = '\n'; return 2; } + case 'r': { *piOut = '\r'; return 2; } + case 't': { *piOut = '\t'; return 2; } + case 'v': { *piOut = '\v'; return 2; } + case '0': { *piOut = 0; return 2; } + case '\'': + case '"': + case '/': + case '\\':{ *piOut = z[1]; return 2; } + case 'x': { + if( n<4 ){ + *piOut = 0xFFFD; + return n; + } + *piOut = (jsonHexToInt(z[2])<<4) | jsonHexToInt(z[3]); + return 4; + } + case 0xe2: + case '\r': + case '\n': { + u32 nSkip = jsonBytesToBypass(z, n); + if( nSkip==0 ){ + *piOut = 0xFFFD; + return n; + }else if( nSkip==n ){ + *piOut = 0; + return n; + }else if( z[nSkip]=='\\' ){ + return nSkip + jsonUnescapeOneChar(&z[nSkip], n-nSkip, piOut); + }else{ + *piOut = z[nSkip]; + return nSkip+1; + } + } + default: { + *piOut = 0xFFFD; + return 2; + } + } +} + + +/* +** Compare two object labels. Return 1 if they are equal and +** 0 if they differ. +** +** In this version, we know that one or the other or both of the +** two comparands contains an escape sequence. +*/ +static SQLITE_NOINLINE int jsonLabelCompareEscaped( + const char *zLeft, /* The left label */ + u32 nLeft, /* Size of the left label in bytes */ + int rawLeft, /* True if zLeft contains no escapes */ + const char *zRight, /* The right label */ + u32 nRight, /* Size of the right label in bytes */ + int rawRight /* True if zRight is escape-free */ +){ + u32 cLeft, cRight; + assert( rawLeft==0 || rawRight==0 ); + while( nLeft>0 && nRight>0 ){ + if( rawLeft || zLeft[0]!='\\' ){ + cLeft = ((u8*)zLeft)[0]; + zLeft++; + nLeft--; + }else{ + u32 n = jsonUnescapeOneChar(zLeft, nLeft, &cLeft); + zLeft += n; + assert( n<=nLeft ); + nLeft -= n; + } + if( rawRight || zRight[0]!='\\' ){ + cRight = ((u8*)zRight)[0]; + zRight++; + nRight--; + }else{ + u32 n = jsonUnescapeOneChar(zRight, nRight, &cRight); + zRight += n; + assert( n<=nRight ); + nRight -= n; + } + if( cLeft!=cRight ) return 0; + } + return nLeft==0 && nRight==0; +} + +/* +** Compare two object labels. Return 1 if they are equal and +** 0 if they differ. Return -1 if an OOM occurs. +*/ +static int jsonLabelCompare( + const char *zLeft, /* The left label */ + u32 nLeft, /* Size of the left label in bytes */ + int rawLeft, /* True if zLeft contains no escapes */ + const char *zRight, /* The right label */ + u32 nRight, /* Size of the right label in bytes */ + int rawRight /* True if zRight is escape-free */ +){ + if( rawLeft && rawRight ){ + /* Simpliest case: Neither label contains escapes. A simple + ** memcmp() is sufficient. */ + if( nLeft!=nRight ) return 0; + return memcmp(zLeft, zRight, nLeft)==0; + }else{ + return jsonLabelCompareEscaped(zLeft, nLeft, rawLeft, + zRight, nRight, rawRight); + } +} + /* ** Error returns from jsonLookupStep() */ @@ -2224,6 +2406,7 @@ static u32 jsonLookupStep( return iRoot; } if( zPath[0]=='.' ){ + int rawKey = 1; x = pParse->aBlob[iRoot]; zPath++; if( zPath[0]=='"' ){ @@ -2236,6 +2419,7 @@ static u32 jsonLookupStep( return JSON_LOOKUP_PATHERROR; } testcase( nKey==0 ); + rawKey = memchr(zKey, '\\', nKey)==0; }else{ zKey = zPath; for(i=0; zPath[i] && zPath[i]!='.' && zPath[i]!='['; i++){} @@ -2249,13 +2433,17 @@ static u32 jsonLookupStep( j = iRoot + n; /* j is the index of a label */ iEnd = j+sz; while( jaBlob[j] & 0x0f; if( xJSONB_TEXTRAW ) return JSON_LOOKUP_ERROR; n = jsonbPayloadSize(pParse, j, &sz); if( n==0 ) return JSON_LOOKUP_ERROR; k = j+n; /* k is the index of the label text */ if( k+sz>=iEnd ) return JSON_LOOKUP_ERROR; - if( sz==nKey && memcmp(&pParse->aBlob[k], zKey, nKey)==0 ){ + zLabel = (const char*)&pParse->aBlob[k]; + rawLabel = x==JSONB_TEXT || x==JSONB_TEXTRAW; + if( jsonLabelCompare(zKey, nKey, rawKey, zLabel, sz, rawLabel) ){ u32 v = k+sz; /* v is the index of the value */ if( ((pParse->aBlob[v])&0x0f)>JSONB_OBJECT ) return JSON_LOOKUP_ERROR; n = jsonbPayloadSize(pParse, v, &sz); @@ -2279,7 +2467,7 @@ static u32 jsonLookupStep( testcase( pParse->eEdit==JEDIT_INS ); testcase( pParse->eEdit==JEDIT_SET ); memset(&ix, 0, sizeof(ix)); - jsonBlobAppendNode(&ix,JSONB_TEXTRAW, nKey, 0); + jsonBlobAppendNode(&ix, rawKey?JSONB_TEXTRAW:JSONB_TEXT5, nKey, 0); pParse->oom |= ix.oom; rc = jsonCreateEditSubstructure(pParse, &v, &zPath[i]); if( !JSON_LOOKUP_ISERROR(rc) @@ -2483,72 +2671,27 @@ static void jsonReturnFromBlob( for(iIn=iOut=0; iIn>6)); - zOut[iOut++] = 0x80 | (v&0x3f); - }else{ - u32 vlo; - if( (v&0xfc00)==0xd800 - && iIn>18); - zOut[iOut++] = 0x80 | ((v>>12)&0x3f); - zOut[iOut++] = 0x80 | ((v>>6)&0x3f); - zOut[iOut++] = 0x80 | (v&0x3f); - }else{ - zOut[iOut++] = 0xe0 | (v>>12); - zOut[iOut++] = 0x80 | ((v>>6)&0x3f); - zOut[iOut++] = 0x80 | (v&0x3f); - } - } - continue; - }else if( c=='b' ){ - c = '\b'; - }else if( c=='f' ){ - c = '\f'; - }else if( c=='n' ){ - c = '\n'; - }else if( c=='r' ){ - c = '\r'; - }else if( c=='t' ){ - c = '\t'; - }else if( c=='v' ){ - c = '\v'; - }else if( c=='\'' || c=='"' || c=='/' || c=='\\' ){ - /* pass through unchanged */ - }else if( c=='0' ){ - c = 0; - }else if( c=='x' ){ - c = (jsonHexToInt(z[iIn+1])<<4) | jsonHexToInt(z[iIn+2]); - iIn += 2; - }else if( c=='\r' && z[i+1]=='\n' ){ - iIn++; - continue; - }else if( 0xe2==(u8)c - && iIn>6)); + zOut[iOut++] = 0x80 | (v&0x3f); + }else if( v<0x10000 ){ + zOut[iOut++] = 0xe0 | (v>>12); + zOut[iOut++] = 0x80 | ((v>>6)&0x3f); + zOut[iOut++] = 0x80 | (v&0x3f); }else{ - continue; + zOut[iOut++] = 0xf0 | (v>>18); + zOut[iOut++] = 0x80 | ((v>>12)&0x3f); + zOut[iOut++] = 0x80 | ((v>>6)&0x3f); + zOut[iOut++] = 0x80 | (v&0x3f); } - } /* end if( c=='\\' ) */ - zOut[iOut++] = c; + iIn += szEscape - 1; + }else{ + zOut[iOut++] = c; + } } /* end for() */ zOut[iOut] = 0; sqlite3_result_text(pCtx, zOut, iOut, sqlite3_free); @@ -3384,6 +3527,7 @@ static int jsonMergePatch( iTCursor = iTStart; iTEnd = iTEndBE + pTarget->delta; while( iTCursoraBlob[iTCursor] & 0x0f; if( eTLabelJSONB_TEXTRAW ){ @@ -3396,33 +3540,14 @@ static int jsonMergePatch( nTValue = jsonbPayloadSize(pTarget, iTValue, &szTValue); if( nTValue==0 ) return JSON_MERGE_BADTARGET; if( iTValue + nTValue + szTValue > iTEnd ) return JSON_MERGE_BADTARGET; - if( eTLabel==ePLabel ){ - /* Common case */ - if( szTLabel==szPLabel - && memcmp(&pTarget->aBlob[iTLabel+nTLabel], - &pPatch->aBlob[iPLabel+nPLabel], szTLabel)==0 - ){ - break; /* Labels match. */ - } - }else{ - /* Should rarely happen */ - JsonString s1, s2; - int isEqual, isOom; - jsonStringInit(&s1, 0); - jsonXlateBlobToText(pTarget, iTLabel, &s1); - jsonStringInit(&s2, 0); - jsonXlateBlobToText(pPatch, iPLabel, &s2); - isOom = s1.eErr || s2.eErr; - if( s1.nUsed==s2.nUsed && memcmp(s1.zBuf, s2.zBuf, s1.nUsed)==0 ){ - isEqual = 1; - }else{ - isEqual = 0; - } - jsonStringReset(&s1); - jsonStringReset(&s2); - if( isOom ) return JSON_MERGE_OOM; - if( isEqual ) break; - } + isEqual = jsonLabelCompare( + (const char*)&pPatch->aBlob[iPLabel+nPLabel], + szPLabel, + (ePLabel==JSONB_TEXT || ePLabel==JSONB_TEXTRAW), + (const char*)&pTarget->aBlob[iTLabel+nTLabel], + szTLabel, + (eTLabel==JSONB_TEXT || eTLabel==JSONB_TEXTRAW)); + if( isEqual ) break; iTCursor = iTValue + nTValue + szTValue; } x = pPatch->aBlob[iPValue] & 0x0f; -- cgit v1.2.3 From 6a8581d828e2c6b940cdf19868c309dde4322ea9 Mon Sep 17 00:00:00 2001 From: drh <> Date: Wed, 6 Dec 2023 15:35:38 +0000 Subject: The rule for the RHS of the ->> and -> operators when the RHS does not begin with $ is that it must be (1) all digits, or (2) all alphanumerics, or (3) contained within [..] or else it will become a quoted label. FossilOrigin-Name: 0e059a546ec11fa5c6d007bd65c249ee2422f1facbdb2792c53e0bc0ccc97e14 --- src/json.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'src/json.c') diff --git a/src/json.c b/src/json.c index 37a0ebfe1..44abe2d16 100644 --- a/src/json.c +++ b/src/json.c @@ -3272,6 +3272,20 @@ static void jsonArrayLengthFunc( jsonParseFree(p); } +/* True if the string is all digits */ +static int jsonAllDigits(const char *z, int n){ + int i; + for(i=0; i"(JSON,PATH) @@ -3329,15 +3343,19 @@ static void jsonExtractFunc( ** [NUMBER] ==> $[NUMBER] // Not PG. Purely for convenience */ jsonStringInit(&jx, ctx); - if( sqlite3Isdigit(zPath[0]) ){ + if( jsonAllDigits(zPath, nPath) ){ jsonAppendRawNZ(&jx, "[", 1); jsonAppendRaw(&jx, zPath, nPath); jsonAppendRawNZ(&jx, "]", 2); - }else if( zPath[0]!='[' ){ + }else if( jsonAllAlphanum(zPath, nPath) ){ jsonAppendRawNZ(&jx, ".", 1); jsonAppendRaw(&jx, zPath, nPath); + }else if( zPath[0]=='[' && nPath>=3 && zPath[nPath-1]==']' ){ + jsonAppendRaw(&jx, zPath, nPath); }else{ + jsonAppendRawNZ(&jx, ".\"", 2); jsonAppendRaw(&jx, zPath, nPath); + jsonAppendRawNZ(&jx, "\"", 1); } jsonStringTerminate(&jx); j = jsonLookupStep(p, 0, jx.zBuf, 0); -- cgit v1.2.3 From 9df01b5ccf78535dd44e1f8c0b83fcee40ea5042 Mon Sep 17 00:00:00 2001 From: drh <> Date: Wed, 6 Dec 2023 16:57:18 +0000 Subject: Fix the routine that determines the json_tree.path value for the first row so that it correctly takes into account escape sequences in the path argument. FossilOrigin-Name: b9243ee8a37c62eb8848e765bd4af83bc1b3d3eb24fb4268a1357ad1f8b2e1fb --- src/json.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'src/json.c') diff --git a/src/json.c b/src/json.c index 44abe2d16..c9014f74a 100644 --- a/src/json.c +++ b/src/json.c @@ -4489,22 +4489,23 @@ static int jsonEachNext(sqlite3_vtab_cursor *cur){ */ static int jsonEachPathLength(JsonEachCursor *p){ u32 n = p->path.nUsed; + const char *z = p->path.zBuf; if( p->iRowid==0 && p->bRecursive && n>1 ){ - if( p->path.zBuf[n-1]==']' ){ + if( z[n-1]==']' ){ do{ - assert( n>0 ); + assert( n>1 ); n--; - }while( p->path.zBuf[n]!='[' ); + }while( z[n]!='[' ); + }else if( z[n-1]=='"' ){ + do{ + assert( n>1 ); + n--; + }while( z[n]!='.' || z[n+1]!='"' ); }else{ - u32 sz = 0; - jsonbPayloadSize(&p->sParse, p->i, &sz); - if( p->path.zBuf[n-1]=='"' ) sz += 2; - assert( szpath.zBuf[n]!='.' && ALWAYS(n>0) ){ + do{ + assert( n>1 ); n--; - assert( n>0 ); - } + }while( z[n]!='.' ); } } return n; -- cgit v1.2.3