aboutsummaryrefslogtreecommitdiff
path: root/ext/misc/normalize.c
diff options
context:
space:
mode:
Diffstat (limited to 'ext/misc/normalize.c')
-rw-r--r--ext/misc/normalize.c67
1 files changed, 62 insertions, 5 deletions
diff --git a/ext/misc/normalize.c b/ext/misc/normalize.c
index 914a154d9..fe5fb01b4 100644
--- a/ext/misc/normalize.c
+++ b/ext/misc/normalize.c
@@ -35,11 +35,19 @@
**
** The purpose of normalization is two-fold:
**
-** (1) Sanitize queries by removing possibly sensitive information contained
-** in literals.
+** (1) Sanitize queries by removing potentially private or sensitive
+** information contained in literals.
**
** (2) Identify structurally identical queries by comparing their
** normalized forms.
+**
+** Command-Line Utility
+** --------------------
+**
+** This file also contains code for a command-line utility that converts
+** SQL queries in text files into their normalized forms. To build the
+** command-line program, compile this file with -DSQLITE_NORMALIZE_CLI
+** and link it against the SQLite library.
*/
#include <sqlite3.h>
#include <string.h>
@@ -48,9 +56,13 @@
** Implementation note:
**
** Much of the tokenizer logic is copied out of the tokenize.c source file
-** of SQLite. This logic could be simplified for this particular application,
+** of SQLite. That logic could be simplified for this particular application,
** but that would impose a risk of introducing subtle errors. It is best to
** keep the code as close to the original as possible.
+**
+** The tokenize code is in sync with the SQLite core as of 2018-01-08.
+** Any future changes to the core tokenizer might require corresponding
+** adjustments to the tokenizer logic in this module.
*/
@@ -572,13 +584,54 @@ char *sqlite3_normalize(const char *zSql){
while( j>0 && z[j-1]==' ' ){ j--; }
if( i>0 && z[j-1]!=';' ){ z[j++] = ';'; }
z[j] = 0;
+
+ /* Make a second pass converting "in(...)" where the "..." is not a
+ ** SELECT statement into "in(?,?,?)" */
+ for(i=0; i<j; i=n){
+ char *zIn = strstr(z+i, "in(");
+ int nParen;
+ if( zIn==0 ) break;
+ n = (int)(zIn-z)+3; /* Index of first char past "in(" */
+ if( n && IdChar(zIn[-1]) ) continue;
+ if( strncmp(zIn, "in(select",9)==0 && !IdChar(zIn[9]) ) continue;
+ if( strncmp(zIn, "in(with",7)==0 && !IdChar(zIn[7]) ) continue;
+ for(nParen=1, k=0; z[n+k]; k++){
+ if( z[n+k]=='(' ) nParen++;
+ if( z[n+k]==')' ){
+ nParen--;
+ if( nParen==0 ) break;
+ }
+ }
+ /* k is the number of bytes in the "..." within "in(...)" */
+ if( k<5 ){
+ z = sqlite3_realloc64(z, j+(5-k)+1);
+ if( z==0 ) return 0;
+ memmove(z+n+5, z+n+k, j-(n+k));
+ }else if( k>5 ){
+ memmove(z+n+5, z+n+k, j-(n+k));
+ }
+ j = j-k+5;
+ z[j] = 0;
+ memcpy(z+n, "?,?,?", 5);
+ }
return z;
}
-#ifdef NORMALIZE_TEST
+/*
+** For testing purposes, or to build a stand-alone SQL normalizer program,
+** compile this one source file with the -DSQLITE_NORMALIZE_CLI and link
+** it against any SQLite library. The resulting command-line program will
+** run sqlite3_normalize() over the text of all files named on the command-
+** line and show the result on standard output.
+*/
+#ifdef SQLITE_NORMALIZE_CLI
#include <stdio.h>
#include <stdlib.h>
+/*
+** Break zIn up into separate SQL statements and run sqlite3_normalize()
+** on each one. Print the result of each run.
+*/
static void normalizeFile(char *zIn){
int i;
if( zIn==0 ) return;
@@ -604,6 +657,10 @@ static void normalizeFile(char *zIn){
}
}
+/*
+** The main routine for "sql_normalize". Read files named on the
+** command-line and run the text of each through sqlite3_normalize().
+*/
int main(int argc, char **argv){
int i;
FILE *in;
@@ -636,4 +693,4 @@ int main(int argc, char **argv){
}
sqlite3_free(zBuf);
}
-#endif /* NORMALIZE_TEST */
+#endif /* SQLITE_NORMALIZE_CLI */