aboutsummaryrefslogtreecommitdiff
path: root/ext/misc/regexp.c
diff options
context:
space:
mode:
authordrh <>2022-07-18 11:44:16 +0000
committerdrh <>2022-07-18 11:44:16 +0000
commit67a0bf383f00d4f25d9ca27864caabf2a0f572b2 (patch)
treea48b6ea1f40b57f292bd61342718f0df340886fc /ext/misc/regexp.c
parentb496eef6903dd6a64a9c22611225717bb75e2371 (diff)
downloadsqlite-67a0bf383f00d4f25d9ca27864caabf2a0f572b2.tar.gz
sqlite-67a0bf383f00d4f25d9ca27864caabf2a0f572b2.zip
Enhance the ext/misc/regexp.c code so that when it is compiled with
SQLITE_DEBUG, a new function named regexp_bytecode() is available that prints out the compiled NFA as human-readable text, for debugging purposes. FossilOrigin-Name: cb5c08978fe8f074e6ae16953575213709e98b8bbae4359e0d2e6de67a7ea9e5
Diffstat (limited to 'ext/misc/regexp.c')
-rw-r--r--ext/misc/regexp.c95
1 files changed, 94 insertions, 1 deletions
diff --git a/ext/misc/regexp.c b/ext/misc/regexp.c
index d5b387205..54c505233 100644
--- a/ext/misc/regexp.c
+++ b/ext/misc/regexp.c
@@ -94,6 +94,31 @@ SQLITE_EXTENSION_INIT1
#define RE_OP_NOTSPACE 16 /* Not a digit */
#define RE_OP_BOUNDARY 17 /* Boundary between word and non-word */
+#if defined(SQLITE_DEBUG)
+/* Opcode names used for symbolic debugging */
+static const char *ReOpName[] = {
+ "EOF",
+ "MATCH",
+ "ANY",
+ "ANYSTAR",
+ "FORK",
+ "GOTO",
+ "ACCEPT",
+ "CC_INC",
+ "CC_EXC",
+ "CC_VALUE",
+ "CC_RANGE",
+ "WORD",
+ "NOTWORD",
+ "DIGIT",
+ "NOTDIGIT",
+ "SPACE",
+ "NOTSPACE",
+ "BOUNDARY",
+};
+#endif /* SQLITE_DEBUG */
+
+
/* Each opcode is a "state" in the NFA */
typedef unsigned short ReStateNumber;
@@ -127,7 +152,7 @@ struct ReCompiled {
int *aArg; /* Arguments to each operator */
unsigned (*xNextChar)(ReInput*); /* Next character function */
unsigned char zInit[12]; /* Initial text to match */
- int nInit; /* Number of characters in zInit */
+ int nInit; /* Number of bytes in zInit */
unsigned nState; /* Number of entries in aOp[] and aArg[] */
unsigned nAlloc; /* Slots allocated for aOp[] and aArg[] */
};
@@ -745,6 +770,67 @@ static void re_sql_func(
}
}
+#if defined(SQLITE_DEBUG)
+/*
+** This function is used for testing and debugging only. It is only available
+** if the SQLITE_DEBUG compile-time option is used.
+**
+** Compile a regular expression and then convert the compiled expression into
+** text and return that text.
+*/
+static void re_bytecode_func(
+ sqlite3_context *context,
+ int argc,
+ sqlite3_value **argv
+){
+ const char *zPattern;
+ const char *zErr;
+ ReCompiled *pRe;
+ sqlite3_str *pStr;
+ int i;
+ int n;
+ char *z;
+
+ zPattern = (const char*)sqlite3_value_text(argv[0]);
+ if( zPattern==0 ) return;
+ zErr = re_compile(&pRe, zPattern, sqlite3_user_data(context)!=0);
+ if( zErr ){
+ re_free(pRe);
+ sqlite3_result_error(context, zErr, -1);
+ return;
+ }
+ if( pRe==0 ){
+ sqlite3_result_error_nomem(context);
+ return;
+ }
+ pStr = sqlite3_str_new(0);
+ if( pStr==0 ) goto re_bytecode_func_err;
+ if( pRe->nInit>0 ){
+ sqlite3_str_appendf(pStr, "INIT ");
+ for(i=0; i<pRe->nInit; i++){
+ sqlite3_str_appendf(pStr, "%02x", pRe->zInit[i]);
+ }
+ sqlite3_str_appendf(pStr, "\n");
+ }
+ for(i=0; i<pRe->nState; i++){
+ sqlite3_str_appendf(pStr, "%-8s %4d\n",
+ ReOpName[(unsigned char)pRe->aOp[i]], pRe->aArg[i]);
+ }
+ n = sqlite3_str_length(pStr);
+ z = sqlite3_str_finish(pStr);
+ if( n==0 ){
+ sqlite3_free(z);
+ }else{
+ sqlite3_result_text(context, z, n-1, sqlite3_free);
+ }
+
+re_bytecode_func_err:
+ re_free(pRe);
+}
+
+#endif /* SQLITE_DEBUG */
+
+
/*
** Invoke this routine to register the regexp() function with the
** SQLite database connection.
@@ -769,6 +855,13 @@ int sqlite3_regexp_init(
rc = sqlite3_create_function(db, "regexpi", 2,
SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC,
(void*)db, re_sql_func, 0, 0);
+#if defined(SQLITE_DEBUG)
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_create_function(db, "regexp_bytecode", 1,
+ SQLITE_UTF8|SQLITE_INNOCUOUS|SQLITE_DETERMINISTIC,
+ 0, re_bytecode_func, 0, 0);
+ }
+#endif /* SQLITE_DEBUG */
}
return rc;
}