aboutsummaryrefslogtreecommitdiff
path: root/src/include/regex/regguts.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/regex/regguts.h')
-rw-r--r--src/include/regex/regguts.h24
1 files changed, 21 insertions, 3 deletions
diff --git a/src/include/regex/regguts.h b/src/include/regex/regguts.h
index fb6789b560f..d420ea8316e 100644
--- a/src/include/regex/regguts.h
+++ b/src/include/regex/regguts.h
@@ -372,10 +372,28 @@ struct cnfa
/*
* subexpression tree
+ *
+ * "op" is one of:
+ * '=' plain regex without interesting substructure (implemented as DFA)
+ * 'b' back-reference (has no substructure either)
+ * '(' capture node: captures the match of its single child
+ * '.' concatenation: matches a match for left, then a match for right
+ * '|' alternation: matches a match for left or a match for right
+ * '*' iteration: matches some number of matches of its single child
+ *
+ * Note: the right child of an alternation must be another alternation or
+ * NULL; hence, an N-way branch requires N alternation nodes, not N-1 as you
+ * might expect. This could stand to be changed. Actually I'd rather see
+ * a single alternation node with N children, but that will take revising
+ * the representation of struct subre.
+ *
+ * Note: when a backref is directly quantified, we stick the min/max counts
+ * into the backref rather than plastering an iteration node on top. This is
+ * for efficiency: there is no need to search for possible division points.
*/
struct subre
{
- char op; /* '|', '.' (concat), 'b' (backref), '(', '=' */
+ char op; /* see type codes above */
char flags;
#define LONGER 01 /* prefers longer match */
#define SHORTER 02 /* prefers shorter match */
@@ -393,8 +411,8 @@ struct subre
#define COMBINE(f1, f2) (UP((f1)|(f2)) | PREF2(f1, f2))
short retry; /* index into retry memory */
int subno; /* subexpression number (for 'b' and '(') */
- short min; /* min repetitions, for backref only */
- short max; /* max repetitions, for backref only */
+ short min; /* min repetitions for iteration or backref */
+ short max; /* max repetitions for iteration or backref */
struct subre *left; /* left child, if any (also freelist chain) */
struct subre *right; /* right child, if any */
struct state *begin; /* outarcs from here... */