aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2010-09-24 14:35:01 -0400
committerRobert Haas <rhaas@postgresql.org>2010-09-24 14:36:39 -0400
commit12679b8bc908f941710bed185aa142ad5de539c6 (patch)
tree262014b8fc82b9fd602361fb8af6fd322f0b294d
parent54c88dee46ae63d1f183ed864b624881ed05d370 (diff)
downloadpostgresql-12679b8bc908f941710bed185aa142ad5de539c6.tar.gz
postgresql-12679b8bc908f941710bed185aa142ad5de539c6.zip
In levenshtein_internal(), describe algorithm a bit more clearly.
-rw-r--r--contrib/fuzzystrmatch/fuzzystrmatch.c24
1 files changed, 17 insertions, 7 deletions
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c
index 63921d92717..01084da4072 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.c
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.c
@@ -277,15 +277,25 @@ levenshtein_internal(text *s, text *t,
++n;
/*
- * Instead of building an (m+1)x(n+1) array, we'll use two different
- * arrays of size m+1 for storing accumulated values. At each step one
- * represents the "previous" row and one is the "current" row of the
- * notional large array.
+ * One way to compute Levenshtein distance is to incrementally construct
+ * an (m+1)x(n+1) matrix where cell (i, j) represents the minimum number
+ * of operations required to transform the first i characters of s into
+ * the first j characters of t. The last column of the final row is the
+ * answer.
+ *
+ * We use that algorithm here with some modification. In lieu of holding
+ * the entire array in memory at once, we'll just use two arrays of size
+ * m+1 for storing accumulated values. At each step one array represents
+ * the "previous" row and one is the "current" row of the notional large
+ * array.
*/
prev = (int *) palloc(2 * m * sizeof(int));
curr = prev + m;
- /* Initialize the "previous" row to 0..cols */
+ /*
+ * To transform the first i characters of s into the first 0 characters
+ * of t, we must perform i deletions.
+ */
for (i = 0; i < m; i++)
prev[i] = i * del_c;
@@ -297,8 +307,8 @@ levenshtein_internal(text *s, text *t,
int y_char_len = n != t_bytes + 1 ? pg_mblen(y) : 1;
/*
- * First cell must increment sequentially, as we're on the j'th row of
- * the (m+1)x(n+1) array.
+ * To transform the first 0 characters of s into the first j
+ * characters of t, we must perform j insertions.
*/
curr[0] = j * ins_c;