aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/parser/parse_expr.c9
-rw-r--r--src/backend/parser/parse_func.c2
-rw-r--r--src/backend/parser/parse_relation.c266
-rw-r--r--src/backend/utils/adt/levenshtein.c9
-rw-r--r--src/include/parser/parse_relation.h21
-rw-r--r--src/test/regress/expected/alter_table.out3
-rw-r--r--src/test/regress/expected/join.out38
-rw-r--r--src/test/regress/sql/join.sql24
8 files changed, 334 insertions, 38 deletions
diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c
index 7829bcbac16..130e52b26c4 100644
--- a/src/backend/parser/parse_expr.c
+++ b/src/backend/parser/parse_expr.c
@@ -556,7 +556,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
colname = strVal(field2);
/* Try to identify as a column of the RTE */
- node = scanRTEForColumn(pstate, rte, colname, cref->location);
+ node = scanRTEForColumn(pstate, rte, colname, cref->location,
+ 0, NULL);
if (node == NULL)
{
/* Try it as a function call on the whole row */
@@ -601,7 +602,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
colname = strVal(field3);
/* Try to identify as a column of the RTE */
- node = scanRTEForColumn(pstate, rte, colname, cref->location);
+ node = scanRTEForColumn(pstate, rte, colname, cref->location,
+ 0, NULL);
if (node == NULL)
{
/* Try it as a function call on the whole row */
@@ -659,7 +661,8 @@ transformColumnRef(ParseState *pstate, ColumnRef *cref)
colname = strVal(field4);
/* Try to identify as a column of the RTE */
- node = scanRTEForColumn(pstate, rte, colname, cref->location);
+ node = scanRTEForColumn(pstate, rte, colname, cref->location,
+ 0, NULL);
if (node == NULL)
{
/* Try it as a function call on the whole row */
diff --git a/src/backend/parser/parse_func.c b/src/backend/parser/parse_func.c
index a20080400e4..53bbaecac39 100644
--- a/src/backend/parser/parse_func.c
+++ b/src/backend/parser/parse_func.c
@@ -1779,7 +1779,7 @@ ParseComplexProjection(ParseState *pstate, char *funcname, Node *first_arg,
((Var *) first_arg)->varno,
((Var *) first_arg)->varlevelsup);
/* Return a Var if funcname matches a column, else NULL */
- return scanRTEForColumn(pstate, rte, funcname, location);
+ return scanRTEForColumn(pstate, rte, funcname, location, 0, NULL);
}
/*
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index f416fc2a45e..80daeb9dc4a 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -33,6 +33,8 @@
#include "utils/syscache.h"
+#define MAX_FUZZY_DISTANCE 3
+
static RangeTblEntry *scanNameSpaceForRefname(ParseState *pstate,
const char *refname, int location);
static RangeTblEntry *scanNameSpaceForRelid(ParseState *pstate, Oid relid,
@@ -520,6 +522,101 @@ GetCTEForRTE(ParseState *pstate, RangeTblEntry *rte, int rtelevelsup)
}
/*
+ * updateFuzzyAttrMatchState
+ * Using Levenshtein distance, consider if column is best fuzzy match.
+ */
+static void
+updateFuzzyAttrMatchState(int fuzzy_rte_penalty,
+ FuzzyAttrMatchState *fuzzystate, RangeTblEntry *rte,
+ const char *actual, const char *match, int attnum)
+{
+ int columndistance;
+ int matchlen;
+
+ /* Bail before computing the Levenshtein distance if there's no hope. */
+ if (fuzzy_rte_penalty > fuzzystate->distance)
+ return;
+
+ /*
+ * Outright reject dropped columns, which can appear here with apparent
+ * empty actual names, per remarks within scanRTEForColumn().
+ */
+ if (actual[0] == '\0')
+ return;
+
+ /* Use Levenshtein to compute match distance. */
+ matchlen = strlen(match);
+ columndistance =
+ varstr_levenshtein_less_equal(actual, strlen(actual), match, matchlen,
+ 1, 1, 1,
+ fuzzystate->distance + 1
+ - fuzzy_rte_penalty);
+
+ /*
+ * If more than half the characters are different, don't treat it as a
+ * match, to avoid making ridiculous suggestions.
+ */
+ if (columndistance > matchlen / 2)
+ return;
+
+ /*
+ * From this point on, we can ignore the distinction between the
+ * RTE-name distance and the column-name distance.
+ */
+ columndistance += fuzzy_rte_penalty;
+
+ /*
+ * If the new distance is less than or equal to that of the best match
+ * found so far, update fuzzystate.
+ */
+ if (columndistance < fuzzystate->distance)
+ {
+ /* Store new lowest observed distance for RTE */
+ fuzzystate->distance = columndistance;
+ fuzzystate->rfirst = rte;
+ fuzzystate->first = attnum;
+ fuzzystate->rsecond = NULL;
+ fuzzystate->second = InvalidAttrNumber;
+ }
+ else if (columndistance == fuzzystate->distance)
+ {
+ /*
+ * This match distance may equal a prior match within this same
+ * range table. When that happens, the prior match may also be
+ * given, but only if there is no more than two equally distant
+ * matches from the RTE (in turn, our caller will only accept
+ * two equally distant matches overall).
+ */
+ if (AttributeNumberIsValid(fuzzystate->second))
+ {
+ /* Too many RTE-level matches */
+ fuzzystate->rfirst = NULL;
+ fuzzystate->first = InvalidAttrNumber;
+ fuzzystate->rsecond = NULL;
+ fuzzystate->second = InvalidAttrNumber;
+ /* Clearly, distance is too low a bar (for *any* RTE) */
+ fuzzystate->distance = columndistance - 1;
+ }
+ else if (AttributeNumberIsValid(fuzzystate->first))
+ {
+ /* Record as provisional second match for RTE */
+ fuzzystate->rsecond = rte;
+ fuzzystate->second = attnum;
+ }
+ else if (fuzzystate->distance <= MAX_FUZZY_DISTANCE)
+ {
+ /*
+ * Record as provisional first match (this can occasionally
+ * occur because previous lowest distance was "too low a
+ * bar", rather than being associated with a real match)
+ */
+ fuzzystate->rfirst = rte;
+ fuzzystate->first = attnum;
+ }
+ }
+}
+
+/*
* scanRTEForColumn
* Search the column names of a single RTE for the given name.
* If found, return an appropriate Var node, else return NULL.
@@ -527,10 +624,14 @@ GetCTEForRTE(ParseState *pstate, RangeTblEntry *rte, int rtelevelsup)
*
* Side effect: if we find a match, mark the RTE as requiring read access
* for the column.
+ *
+ * Additional side effect: if fuzzystate is non-NULL, check non-system columns
+ * for an approximate match and update fuzzystate accordingly.
*/
Node *
scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte, char *colname,
- int location)
+ int location, int fuzzy_rte_penalty,
+ FuzzyAttrMatchState *fuzzystate)
{
Node *result = NULL;
int attnum = 0;
@@ -548,12 +649,16 @@ scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte, char *colname,
* Should this somehow go wrong and we try to access a dropped column,
* we'll still catch it by virtue of the checks in
* get_rte_attribute_type(), which is called by make_var(). That routine
- * has to do a cache lookup anyway, so the check there is cheap.
+ * has to do a cache lookup anyway, so the check there is cheap. Callers
+ * interested in finding match with shortest distance need to defend
+ * against this directly, though.
*/
foreach(c, rte->eref->colnames)
{
+ const char *attcolname = strVal(lfirst(c));
+
attnum++;
- if (strcmp(strVal(lfirst(c)), colname) == 0)
+ if (strcmp(attcolname, colname) == 0)
{
if (result)
ereport(ERROR,
@@ -566,6 +671,11 @@ scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte, char *colname,
markVarForSelectPriv(pstate, var, rte);
result = (Node *) var;
}
+
+ /* Updating fuzzy match state, if provided. */
+ if (fuzzystate != NULL)
+ updateFuzzyAttrMatchState(fuzzy_rte_penalty, fuzzystate,
+ rte, attcolname, colname, attnum);
}
/*
@@ -642,7 +752,8 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly,
continue;
/* use orig_pstate here to get the right sublevels_up */
- newresult = scanRTEForColumn(orig_pstate, rte, colname, location);
+ newresult = scanRTEForColumn(orig_pstate, rte, colname, location,
+ 0, NULL);
if (newresult)
{
@@ -668,8 +779,8 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly,
/*
* searchRangeTableForCol
- * See if any RangeTblEntry could possibly provide the given column name.
- * If so, return a pointer to the RangeTblEntry; else return NULL.
+ * See if any RangeTblEntry could possibly provide the given column name (or
+ * find the best match available). Returns state with relevant details.
*
* This is different from colNameToVar in that it considers every entry in
* the ParseState's rangetable(s), not only those that are currently visible
@@ -677,11 +788,31 @@ colNameToVar(ParseState *pstate, char *colname, bool localonly,
* and it may give ambiguous results (there might be multiple equally valid
* matches, but only one will be returned). This must be used ONLY as a
* heuristic in giving suitable error messages. See errorMissingColumn.
+ *
+ * This function is also different in that it will consider approximate
+ * matches -- if the user entered an alias/column pair that is only slightly
+ * different from a valid pair, we may be able to infer what they meant to
+ * type and provide a reasonable hint.
+ *
+ * The FuzzyAttrMatchState will have 'rfirst' pointing to the best RTE
+ * containing the most promising match for the alias and column name. If
+ * the alias and column names match exactly, 'first' will be InvalidAttrNumber;
+ * otherwise, it will be the attribute number for the match. In the latter
+ * case, 'rsecond' may point to a second, equally close approximate match,
+ * and 'second' will contain the attribute number for the second match.
*/
-static RangeTblEntry *
-searchRangeTableForCol(ParseState *pstate, char *colname, int location)
+static FuzzyAttrMatchState *
+searchRangeTableForCol(ParseState *pstate, const char *alias, char *colname,
+ int location)
{
ParseState *orig_pstate = pstate;
+ FuzzyAttrMatchState *fuzzystate = palloc(sizeof(FuzzyAttrMatchState));
+
+ fuzzystate->distance = MAX_FUZZY_DISTANCE + 1;
+ fuzzystate->rfirst = NULL;
+ fuzzystate->rsecond = NULL;
+ fuzzystate->first = InvalidAttrNumber;
+ fuzzystate->second = InvalidAttrNumber;
while (pstate != NULL)
{
@@ -689,15 +820,51 @@ searchRangeTableForCol(ParseState *pstate, char *colname, int location)
foreach(l, pstate->p_rtable)
{
- RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
+ RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
+ int fuzzy_rte_penalty = 0;
- if (scanRTEForColumn(orig_pstate, rte, colname, location))
- return rte;
+ /*
+ * Typically, it is not useful to look for matches within join
+ * RTEs; they effectively duplicate other RTEs for our purposes,
+ * and if a match is chosen from a join RTE, an unhelpful alias is
+ * displayed in the final diagnostic message.
+ */
+ if (rte->rtekind == RTE_JOIN)
+ continue;
+
+ /*
+ * If the user didn't specify an alias, then matches against one
+ * RTE are as good as another. But if the user did specify an
+ * alias, then we want at least a fuzzy - and preferably an exact
+ * - match for the range table entry.
+ */
+ if (alias != NULL)
+ fuzzy_rte_penalty =
+ varstr_levenshtein(alias, strlen(alias),
+ rte->eref->aliasname,
+ strlen(rte->eref->aliasname),
+ 1, 1, 1);
+
+ /*
+ * Scan for a matching column; if we find an exact match, we're
+ * done. Otherwise, update fuzzystate.
+ */
+ if (scanRTEForColumn(orig_pstate, rte, colname, location,
+ fuzzy_rte_penalty, fuzzystate)
+ && fuzzy_rte_penalty == 0)
+ {
+ fuzzystate->rfirst = rte;
+ fuzzystate->first = InvalidAttrNumber;
+ fuzzystate->rsecond = NULL;
+ fuzzystate->second = InvalidAttrNumber;
+ return fuzzystate;
+ }
}
pstate = pstate->parentParseState;
}
- return NULL;
+
+ return fuzzystate;
}
/*
@@ -2860,34 +3027,67 @@ void
errorMissingColumn(ParseState *pstate,
char *relname, char *colname, int location)
{
- RangeTblEntry *rte;
+ FuzzyAttrMatchState *state;
+ char *closestfirst = NULL;
/*
- * If relname was given, just play dumb and report it. (In practice, a
- * bad qualification name should end up at errorMissingRTE, not here, so
- * no need to work hard on this case.)
+ * Search the entire rtable looking for possible matches. If we find one,
+ * emit a hint about it.
+ *
+ * TODO: improve this code (and also errorMissingRTE) to mention using
+ * LATERAL if appropriate.
*/
- if (relname)
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_COLUMN),
- errmsg("column %s.%s does not exist", relname, colname),
- parser_errposition(pstate, location)));
+ state = searchRangeTableForCol(pstate, relname, colname, location);
/*
- * Otherwise, search the entire rtable looking for possible matches. If
- * we find one, emit a hint about it.
+ * Extract closest col string for best match, if any.
*
- * TODO: improve this code (and also errorMissingRTE) to mention using
- * LATERAL if appropriate.
+ * Infer an exact match referenced despite not being visible from the fact
+ * that an attribute number was not present in state passed back -- this is
+ * what is reported when !closestfirst. There might also be an exact match
+ * that was qualified with an incorrect alias, in which case closestfirst
+ * will be set (so hint is the same as generic fuzzy case).
*/
- rte = searchRangeTableForCol(pstate, colname, location);
-
- ereport(ERROR,
- (errcode(ERRCODE_UNDEFINED_COLUMN),
- errmsg("column \"%s\" does not exist", colname),
- rte ? errhint("There is a column named \"%s\" in table \"%s\", but it cannot be referenced from this part of the query.",
- colname, rte->eref->aliasname) : 0,
- parser_errposition(pstate, location)));
+ if (state->rfirst && AttributeNumberIsValid(state->first))
+ closestfirst = strVal(list_nth(state->rfirst->eref->colnames,
+ state->first - 1));
+
+ if (!state->rsecond)
+ {
+ /*
+ * Handle case where there is zero or one column suggestions to hint,
+ * including exact matches referenced but not visible.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ relname ?
+ errmsg("column %s.%s does not exist", relname, colname):
+ errmsg("column \"%s\" does not exist", colname),
+ state->rfirst ? closestfirst ?
+ errhint("Perhaps you meant to reference the column \"%s\".\"%s\".",
+ state->rfirst->eref->aliasname, closestfirst):
+ errhint("There is a column named \"%s\" in table \"%s\", but it cannot be referenced from this part of the query.",
+ colname, state->rfirst->eref->aliasname): 0,
+ parser_errposition(pstate, location)));
+ }
+ else
+ {
+ /* Handle case where there are two equally useful column hints */
+ char *closestsecond;
+
+ closestsecond = strVal(list_nth(state->rsecond->eref->colnames,
+ state->second - 1));
+
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ relname ?
+ errmsg("column %s.%s does not exist", relname, colname):
+ errmsg("column \"%s\" does not exist", colname),
+ errhint("Perhaps you meant to reference the column \"%s\".\"%s\" or the column \"%s\".\"%s\".",
+ state->rfirst->eref->aliasname, closestfirst,
+ state->rsecond->eref->aliasname, closestsecond),
+ parser_errposition(pstate, location)));
+ }
}
diff --git a/src/backend/utils/adt/levenshtein.c b/src/backend/utils/adt/levenshtein.c
index 3669adc18a0..f6e2ca6452a 100644
--- a/src/backend/utils/adt/levenshtein.c
+++ b/src/backend/utils/adt/levenshtein.c
@@ -95,6 +95,15 @@ varstr_levenshtein(const char *source, int slen, const char *target, int tlen,
#define STOP_COLUMN m
#endif
+ /*
+ * A common use for Levenshtein distance is to match attributes when building
+ * diagnostic, user-visible messages. Restrict the size of
+ * MAX_LEVENSHTEIN_STRLEN at compile time so that this is guaranteed to
+ * work.
+ */
+ StaticAssertStmt(NAMEDATALEN <= MAX_LEVENSHTEIN_STRLEN,
+ "Levenshtein hinting mechanism restricts NAMEDATALEN");
+
m = pg_mbstrlen_with_len(source, slen);
n = pg_mbstrlen_with_len(target, tlen);
diff --git a/src/include/parser/parse_relation.h b/src/include/parser/parse_relation.h
index fc0a2577f31..9dc0d5846b0 100644
--- a/src/include/parser/parse_relation.h
+++ b/src/include/parser/parse_relation.h
@@ -16,6 +16,24 @@
#include "parser/parse_node.h"
+
+/*
+ * Support for fuzzily matching column.
+ *
+ * This is for building diagnostic messages, where non-exact matching
+ * attributes are suggested to the user. The struct's fields may be facets of
+ * a particular RTE, or of an entire range table, depending on context.
+ */
+typedef struct
+{
+ int distance; /* Weighted distance (lowest so far) */
+ RangeTblEntry *rfirst; /* RTE of first */
+ AttrNumber first; /* Closest attribute so far */
+ RangeTblEntry *rsecond; /* RTE of second */
+ AttrNumber second; /* Second closest attribute so far */
+} FuzzyAttrMatchState;
+
+
extern RangeTblEntry *refnameRangeTblEntry(ParseState *pstate,
const char *schemaname,
const char *refname,
@@ -35,7 +53,8 @@ extern RangeTblEntry *GetRTEByRangeTablePosn(ParseState *pstate,
extern CommonTableExpr *GetCTEForRTE(ParseState *pstate, RangeTblEntry *rte,
int rtelevelsup);
extern Node *scanRTEForColumn(ParseState *pstate, RangeTblEntry *rte,
- char *colname, int location);
+ char *colname, int location,
+ int fuzzy_rte_penalty, FuzzyAttrMatchState *fuzzystate);
extern Node *colNameToVar(ParseState *pstate, char *colname, bool localonly,
int location);
extern void markVarForSelectPriv(ParseState *pstate, Var *var,
diff --git a/src/test/regress/expected/alter_table.out b/src/test/regress/expected/alter_table.out
index d2337108715..51db1b67927 100644
--- a/src/test/regress/expected/alter_table.out
+++ b/src/test/regress/expected/alter_table.out
@@ -536,6 +536,7 @@ create table atacc1 ( test int );
-- add a check constraint (fails)
alter table atacc1 add constraint atacc_test1 check (test1>3);
ERROR: column "test1" does not exist
+HINT: Perhaps you meant to reference the column "atacc1"."test".
drop table atacc1;
-- something a little more complicated
create table atacc1 ( test int, test2 int, test3 int);
@@ -1342,6 +1343,7 @@ select f1 from c1;
ERROR: column "f1" does not exist
LINE 1: select f1 from c1;
^
+HINT: Perhaps you meant to reference the column "c1"."f2".
drop table p1 cascade;
NOTICE: drop cascades to table c1
create table p1 (f1 int, f2 int);
@@ -1355,6 +1357,7 @@ select f1 from c1;
ERROR: column "f1" does not exist
LINE 1: select f1 from c1;
^
+HINT: Perhaps you meant to reference the column "c1"."f2".
drop table p1 cascade;
NOTICE: drop cascades to table c1
create table p1 (f1 int, f2 int);
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index ca3a17b283e..1e3fe073504 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -2222,6 +2222,12 @@ select * from t1 left join t2 on (t1.a = t2.a);
200 | 1000 | 200 | 2001
(5 rows)
+-- Test matching of column name with wrong alias
+select t1.x from t1 join t3 on (t1.a = t3.x);
+ERROR: column t1.x does not exist
+LINE 1: select t1.x from t1 join t3 on (t1.a = t3.x);
+ ^
+HINT: Perhaps you meant to reference the column "t3"."x".
--
-- regression test for 8.1 merge right join bug
--
@@ -3434,6 +3440,38 @@ select * from
(0 rows)
--
+-- Test hints given on incorrect column references are useful
+--
+select t1.uunique1 from
+ tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t1" suggestipn
+ERROR: column t1.uunique1 does not exist
+LINE 1: select t1.uunique1 from
+ ^
+HINT: Perhaps you meant to reference the column "t1"."unique1".
+select t2.uunique1 from
+ tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t2" suggestion
+ERROR: column t2.uunique1 does not exist
+LINE 1: select t2.uunique1 from
+ ^
+HINT: Perhaps you meant to reference the column "t2"."unique1".
+select uunique1 from
+ tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, suggest both at once
+ERROR: column "uunique1" does not exist
+LINE 1: select uunique1 from
+ ^
+HINT: Perhaps you meant to reference the column "t1"."unique1" or the column "t2"."unique1".
+--
+-- Take care to reference the correct RTE
+--
+select atts.relid::regclass, s.* from pg_stats s join
+ pg_attribute a on s.attname = a.attname and s.tablename =
+ a.attrelid::regclass::text join (select unnest(indkey) attnum,
+ indexrelid from pg_index i) atts on atts.attnum = a.attnum where
+ schemaname != 'pg_catalog';
+ERROR: column atts.relid does not exist
+LINE 1: select atts.relid::regclass, s.* from pg_stats s join
+ ^
+--
-- Test LATERAL
--
select unique2, x.*
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 6005476c162..7a08bdff7bf 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -397,6 +397,10 @@ insert into t2a values (200, 2001);
select * from t1 left join t2 on (t1.a = t2.a);
+-- Test matching of column name with wrong alias
+
+select t1.x from t1 join t3 on (t1.a = t3.x);
+
--
-- regression test for 8.1 merge right join bug
--
@@ -1060,6 +1064,26 @@ select * from
int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok
--
+-- Test hints given on incorrect column references are useful
+--
+
+select t1.uunique1 from
+ tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t1" suggestipn
+select t2.uunique1 from
+ tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, prefer "t2" suggestion
+select uunique1 from
+ tenk1 t1 join tenk2 t2 on t1.two = t2.two; -- error, suggest both at once
+
+--
+-- Take care to reference the correct RTE
+--
+
+select atts.relid::regclass, s.* from pg_stats s join
+ pg_attribute a on s.attname = a.attname and s.tablename =
+ a.attrelid::regclass::text join (select unnest(indkey) attnum,
+ indexrelid from pg_index i) atts on atts.attnum = a.attnum where
+ schemaname != 'pg_catalog';
+--
-- Test LATERAL
--