aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/ruleutils.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/ruleutils.c')
-rw-r--r--src/backend/utils/adt/ruleutils.c188
1 files changed, 165 insertions, 23 deletions
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index ee1b7f3dc94..badbf111ee0 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -224,6 +224,10 @@ typedef struct
* of aliases to columns of the right input. Thus, positions in the printable
* column alias list are not necessarily one-for-one with varattnos of the
* JOIN, so we need a separate new_colnames[] array for printing purposes.
+ *
+ * Finally, when dealing with wide tables we risk O(N^2) costs in assigning
+ * non-duplicate column names. We ameliorate that by using a hash table that
+ * holds all the strings appearing in colnames, new_colnames, and parentUsing.
*/
typedef struct
{
@@ -291,6 +295,15 @@ typedef struct
int *leftattnos; /* left-child varattnos of join cols, or 0 */
int *rightattnos; /* right-child varattnos of join cols, or 0 */
List *usingNames; /* names assigned to merged columns */
+
+ /*
+ * Hash table holding copies of all the strings appearing in this struct's
+ * colnames, new_colnames, and parentUsing. We use a hash table only for
+ * sufficiently wide relations, and only during the colname-assignment
+ * functions set_relation_column_names and set_join_column_names;
+ * otherwise, names_hash is NULL.
+ */
+ HTAB *names_hash; /* entries are just strings */
} deparse_columns;
/* This macro is analogous to rt_fetch(), but for deparse_columns structs */
@@ -376,6 +389,9 @@ static bool colname_is_unique(const char *colname, deparse_namespace *dpns,
static char *make_colname_unique(char *colname, deparse_namespace *dpns,
deparse_columns *colinfo);
static void expand_colnames_array_to(deparse_columns *colinfo, int n);
+static void build_colinfo_names_hash(deparse_columns *colinfo);
+static void add_to_names_hash(deparse_columns *colinfo, const char *name);
+static void destroy_colinfo_names_hash(deparse_columns *colinfo);
static void identify_join_columns(JoinExpr *j, RangeTblEntry *jrte,
deparse_columns *colinfo);
static char *get_rtable_name(int rtindex, deparse_context *context);
@@ -4133,6 +4149,10 @@ has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode)
*
* parentUsing is a list of all USING aliases assigned in parent joins of
* the current jointree node. (The passed-in list must not be modified.)
+ *
+ * Note that we do not use per-deparse_columns hash tables in this function.
+ * The number of names that need to be assigned should be small enough that
+ * we don't need to trouble with that.
*/
static void
set_using_names(deparse_namespace *dpns, Node *jtnode, List *parentUsing)
@@ -4408,6 +4428,9 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
colinfo->new_colnames = (char **) palloc(ncolumns * sizeof(char *));
colinfo->is_new_col = (bool *) palloc(ncolumns * sizeof(bool));
+ /* If the RTE is wide enough, use a hash table to avoid O(N^2) costs */
+ build_colinfo_names_hash(colinfo);
+
/*
* Scan the columns, select a unique alias for each one, and store it in
* colinfo->colnames and colinfo->new_colnames. The former array has NULL
@@ -4443,6 +4466,7 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
colname = make_colname_unique(colname, dpns, colinfo);
colinfo->colnames[i] = colname;
+ add_to_names_hash(colinfo, colname);
}
/* Put names of non-dropped columns in new_colnames[] too */
@@ -4456,6 +4480,9 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
changed_any = true;
}
+ /* We're now done needing the colinfo's names_hash */
+ destroy_colinfo_names_hash(colinfo);
+
/*
* Set correct length for new_colnames[] array. (Note: if columns have
* been added, colinfo->num_cols includes them, which is not really quite
@@ -4526,6 +4553,9 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
expand_colnames_array_to(colinfo, noldcolumns);
Assert(colinfo->num_cols == noldcolumns);
+ /* If the RTE is wide enough, use a hash table to avoid O(N^2) costs */
+ build_colinfo_names_hash(colinfo);
+
/*
* Scan the join output columns, select an alias for each one, and store
* it in colinfo->colnames. If there are USING columns, set_using_names()
@@ -4563,6 +4593,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
if (rte->alias == NULL)
{
colinfo->colnames[i] = real_colname;
+ add_to_names_hash(colinfo, real_colname);
continue;
}
@@ -4579,6 +4610,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
colname = make_colname_unique(colname, dpns, colinfo);
colinfo->colnames[i] = colname;
+ add_to_names_hash(colinfo, colname);
}
/* Remember if any assigned aliases differ from "real" name */
@@ -4677,6 +4709,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
}
else
colinfo->new_colnames[j] = child_colname;
+ add_to_names_hash(colinfo, colinfo->new_colnames[j]);
}
colinfo->is_new_col[j] = leftcolinfo->is_new_col[jc];
@@ -4726,6 +4759,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
}
else
colinfo->new_colnames[j] = child_colname;
+ add_to_names_hash(colinfo, colinfo->new_colnames[j]);
}
colinfo->is_new_col[j] = rightcolinfo->is_new_col[jc];
@@ -4740,6 +4774,9 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
Assert(j == nnewcolumns);
#endif
+ /* We're now done needing the colinfo's names_hash */
+ destroy_colinfo_names_hash(colinfo);
+
/*
* For a named join, print column aliases if we changed any from the child
* names. Unnamed joins cannot print aliases.
@@ -4762,38 +4799,59 @@ colname_is_unique(const char *colname, deparse_namespace *dpns,
int i;
ListCell *lc;
- /* Check against already-assigned column aliases within RTE */
- for (i = 0; i < colinfo->num_cols; i++)
- {
- char *oldname = colinfo->colnames[i];
-
- if (oldname && strcmp(oldname, colname) == 0)
- return false;
- }
-
/*
- * If we're building a new_colnames array, check that too (this will be
- * partially but not completely redundant with the previous checks)
+ * If we have a hash table, consult that instead of linearly scanning the
+ * colinfo's strings.
*/
- for (i = 0; i < colinfo->num_new_cols; i++)
+ if (colinfo->names_hash)
{
- char *oldname = colinfo->new_colnames[i];
-
- if (oldname && strcmp(oldname, colname) == 0)
+ if (hash_search(colinfo->names_hash,
+ colname,
+ HASH_FIND,
+ NULL) != NULL)
return false;
}
-
- /* Also check against USING-column names that must be globally unique */
- foreach(lc, dpns->using_names)
+ else
{
- char *oldname = (char *) lfirst(lc);
+ /* Check against already-assigned column aliases within RTE */
+ for (i = 0; i < colinfo->num_cols; i++)
+ {
+ char *oldname = colinfo->colnames[i];
- if (strcmp(oldname, colname) == 0)
- return false;
+ if (oldname && strcmp(oldname, colname) == 0)
+ return false;
+ }
+
+ /*
+ * If we're building a new_colnames array, check that too (this will
+ * be partially but not completely redundant with the previous checks)
+ */
+ for (i = 0; i < colinfo->num_new_cols; i++)
+ {
+ char *oldname = colinfo->new_colnames[i];
+
+ if (oldname && strcmp(oldname, colname) == 0)
+ return false;
+ }
+
+ /*
+ * Also check against names already assigned for parent-join USING
+ * cols
+ */
+ foreach(lc, colinfo->parentUsing)
+ {
+ char *oldname = (char *) lfirst(lc);
+
+ if (strcmp(oldname, colname) == 0)
+ return false;
+ }
}
- /* Also check against names already assigned for parent-join USING cols */
- foreach(lc, colinfo->parentUsing)
+ /*
+ * Also check against USING-column names that must be globally unique.
+ * These are not hashed, but there should be few of them.
+ */
+ foreach(lc, dpns->using_names)
{
char *oldname = (char *) lfirst(lc);
@@ -4862,6 +4920,90 @@ expand_colnames_array_to(deparse_columns *colinfo, int n)
}
/*
+ * build_colinfo_names_hash: optionally construct a hash table for colinfo
+ */
+static void
+build_colinfo_names_hash(deparse_columns *colinfo)
+{
+ HASHCTL hash_ctl;
+ int i;
+ ListCell *lc;
+
+ /*
+ * Use a hash table only for RTEs with at least 32 columns. (The cutoff
+ * is somewhat arbitrary, but let's choose it so that this code does get
+ * exercised in the regression tests.)
+ */
+ if (colinfo->num_cols < 32)
+ return;
+
+ /*
+ * Set up the hash table. The entries are just strings with no other
+ * payload.
+ */
+ hash_ctl.keysize = NAMEDATALEN;
+ hash_ctl.entrysize = NAMEDATALEN;
+ hash_ctl.hcxt = CurrentMemoryContext;
+ colinfo->names_hash = hash_create("deparse_columns names",
+ colinfo->num_cols + colinfo->num_new_cols,
+ &hash_ctl,
+ HASH_ELEM | HASH_STRINGS | HASH_CONTEXT);
+
+ /*
+ * Preload the hash table with any names already present (these would have
+ * come from set_using_names).
+ */
+ for (i = 0; i < colinfo->num_cols; i++)
+ {
+ char *oldname = colinfo->colnames[i];
+
+ if (oldname)
+ add_to_names_hash(colinfo, oldname);
+ }
+
+ for (i = 0; i < colinfo->num_new_cols; i++)
+ {
+ char *oldname = colinfo->new_colnames[i];
+
+ if (oldname)
+ add_to_names_hash(colinfo, oldname);
+ }
+
+ foreach(lc, colinfo->parentUsing)
+ {
+ char *oldname = (char *) lfirst(lc);
+
+ add_to_names_hash(colinfo, oldname);
+ }
+}
+
+/*
+ * add_to_names_hash: add a string to the names_hash, if we're using one
+ */
+static void
+add_to_names_hash(deparse_columns *colinfo, const char *name)
+{
+ if (colinfo->names_hash)
+ (void) hash_search(colinfo->names_hash,
+ name,
+ HASH_ENTER,
+ NULL);
+}
+
+/*
+ * destroy_colinfo_names_hash: destroy hash table when done with it
+ */
+static void
+destroy_colinfo_names_hash(deparse_columns *colinfo)
+{
+ if (colinfo->names_hash)
+ {
+ hash_destroy(colinfo->names_hash);
+ colinfo->names_hash = NULL;
+ }
+}
+
+/*
* identify_join_columns: figure out where columns of a join come from
*
* Fills the join-specific fields of the colinfo struct, except for