1 files changed, 165 insertions, 23 deletions
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index ee1b7f3dc94..badbf111ee0 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -224,6 +224,10 @@ typedef struct
  * of aliases to columns of the right input.  Thus, positions in the printable
  * column alias list are not necessarily one-for-one with varattnos of the
  * JOIN, so we need a separate new_colnames[] array for printing purposes.
+ *
+ * Finally, when dealing with wide tables we risk O(N^2) costs in assigning
+ * non-duplicate column names.  We ameliorate that by using a hash table that
+ * holds all the strings appearing in colnames, new_colnames, and parentUsing.
  */
 typedef struct
 {
@@ -291,6 +295,15 @@ typedef struct
 	int		   *leftattnos;		/* left-child varattnos of join cols, or 0 */
 	int		   *rightattnos;	/* right-child varattnos of join cols, or 0 */
 	List	   *usingNames;		/* names assigned to merged columns */
+
+	/*
+	 * Hash table holding copies of all the strings appearing in this struct's
+	 * colnames, new_colnames, and parentUsing.  We use a hash table only for
+	 * sufficiently wide relations, and only during the colname-assignment
+	 * functions set_relation_column_names and set_join_column_names;
+	 * otherwise, names_hash is NULL.
+	 */
+	HTAB	   *names_hash;		/* entries are just strings */
 } deparse_columns;
 
 /* This macro is analogous to rt_fetch(), but for deparse_columns structs */
@@ -376,6 +389,9 @@ static bool colname_is_unique(const char *colname, deparse_namespace *dpns,
 static char *make_colname_unique(char *colname, deparse_namespace *dpns,
 								 deparse_columns *colinfo);
 static void expand_colnames_array_to(deparse_columns *colinfo, int n);
+static void build_colinfo_names_hash(deparse_columns *colinfo);
+static void add_to_names_hash(deparse_columns *colinfo, const char *name);
+static void destroy_colinfo_names_hash(deparse_columns *colinfo);
 static void identify_join_columns(JoinExpr *j, RangeTblEntry *jrte,
 								  deparse_columns *colinfo);
 static char *get_rtable_name(int rtindex, deparse_context *context);
@@ -4133,6 +4149,10 @@ has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode)
  *
  * parentUsing is a list of all USING aliases assigned in parent joins of
  * the current jointree node.  (The passed-in list must not be modified.)
+ *
+ * Note that we do not use per-deparse_columns hash tables in this function.
+ * The number of names that need to be assigned should be small enough that
+ * we don't need to trouble with that.
  */
 static void
 set_using_names(deparse_namespace *dpns, Node *jtnode, List *parentUsing)
@@ -4408,6 +4428,9 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
 	colinfo->new_colnames = (char **) palloc(ncolumns * sizeof(char *));
 	colinfo->is_new_col = (bool *) palloc(ncolumns * sizeof(bool));
 
+	/* If the RTE is wide enough, use a hash table to avoid O(N^2) costs */
+	build_colinfo_names_hash(colinfo);
+
 	/*
 	 * Scan the columns, select a unique alias for each one, and store it in
 	 * colinfo->colnames and colinfo->new_colnames.  The former array has NULL
@@ -4443,6 +4466,7 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
 			colname = make_colname_unique(colname, dpns, colinfo);
 
 			colinfo->colnames[i] = colname;
+			add_to_names_hash(colinfo, colname);
 		}
 
 		/* Put names of non-dropped columns in new_colnames[] too */
@@ -4456,6 +4480,9 @@ set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
 			changed_any = true;
 	}
 
+	/* We're now done needing the colinfo's names_hash */
+	destroy_colinfo_names_hash(colinfo);
+
 	/*
 	 * Set correct length for new_colnames[] array.  (Note: if columns have
 	 * been added, colinfo->num_cols includes them, which is not really quite
@@ -4526,6 +4553,9 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
 	expand_colnames_array_to(colinfo, noldcolumns);
 	Assert(colinfo->num_cols == noldcolumns);
 
+	/* If the RTE is wide enough, use a hash table to avoid O(N^2) costs */
+	build_colinfo_names_hash(colinfo);
+
 	/*
 	 * Scan the join output columns, select an alias for each one, and store
 	 * it in colinfo->colnames.  If there are USING columns, set_using_names()
@@ -4563,6 +4593,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
 		if (rte->alias == NULL)
 		{
 			colinfo->colnames[i] = real_colname;
+			add_to_names_hash(colinfo, real_colname);
 			continue;
 		}
 
@@ -4579,6 +4610,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
 			colname = make_colname_unique(colname, dpns, colinfo);
 
 			colinfo->colnames[i] = colname;
+			add_to_names_hash(colinfo, colname);
 		}
 
 		/* Remember if any assigned aliases differ from "real" name */
@@ -4677,6 +4709,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
 			}
 			else
 				colinfo->new_colnames[j] = child_colname;
+			add_to_names_hash(colinfo, colinfo->new_colnames[j]);
 		}
 
 		colinfo->is_new_col[j] = leftcolinfo->is_new_col[jc];
@@ -4726,6 +4759,7 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
 			}
 			else
 				colinfo->new_colnames[j] = child_colname;
+			add_to_names_hash(colinfo, colinfo->new_colnames[j]);
 		}
 
 		colinfo->is_new_col[j] = rightcolinfo->is_new_col[jc];
@@ -4740,6 +4774,9 @@ set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
 	Assert(j == nnewcolumns);
 #endif
 
+	/* We're now done needing the colinfo's names_hash */
+	destroy_colinfo_names_hash(colinfo);
+
 	/*
 	 * For a named join, print column aliases if we changed any from the child
 	 * names.  Unnamed joins cannot print aliases.
@@ -4762,38 +4799,59 @@ colname_is_unique(const char *colname, deparse_namespace *dpns,
 	int			i;
 	ListCell   *lc;
 
-	/* Check against already-assigned column aliases within RTE */
-	for (i = 0; i < colinfo->num_cols; i++)
-	{
-		char	   *oldname = colinfo->colnames[i];
-
-		if (oldname && strcmp(oldname, colname) == 0)
-			return false;
-	}
-
 	/*
-	 * If we're building a new_colnames array, check that too (this will be
-	 * partially but not completely redundant with the previous checks)
+	 * If we have a hash table, consult that instead of linearly scanning the
+	 * colinfo's strings.
 	 */
-	for (i = 0; i < colinfo->num_new_cols; i++)
+	if (colinfo->names_hash)
 	{
-		char	   *oldname = colinfo->new_colnames[i];
-
-		if (oldname && strcmp(oldname, colname) == 0)
+		if (hash_search(colinfo->names_hash,
+						colname,
+						HASH_FIND,
+						NULL) != NULL)
 			return false;
 	}
-
-	/* Also check against USING-column names that must be globally unique */
-	foreach(lc, dpns->using_names)
+	else
 	{
-		char	   *oldname = (char *) lfirst(lc);
+		/* Check against already-assigned column aliases within RTE */
+		for (i = 0; i < colinfo->num_cols; i++)
+		{
+			char	   *oldname = colinfo->colnames[i];
 
-		if (strcmp(oldname, colname) == 0)
-			return false;
+			if (oldname && strcmp(oldname, colname) == 0)
+				return false;
+		}
+
+		/*
+		 * If we're building a new_colnames array, check that too (this will
+		 * be partially but not completely redundant with the previous checks)
+		 */
+		for (i = 0; i < colinfo->num_new_cols; i++)
+		{
+			char	   *oldname = colinfo->new_colnames[i];
+
+			if (oldname && strcmp(oldname, colname) == 0)
+				return false;
+		}
+
+		/*
+		 * Also check against names already assigned for parent-join USING
+		 * cols
+		 */
+		foreach(lc, colinfo->parentUsing)
+		{
+			char	   *oldname = (char *) lfirst(lc);
+
+			if (strcmp(oldname, colname) == 0)
+				return false;
+		}
 	}
 
-	/* Also check against names already assigned for parent-join USING cols */
-	foreach(lc, colinfo->parentUsing)
+	/*
+	 * Also check against USING-column names that must be globally unique.
+	 * These are not hashed, but there should be few of them.
+	 */
+	foreach(lc, dpns->using_names)
 	{
 		char	   *oldname = (char *) lfirst(lc);
 
@@ -4862,6 +4920,90 @@ expand_colnames_array_to(deparse_columns *colinfo, int n)
 }
 
 /*
+ * build_colinfo_names_hash: optionally construct a hash table for colinfo
+ */
+static void
+build_colinfo_names_hash(deparse_columns *colinfo)
+{
+	HASHCTL		hash_ctl;
+	int			i;
+	ListCell   *lc;
+
+	/*
+	 * Use a hash table only for RTEs with at least 32 columns.  (The cutoff
+	 * is somewhat arbitrary, but let's choose it so that this code does get
+	 * exercised in the regression tests.)
+	 */
+	if (colinfo->num_cols < 32)
+		return;
+
+	/*
+	 * Set up the hash table.  The entries are just strings with no other
+	 * payload.
+	 */
+	hash_ctl.keysize = NAMEDATALEN;
+	hash_ctl.entrysize = NAMEDATALEN;
+	hash_ctl.hcxt = CurrentMemoryContext;
+	colinfo->names_hash = hash_create("deparse_columns names",
+									  colinfo->num_cols + colinfo->num_new_cols,
+									  &hash_ctl,
+									  HASH_ELEM | HASH_STRINGS | HASH_CONTEXT);
+
+	/*
+	 * Preload the hash table with any names already present (these would have
+	 * come from set_using_names).
+	 */
+	for (i = 0; i < colinfo->num_cols; i++)
+	{
+		char	   *oldname = colinfo->colnames[i];
+
+		if (oldname)
+			add_to_names_hash(colinfo, oldname);
+	}
+
+	for (i = 0; i < colinfo->num_new_cols; i++)
+	{
+		char	   *oldname = colinfo->new_colnames[i];
+
+		if (oldname)
+			add_to_names_hash(colinfo, oldname);
+	}
+
+	foreach(lc, colinfo->parentUsing)
+	{
+		char	   *oldname = (char *) lfirst(lc);
+
+		add_to_names_hash(colinfo, oldname);
+	}
+}
+
+/*
+ * add_to_names_hash: add a string to the names_hash, if we're using one
+ */
+static void
+add_to_names_hash(deparse_columns *colinfo, const char *name)
+{
+	if (colinfo->names_hash)
+		(void) hash_search(colinfo->names_hash,
+						   name,
+						   HASH_ENTER,
+						   NULL);
+}
+
+/*
+ * destroy_colinfo_names_hash: destroy hash table when done with it
+ */
+static void
+destroy_colinfo_names_hash(deparse_columns *colinfo)
+{
+	if (colinfo->names_hash)
+	{
+		hash_destroy(colinfo->names_hash);
+		colinfo->names_hash = NULL;
+	}
+}
+
+/*
  * identify_join_columns: figure out where columns of a join come from
  *
  * Fills the join-specific fields of the colinfo struct, except for