7 files changed, 1362 insertions, 899 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 52c30f7d01d..572ef00d2e8 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.58 2000/02/07 04:40:59 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.59 2000/02/15 20:49:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -100,7 +100,7 @@ set_base_rel_pathlist(Query *root)
 		/*
 		 * Generate paths and add them to the rel's pathlist.
 		 *
-		 * add_path/add_pathlist will discard any paths that are dominated
+		 * Note: add_path() will discard any paths that are dominated
 		 * by another available path, keeping only those paths that are
 		 * superior along at least one dimension of cost or sortedness.
 		 */
@@ -109,24 +109,21 @@ set_base_rel_pathlist(Query *root)
 		add_path(rel, create_seqscan_path(rel));
 
 		/* Consider TID scans */
-		add_pathlist(rel, create_tidscan_paths(root, rel));
+		create_tidscan_paths(root, rel);
 
 		/* Consider index paths for both simple and OR index clauses */
-		add_pathlist(rel, create_index_paths(root,
-											 rel,
-											 indices,
-											 rel->baserestrictinfo,
-											 rel->joininfo));
+		create_index_paths(root, rel, indices,
+						   rel->baserestrictinfo,
+						   rel->joininfo);
 
 		/* Note: create_or_index_paths depends on create_index_paths
 		 * to have marked OR restriction clauses with relevant indices;
-		 * this is why it doesn't need to be given the full list of indices.
+		 * this is why it doesn't need to be given the list of indices.
 		 */
-		add_pathlist(rel, create_or_index_paths(root, rel,
-												rel->baserestrictinfo));
+		create_or_index_paths(root, rel, rel->baserestrictinfo);
 
 		/* Now find the cheapest of the paths for this rel */
-		set_cheapest(rel, rel->pathlist);
+		set_cheapest(rel);
 	}
 }
 
@@ -196,8 +193,8 @@ make_one_rel_by_joins(Query *root, int levels_needed)
 				xfunc_trypullup(rel);
 #endif
 
-			/* Find and save the cheapest path for this rel */
-			set_cheapest(rel, rel->pathlist);
+			/* Find and save the cheapest paths for this rel */
+			set_cheapest(rel);
 
 #ifdef OPTIMIZER_DEBUG
 			debug_print_rel(root, rel);
@@ -279,15 +276,26 @@ print_path(Query *root, Path *path, int indent)
 	if (join)
 	{
 		jp = (JoinPath *) path;
-		printf("%s rows=%.0f cost=%f\n",
-			   ptype, path->parent->rows, path->path_cost);
+
+		printf("%s rows=%.0f cost=%.2f..%.2f\n",
+			   ptype, path->parent->rows,
+			   path->startup_cost, path->total_cost);
+
+		if (path->pathkeys)
+		{
+			for (i = 0; i < indent; i++)
+				printf("\t");
+			printf("  pathkeys=");
+			print_pathkeys(path->pathkeys, root->rtable);
+		}
+
 		switch (nodeTag(path))
 		{
 			case T_MergePath:
 			case T_HashPath:
-				for (i = 0; i < indent + 1; i++)
+				for (i = 0; i < indent; i++)
 					printf("\t");
-				printf("   clauses=(");
+				printf("  clauses=(");
 				print_joinclauses(root, jp->joinrestrictinfo);
 				printf(")\n");
 
@@ -297,9 +305,9 @@ print_path(Query *root, Path *path, int indent)
 
 					if (mp->outersortkeys || mp->innersortkeys)
 					{
-						for (i = 0; i < indent + 1; i++)
+						for (i = 0; i < indent; i++)
 							printf("\t");
-						printf("   sortouter=%d sortinner=%d\n",
+						printf("  sortouter=%d sortinner=%d\n",
 							   ((mp->outersortkeys) ? 1 : 0),
 							   ((mp->innersortkeys) ? 1 : 0));
 					}
@@ -315,11 +323,14 @@ print_path(Query *root, Path *path, int indent)
 	{
 		int			relid = lfirsti(path->parent->relids);
 
-		printf("%s(%d) rows=%.0f cost=%f\n",
-			   ptype, relid, path->parent->rows, path->path_cost);
+		printf("%s(%d) rows=%.0f cost=%.2f..%.2f\n",
+			   ptype, relid, path->parent->rows,
+			   path->startup_cost, path->total_cost);
 
-		if (IsA(path, IndexPath))
+		if (path->pathkeys)
 		{
+			for (i = 0; i < indent; i++)
+				printf("\t");
 			printf("  pathkeys=");
 			print_pathkeys(path->pathkeys, root->rtable);
 		}
@@ -339,8 +350,10 @@ debug_print_rel(Query *root, RelOptInfo *rel)
 	printf("\tpath list:\n");
 	foreach(l, rel->pathlist)
 		print_path(root, lfirst(l), 1);
-	printf("\tcheapest path:\n");
-	print_path(root, rel->cheapestpath, 1);
+	printf("\tcheapest startup path:\n");
+	print_path(root, rel->cheapest_startup_path, 1);
+	printf("\tcheapest total path:\n");
+	print_path(root, rel->cheapest_total_path, 1);
 }
 
 #endif	 /* OPTIMIZER_DEBUG */
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 7c8d4b63c07..c14692d5b97 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3,23 +3,46 @@
  * costsize.c
  *	  Routines to compute (and set) relation sizes and path costs
  *
- * Path costs are measured in units of disk accesses: one page fetch
- * has cost 1.  The other primitive unit is the CPU time required to
- * process one tuple, which we set at "cpu_page_weight" of a page
- * fetch.  Obviously, the CPU time per tuple depends on the query
- * involved, but the relative CPU and disk speeds of a given platform
- * are so variable that we are lucky if we can get useful numbers
- * at all.  cpu_page_weight is user-settable, in case a particular
- * user is clueful enough to have a better-than-default estimate
- * of the ratio for his platform.  There is also cpu_index_page_weight,
- * the cost to process a tuple of an index during an index scan.
+ * Path costs are measured in units of disk accesses: one sequential page
+ * fetch has cost 1.  All else is scaled relative to a page fetch, using
+ * the scaling parameters
+ *
+ *	random_page_cost	Cost of a non-sequential page fetch
+ *	cpu_tuple_cost		Cost of typical CPU time to process a tuple
+ *	cpu_index_tuple_cost  Cost of typical CPU time to process an index tuple
+ *	cpu_operator_cost	Cost of CPU time to process a typical WHERE operator
+ *
+ * We also use a rough estimate "effective_cache_size" of the number of
+ * disk pages in Postgres + OS-level disk cache.  (We can't simply use
+ * NBuffers for this purpose because that would ignore the effects of
+ * the kernel's disk cache.)
+ *
+ * Obviously, taking constants for these values is an oversimplification,
+ * but it's tough enough to get any useful estimates even at this level of
+ * detail.  Note that all of these parameters are user-settable, in case
+ * the default values are drastically off for a particular platform.
+ *
+ * We compute two separate costs for each path:
+ *		total_cost: total estimated cost to fetch all tuples
+ *		startup_cost: cost that is expended before first tuple is fetched
+ * In some scenarios, such as when there is a LIMIT or we are implementing
+ * an EXISTS(...) sub-select, it is not necessary to fetch all tuples of the
+ * path's result.  A caller can estimate the cost of fetching a partial
+ * result by interpolating between startup_cost and total_cost.  In detail:
+ *		actual_cost = startup_cost +
+ *			(total_cost - startup_cost) * tuples_to_fetch / path->parent->rows;
+ * Note that a relation's rows count (and, by extension, a Plan's plan_rows)
+ * are set without regard to any LIMIT, so that this equation works properly.
+ * (Also, these routines guarantee not to set the rows count to zero, so there
+ * will be no zero divide.)  RelOptInfos, Paths, and Plans themselves never
+ * account for LIMIT.
  *
  * 
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.51 2000/02/07 04:40:59 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.52 2000/02/15 20:49:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -27,26 +50,25 @@
 #include "postgres.h"
 
 #include <math.h>
-#ifdef HAVE_LIMITS_H
-#include <limits.h>
-#ifndef MAXINT
-#define MAXINT		  INT_MAX
-#endif
-#else
-#ifdef HAVE_VALUES_H
-#include <values.h>
-#endif
-#endif
 
 #include "miscadmin.h"
+#include "nodes/plannodes.h"
+#include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/internal.h"
 #include "optimizer/tlist.h"
 #include "utils/lsyscache.h"
 
 
-Cost		cpu_page_weight = CPU_PAGE_WEIGHT;
-Cost		cpu_index_page_weight = CPU_INDEX_PAGE_WEIGHT;
+#define LOG2(x)  (log(x) / 0.693147180559945)
+#define LOG6(x)  (log(x) / 1.79175946922805)
+
+
+double		effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
+Cost		random_page_cost = DEFAULT_RANDOM_PAGE_COST;
+Cost		cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
+Cost		cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
+Cost		cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
 
 Cost		disable_cost = 100000000.0;
 
@@ -59,53 +81,114 @@ bool		enable_mergejoin = true;
 bool		enable_hashjoin = true;
 
 
+static bool cost_qual_eval_walker(Node *node, Cost *total);
 static void set_rel_width(Query *root, RelOptInfo *rel);
 static int	compute_attribute_width(TargetEntry *tlistentry);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
-static double base_log(double x, double b);
 
 
 /*
  * cost_seqscan
  *	  Determines and returns the cost of scanning a relation sequentially.
- *	  If the relation is a temporary to be materialized from a query
- *	  embedded within a data field (determined by 'relid' containing an
- *	  attribute reference), then a predetermined constant is returned (we
- *	  have NO IDEA how big the result of a POSTQUEL procedure is going to
- *	  be).
- *
- *		disk = p
- *		cpu = CPU-PAGE-WEIGHT * t
+ *
+ * If the relation is a temporary to be materialized from a query
+ * embedded within a data field (determined by 'relid' containing an
+ * attribute reference), then a predetermined constant is returned (we
+ * have NO IDEA how big the result of a POSTQUEL procedure is going to be).
+ *
+ * Note: for historical reasons, this routine and the others in this module
+ * use the passed result Path only to store their startup_cost and total_cost
+ * results into.  All the input data they need is passed as separate
+ * parameters, even though much of it could be extracted from the result Path.
  */
-Cost
-cost_seqscan(RelOptInfo *baserel)
+void
+cost_seqscan(Path *path, RelOptInfo *baserel)
 {
-	Cost		temp = 0;
+	Cost		startup_cost = 0;
+	Cost		run_cost = 0;
+	Cost		cpu_per_tuple;
 
 	/* Should only be applied to base relations */
 	Assert(length(baserel->relids) == 1);
 
 	if (!enable_seqscan)
-		temp += disable_cost;
+		startup_cost += disable_cost;
 
+	/* disk costs */
 	if (lfirsti(baserel->relids) < 0)
 	{
 		/*
 		 * cost of sequentially scanning a materialized temporary relation
 		 */
-		temp += _NONAME_SCAN_COST_;
+		run_cost += _NONAME_SCAN_COST_;
 	}
 	else
 	{
-		temp += baserel->pages;
-		temp += cpu_page_weight * baserel->tuples;
+		/*
+		 * The cost of reading a page sequentially is 1.0, by definition.
+		 * Note that the Unix kernel will typically do some amount of
+		 * read-ahead optimization, so that this cost is less than the true
+		 * cost of reading a page from disk.  We ignore that issue here,
+		 * but must take it into account when estimating the cost of
+		 * non-sequential accesses!
+		 */
+		run_cost += baserel->pages;	/* sequential fetches with cost 1.0 */
 	}
 
-	Assert(temp >= 0);
-	return temp;
+	/* CPU costs */
+	cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
+	run_cost += cpu_per_tuple * baserel->tuples;
+
+	path->startup_cost = startup_cost;
+	path->total_cost = startup_cost + run_cost;
 }
 
+/*
+ * cost_nonsequential_access
+ *	  Estimate the cost of accessing one page at random from a relation
+ *	  (or sort temp file) of the given size in pages.
+ *
+ * The simplistic model that the cost is random_page_cost is what we want
+ * to use for large relations; but for small ones that is a serious
+ * overestimate because of the effects of caching.  This routine tries to
+ * account for that.
+ *
+ * Unfortunately we don't have any good way of estimating the effective cache
+ * size we are working with --- we know that Postgres itself has NBuffers
+ * internal buffers, but the size of the kernel's disk cache is uncertain,
+ * and how much of it we get to use is even less certain.  We punt the problem
+ * for now by assuming we are given an effective_cache_size parameter.
+ *
+ * Given a guesstimated cache size, we estimate the actual I/O cost per page
+ * with the entirely ad-hoc equations:
+ *	for rel_size <= effective_cache_size:
+ *		1 + (random_page_cost/2-1) * (rel_size/effective_cache_size) ** 2
+ *	for rel_size >= effective_cache_size:
+ *		random_page_cost * (1 - (effective_cache_size/rel_size)/2)
+ * These give the right asymptotic behavior (=> 1.0 as rel_size becomes
+ * small, => random_page_cost as it becomes large) and meet in the middle
+ * with the estimate that the cache is about 50% effective for a relation
+ * of the same size as effective_cache_size.  (XXX this is probably all
+ * wrong, but I haven't been able to find any theory about how effective
+ * a disk cache should be presumed to be.)
+ */
+static Cost
+cost_nonsequential_access(double relpages)
+{
+	double		relsize;
+
+	/* don't crash on bad input data */
+	if (relpages <= 0.0 || effective_cache_size <= 0.0)
+		return random_page_cost;
+
+	relsize = relpages / effective_cache_size;
+
+	if (relsize >= 1.0)
+		return random_page_cost * (1.0 - 0.5 / relsize);
+	else
+		return 1.0 + (random_page_cost * 0.5 - 1.0) * relsize * relsize;
+}
 
 /*
  * cost_index
@@ -126,25 +209,28 @@ cost_seqscan(RelOptInfo *baserel)
  * tuples, but they won't reduce the number of tuples we have to fetch from
  * the table, so they don't reduce the scan cost.
  */
-Cost
-cost_index(Query *root,
+void
+cost_index(Path *path, Query *root,
 		   RelOptInfo *baserel,
 		   IndexOptInfo *index,
 		   List *indexQuals,
 		   bool is_injoin)
 {
-	Cost		temp = 0;
-	Cost		indexAccessCost;
+	Cost		startup_cost = 0;
+	Cost		run_cost = 0;
+	Cost		cpu_per_tuple;
+	Cost		indexStartupCost;
+	Cost		indexTotalCost;
 	Selectivity	indexSelectivity;
-	double		reltuples;
-	double		relpages;
+	double		tuples_fetched;
+	double		pages_fetched;
 
 	/* Should only be applied to base relations */
 	Assert(IsA(baserel, RelOptInfo) && IsA(index, IndexOptInfo));
 	Assert(length(baserel->relids) == 1);
 
 	if (!enable_indexscan && !is_injoin)
-		temp += disable_cost;
+		startup_cost += disable_cost;
 
 	/*
 	 * Call index-access-method-specific code to estimate the processing
@@ -152,31 +238,21 @@ cost_index(Query *root,
 	 * (ie, the fraction of main-table tuples we will have to retrieve).
 	 */
 	fmgr(index->amcostestimate, root, baserel, index, indexQuals,
-		 &indexAccessCost, &indexSelectivity);
+		 &indexStartupCost, &indexTotalCost, &indexSelectivity);
 
 	/* all costs for touching index itself included here */
-	temp += indexAccessCost;
+	startup_cost += indexStartupCost;
+	run_cost += indexTotalCost - indexStartupCost;
 
-	/*--------------------
-	 * Estimate number of main-table tuples and pages touched.
-	 *
-	 * Worst case is that each tuple the index tells us to fetch comes
-	 * from a different base-rel page, in which case the I/O cost would be
-	 * 'reltuples' pages.  In practice we can expect the number of page
-	 * fetches to be reduced by the buffer cache, because more than one
-	 * tuple can be retrieved per page fetched.  Currently, we estimate
-	 * the number of pages to be retrieved as
-	 *			MIN(reltuples, relpages)
-	 * This amounts to assuming that the buffer cache is perfectly efficient
-	 * and never ends up reading the same page twice within one scan, which
-	 * of course is too optimistic.  On the other hand, we are assuming that
-	 * the target tuples are perfectly uniformly distributed across the
-	 * relation's pages, which is too pessimistic --- any nonuniformity of
-	 * distribution will reduce the number of pages we have to fetch.
-	 * So, we guess-and-hope that these sources of error will more or less
-	 * balance out.
+	/*
+	 * Estimate number of main-table tuples and pages fetched.
 	 *
-	 * XXX need to add a penalty for nonsequential page fetches.
+	 * If the number of tuples is much smaller than the number of pages in
+	 * the relation, each tuple will cost a separate nonsequential fetch.
+	 * If it is comparable or larger, then probably we will be able to avoid
+	 * some fetches.  We use a growth rate of log(#tuples/#pages + 1) ---
+	 * probably totally bogus, but intuitively it gives the right shape of
+	 * curve at least.
 	 *
 	 * XXX if the relation has recently been "clustered" using this index,
 	 * then in fact the target tuples will be highly nonuniformly distributed,
@@ -184,54 +260,77 @@ cost_index(Query *root,
 	 * have no way to know whether the relation has been clustered, nor how
 	 * much it's been modified since the last clustering, so we ignore this
 	 * effect.  Would be nice to do better someday.
-	 *--------------------
 	 */
 
-	reltuples = indexSelectivity * baserel->tuples;
+	tuples_fetched = indexSelectivity * baserel->tuples;
 
-	relpages = reltuples;
-	if (baserel->pages > 0 && baserel->pages < relpages)
-		relpages = baserel->pages;
+	if (tuples_fetched > 0 && baserel->pages > 0)
+		pages_fetched = baserel->pages *
+			log(tuples_fetched / baserel->pages + 1.0);
+	else
+		pages_fetched = tuples_fetched;
+
+	/*
+	 * Now estimate one nonsequential access per page fetched,
+	 * plus appropriate CPU costs per tuple.
+	 */
 
 	/* disk costs for main table */
-	temp += relpages;
+	run_cost += pages_fetched * cost_nonsequential_access(baserel->pages);
 
-	/* CPU costs for heap tuples */
-	temp += cpu_page_weight * reltuples;
+	/* CPU costs */
+	cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
+	/*
+	 * Assume that the indexquals will be removed from the list of
+	 * restriction clauses that we actually have to evaluate as qpquals.
+	 * This is not completely right, but it's close.
+	 * For a lossy index, however, we will have to recheck all the quals.
+	 */
+	if (! index->lossy)
+		cpu_per_tuple -= cost_qual_eval(indexQuals);
 
-	Assert(temp >= 0);
-	return temp;
+	run_cost += cpu_per_tuple * tuples_fetched;
+
+	path->startup_cost = startup_cost;
+	path->total_cost = startup_cost + run_cost;
 }
 
 /*
  * cost_tidscan
  *	  Determines and returns the cost of scanning a relation using tid-s.
- *
- *		disk = number of tids
- *		cpu = CPU-PAGE-WEIGHT * number_of_tids
  */
-Cost
-cost_tidscan(RelOptInfo *baserel, List *tideval)
+void
+cost_tidscan(Path *path, RelOptInfo *baserel, List *tideval)
 {
-	Cost	temp = 0;
+	Cost		startup_cost = 0;
+	Cost		run_cost = 0;
+	Cost		cpu_per_tuple;
+	int			ntuples = length(tideval);
 
 	if (!enable_tidscan)
-		temp += disable_cost;
+		startup_cost += disable_cost;
 
-	temp += (1.0 + cpu_page_weight) * length(tideval);
+	/* disk costs --- assume each tuple on a different page */
+	run_cost += random_page_cost * ntuples;
 
-	return temp;
+	/* CPU costs */
+	cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
+	run_cost += cpu_per_tuple * ntuples;
+
+	path->startup_cost = startup_cost;
+	path->total_cost = startup_cost + run_cost;
 }
  
 /*
  * cost_sort
  *	  Determines and returns the cost of sorting a relation.
  *
+ * The cost of supplying the input data is NOT included; the caller should
+ * add that cost to both startup and total costs returned from this routine!
+ *
  * If the total volume of data to sort is less than SortMem, we will do
  * an in-memory sort, which requires no I/O and about t*log2(t) tuple
- * comparisons for t tuples.  We use cpu_index_page_weight as the cost
- * of a tuple comparison (is this reasonable, or do we need another
- * basic parameter?).
+ * comparisons for t tuples.
  *
  * If the total volume exceeds SortMem, we switch to a tape-style merge
  * algorithm.  There will still be about t*log2(t) tuple comparisons in
@@ -240,8 +339,14 @@ cost_tidscan(RelOptInfo *baserel, List *tideval)
  * number of initial runs formed (log6 because tuplesort.c uses six-tape
  * merging).  Since the average initial run should be about twice SortMem,
  * we have
- *		disk = 2 * p * ceil(log6(p / (2*SortMem)))
- *		cpu = CPU-INDEX-PAGE-WEIGHT * t * log2(t)
+ *		disk traffic = 2 * relsize * ceil(log6(p / (2*SortMem)))
+ *		cpu = comparison_cost * t * log2(t)
+ *
+ * The disk traffic is assumed to be half sequential and half random
+ * accesses (XXX can't we refine that guess?)
+ *
+ * We charge two operator evals per tuple comparison, which should be in
+ * the right ballpark in most cases.
  *
  * 'pathkeys' is a list of sort keys
  * 'tuples' is the number of tuples in the relation
@@ -252,15 +357,16 @@ cost_tidscan(RelOptInfo *baserel, List *tideval)
  * currently do anything with pathkeys anyway, that doesn't matter...
  * but if it ever does, it should react gracefully to lack of key data.
  */
-Cost
-cost_sort(List *pathkeys, double tuples, int width)
+void
+cost_sort(Path *path, List *pathkeys, double tuples, int width)
 {
-	Cost		temp = 0;
+	Cost		startup_cost = 0;
+	Cost		run_cost = 0;
 	double		nbytes = relation_byte_size(tuples, width);
 	long		sortmembytes = SortMem * 1024L;
 
 	if (!enable_sort)
-		temp += disable_cost;
+		startup_cost += disable_cost;
 
 	/*
 	 * We want to be sure the cost of a sort is never estimated as zero,
@@ -270,42 +376,39 @@ cost_sort(List *pathkeys, double tuples, int width)
 	if (tuples < 2.0)
 		tuples = 2.0;
 
-	temp += cpu_index_page_weight * tuples * base_log(tuples, 2.0);
+	/*
+	 * CPU costs
+	 *
+	 * Assume about two operator evals per tuple comparison
+	 * and N log2 N comparisons
+	 */
+	startup_cost += 2.0 * cpu_operator_cost * tuples * LOG2(tuples);
 
+	/* disk costs */
 	if (nbytes > sortmembytes)
 	{
 		double		npages = ceil(nbytes / BLCKSZ);
 		double		nruns = nbytes / (sortmembytes * 2);
-		double		log_runs = ceil(base_log(nruns, 6.0));
+		double		log_runs = ceil(LOG6(nruns));
+		double		npageaccesses;
 
 		if (log_runs < 1.0)
 			log_runs = 1.0;
-		temp += 2 * npages * log_runs;
+		npageaccesses = 2.0 * npages * log_runs;
+		/* Assume half are sequential (cost 1), half are not */
+		startup_cost += npageaccesses *
+			(1.0 + cost_nonsequential_access(npages)) * 0.5;
 	}
 
-	Assert(temp > 0);
-	return temp;
-}
-
-
-/*
- * cost_result
- *	  Determines and returns the cost of writing a relation of 'tuples'
- *	  tuples of 'width' bytes out to a result relation.
- */
-#ifdef NOT_USED
-Cost
-cost_result(double tuples, int width)
-{
-	Cost		temp = 0;
-
-	temp += page_size(tuples, width);
-	temp += cpu_page_weight * tuples;
-	Assert(temp >= 0);
-	return temp;
+	/*
+	 * Note: should we bother to assign a nonzero run_cost to reflect the
+	 * overhead of extracting tuples from the sort result?  Probably not
+	 * worth worrying about.
+	 */
+	path->startup_cost = startup_cost;
+	path->total_cost = startup_cost + run_cost;
 }
 
-#endif
 
 /*
  * cost_nestloop
@@ -314,23 +417,45 @@ cost_result(double tuples, int width)
  *
  * 'outer_path' is the path for the outer relation
  * 'inner_path' is the path for the inner relation
+ * 'restrictlist' are the RestrictInfo nodes to be applied at the join
  * 'is_indexjoin' is true if we are using an indexscan for the inner relation
+ *		(not currently needed here; the indexscan adjusts its cost...)
  */
-Cost
-cost_nestloop(Path *outer_path,
+void
+cost_nestloop(Path *path,
+			  Path *outer_path,
 			  Path *inner_path,
+			  List *restrictlist,
 			  bool is_indexjoin)
 {
-	Cost		temp = 0;
+	Cost		startup_cost = 0;
+	Cost		run_cost = 0;
+	Cost		cpu_per_tuple;
+	double		ntuples;
 
 	if (!enable_nestloop)
-		temp += disable_cost;
+		startup_cost += disable_cost;
+
+	/* cost of source data */
+	/*
+	 * NOTE: we assume that the inner path's startup_cost is paid once, not
+	 * over again on each restart.  This is certainly correct if the inner
+	 * path is materialized.  Are there any cases where it is wrong?
+	 */
+	startup_cost += outer_path->startup_cost + inner_path->startup_cost;
+	run_cost += outer_path->total_cost - outer_path->startup_cost;
+	run_cost += outer_path->parent->rows *
+		(inner_path->total_cost - inner_path->startup_cost);
 
-	temp += outer_path->path_cost;
-	temp += outer_path->parent->rows * inner_path->path_cost;
+	/* number of tuples processed (not number emitted!) */
+	ntuples = outer_path->parent->rows * inner_path->parent->rows;
 
-	Assert(temp >= 0);
-	return temp;
+	/* CPU costs */
+	cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
+	run_cost += cpu_per_tuple * ntuples;
+
+	path->startup_cost = startup_cost;
+	path->total_cost = startup_cost + run_cost;
 }
 
 /*
@@ -340,33 +465,66 @@ cost_nestloop(Path *outer_path,
  *
  * 'outer_path' is the path for the outer relation
  * 'inner_path' is the path for the inner relation
+ * 'restrictlist' are the RestrictInfo nodes to be applied at the join
  * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
  *				to sort the outer and inner relations, or NIL if no explicit
  *				sort is needed because the source path is already ordered
  */
-Cost
-cost_mergejoin(Path *outer_path,
+void
+cost_mergejoin(Path *path,
+			   Path *outer_path,
 			   Path *inner_path,
+			   List *restrictlist,
 			   List *outersortkeys,
 			   List *innersortkeys)
 {
-	Cost		temp = 0;
+	Cost		startup_cost = 0;
+	Cost		run_cost = 0;
+	Cost		cpu_per_tuple;
+	double		ntuples;
+	Path		sort_path;		/* dummy for result of cost_sort */
 
 	if (!enable_mergejoin)
-		temp += disable_cost;
+		startup_cost += disable_cost;
 
 	/* cost of source data */
-	temp += outer_path->path_cost + inner_path->path_cost;
-
-	if (outersortkeys)			/* do we need to sort? */
-		temp += cost_sort(outersortkeys,
-						  outer_path->parent->rows,
-						  outer_path->parent->width);
+	/*
+	 * Note we are assuming that each source tuple is fetched just once,
+	 * which is not right in the presence of equal keys.  If we had a way of
+	 * estimating the proportion of equal keys, we could apply a correction
+	 * factor...
+	 */
+	if (outersortkeys)			/* do we need to sort outer? */
+	{
+		startup_cost += outer_path->total_cost;
+		cost_sort(&sort_path,
+				  outersortkeys,
+				  outer_path->parent->rows,
+				  outer_path->parent->width);
+		startup_cost += sort_path.startup_cost;
+		run_cost += sort_path.total_cost - sort_path.startup_cost;
+	}
+	else
+	{
+		startup_cost += outer_path->startup_cost;
+		run_cost += outer_path->total_cost - outer_path->startup_cost;
+	}
 
-	if (innersortkeys)			/* do we need to sort? */
-		temp += cost_sort(innersortkeys,
-						  inner_path->parent->rows,
-						  inner_path->parent->width);
+	if (innersortkeys)			/* do we need to sort inner? */
+	{
+		startup_cost += inner_path->total_cost;
+		cost_sort(&sort_path,
+				  innersortkeys,
+				  inner_path->parent->rows,
+				  inner_path->parent->width);
+		startup_cost += sort_path.startup_cost;
+		run_cost += sort_path.total_cost - sort_path.startup_cost;
+	}
+	else
+	{
+		startup_cost += inner_path->startup_cost;
+		run_cost += inner_path->total_cost - inner_path->startup_cost;
+	}
 
 	/*
 	 * Estimate the number of tuples to be processed in the mergejoin itself
@@ -374,11 +532,14 @@ cost_mergejoin(Path *outer_path,
 	 * underestimate if there are many equal-keyed tuples in either relation,
 	 * but we have no good way of estimating that...
 	 */
-	temp += cpu_page_weight * (outer_path->parent->rows +
-							   inner_path->parent->rows);
+	ntuples = outer_path->parent->rows + inner_path->parent->rows;
 
-	Assert(temp >= 0);
-	return temp;
+	/* CPU costs */
+	cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
+	run_cost += cpu_per_tuple * ntuples;
+
+	path->startup_cost = startup_cost;
+	path->total_cost = startup_cost + run_cost;
 }
 
 /*
@@ -388,15 +549,21 @@ cost_mergejoin(Path *outer_path,
  *
  * 'outer_path' is the path for the outer relation
  * 'inner_path' is the path for the inner relation
+ * 'restrictlist' are the RestrictInfo nodes to be applied at the join
  * 'innerdisbursion' is an estimate of the disbursion statistic
  *				for the inner hash key.
  */
-Cost
-cost_hashjoin(Path *outer_path,
+void
+cost_hashjoin(Path *path,
+			  Path *outer_path,
 			  Path *inner_path,
+			  List *restrictlist,
 			  Selectivity innerdisbursion)
 {
-	Cost		temp = 0;
+	Cost		startup_cost = 0;
+	Cost		run_cost = 0;
+	Cost		cpu_per_tuple;
+	double		ntuples;
 	double		outerbytes = relation_byte_size(outer_path->parent->rows,
 												outer_path->parent->width);
 	double		innerbytes = relation_byte_size(inner_path->parent->rows,
@@ -404,48 +571,169 @@ cost_hashjoin(Path *outer_path,
 	long		hashtablebytes = SortMem * 1024L;
 
 	if (!enable_hashjoin)
-		temp += disable_cost;
+		startup_cost += disable_cost;
 
 	/* cost of source data */
-	temp += outer_path->path_cost + inner_path->path_cost;
+	startup_cost += outer_path->startup_cost;
+	run_cost += outer_path->total_cost - outer_path->startup_cost;
+	startup_cost += inner_path->total_cost;
 
-	/* cost of computing hash function: must do it once per tuple */
-	temp += cpu_page_weight * (outer_path->parent->rows +
-							   inner_path->parent->rows);
+	/* cost of computing hash function: must do it once per input tuple */
+	startup_cost += cpu_operator_cost * inner_path->parent->rows;
+	run_cost += cpu_operator_cost * outer_path->parent->rows;
 
 	/* the number of tuple comparisons needed is the number of outer
 	 * tuples times the typical hash bucket size, which we estimate
-	 * conservatively as the inner disbursion times the inner tuple
-	 * count.  The cost per comparison is set at cpu_index_page_weight;
-	 * is that reasonable, or do we need another basic parameter?
+	 * conservatively as the inner disbursion times the inner tuple count.
 	 */
-	temp += cpu_index_page_weight * outer_path->parent->rows *
+	run_cost += cpu_operator_cost * outer_path->parent->rows *
 		(inner_path->parent->rows * innerdisbursion);
 
 	/*
+	 * Estimate the number of tuples that get through the hashing filter
+	 * as one per tuple in the two source relations.  This could be a drastic
+	 * underestimate if there are many equal-keyed tuples in either relation,
+	 * but we have no good way of estimating that...
+	 */
+	ntuples = outer_path->parent->rows + inner_path->parent->rows;
+
+	/* CPU costs */
+	cpu_per_tuple = cpu_tuple_cost + cost_qual_eval(restrictlist);
+	run_cost += cpu_per_tuple * ntuples;
+
+	/*
 	 * if inner relation is too big then we will need to "batch" the join,
 	 * which implies writing and reading most of the tuples to disk an
-	 * extra time.  Charge one cost unit per page of I/O.
+	 * extra time.  Charge one cost unit per page of I/O (correct since
+	 * it should be nice and sequential...).  Writing the inner rel counts
+	 * as startup cost, all the rest as run cost.
 	 */
 	if (innerbytes > hashtablebytes)
-		temp += 2 * (page_size(outer_path->parent->rows,
-							   outer_path->parent->width) +
-					 page_size(inner_path->parent->rows,
-							   inner_path->parent->width));
+	{
+		double	outerpages = page_size(outer_path->parent->rows,
+									   outer_path->parent->width);
+		double	innerpages = page_size(inner_path->parent->rows,
+									   inner_path->parent->width);
+
+		startup_cost += innerpages;
+		run_cost += innerpages + 2 * outerpages;
+	}
 
 	/*
 	 * Bias against putting larger relation on inside.  We don't want
 	 * an absolute prohibition, though, since larger relation might have
 	 * better disbursion --- and we can't trust the size estimates
-	 * unreservedly, anyway.
+	 * unreservedly, anyway.  Instead, inflate the startup cost by
+	 * the square root of the size ratio.  (Why square root?  No real good
+	 * reason, but it seems reasonable...)
+	 */
+	if (innerbytes > outerbytes && outerbytes > 0)
+	{
+		startup_cost *= sqrt(innerbytes / outerbytes);
+	}
+
+	path->startup_cost = startup_cost;
+	path->total_cost = startup_cost + run_cost;
+}
+
+
+/*
+ * cost_qual_eval
+ *		Estimate the CPU cost of evaluating a WHERE clause (once).
+ *		The input can be either an implicitly-ANDed list of boolean
+ *		expressions, or a list of RestrictInfo nodes.
+ */
+Cost
+cost_qual_eval(List *quals)
+{
+	Cost	total = 0;
+
+	cost_qual_eval_walker((Node *) quals, &total);
+	return total;
+}
+
+static bool
+cost_qual_eval_walker(Node *node, Cost *total)
+{
+	if (node == NULL)
+		return false;
+	/*
+	 * Our basic strategy is to charge one cpu_operator_cost for each
+	 * operator or function node in the given tree.  Vars and Consts
+	 * are charged zero, and so are boolean operators (AND, OR, NOT).
+	 * Simplistic, but a lot better than no model at all.
+	 *
+	 * Should we try to account for the possibility of short-circuit
+	 * evaluation of AND/OR?
 	 */
-	if (innerbytes > outerbytes)
-		temp *= 1.1;			/* is this an OK fudge factor? */
+	if (IsA(node, Expr))
+	{
+		Expr   *expr = (Expr *) node;
+
+		switch (expr->opType)
+		{
+			case OP_EXPR:
+			case FUNC_EXPR:
+				*total += cpu_operator_cost;
+				break;
+			case OR_EXPR:
+			case AND_EXPR:
+			case NOT_EXPR:
+				break;
+			case SUBPLAN_EXPR:
+				/*
+				 * A subplan node in an expression indicates that the subplan
+				 * will be executed on each evaluation, so charge accordingly.
+				 * (We assume that sub-selects that can be executed as
+				 * InitPlans have already been removed from the expression.)
+				 *
+				 * NOTE: this logic should agree with make_subplan in
+				 * subselect.c. 
+				 */
+				{
+					SubPlan	   *subplan = (SubPlan *) expr->oper;
+					Plan	   *plan = subplan->plan;
+					Cost		subcost;
+
+					if (subplan->sublink->subLinkType == EXISTS_SUBLINK)
+					{
+						/* we only need to fetch 1 tuple */
+						subcost = plan->startup_cost +
+							(plan->total_cost - plan->startup_cost) / plan->plan_rows;
+					}
+					else if (subplan->sublink->subLinkType == EXPR_SUBLINK)
+					{
+						/* assume we need all tuples */
+						subcost = plan->total_cost;
+					}
+					else
+					{
+						/* assume we need 50% of the tuples */
+						subcost = plan->startup_cost +
+							0.50 * (plan->total_cost - plan->startup_cost);
+					}
+					*total += subcost;
+				}
+				break;
+		}
+		/* fall through to examine args of Expr node */
+	}
+	/*
+	 * expression_tree_walker doesn't know what to do with RestrictInfo nodes,
+	 * but we just want to recurse through them.
+	 */
+	if (IsA(node, RestrictInfo))
+	{
+		RestrictInfo   *restrictinfo = (RestrictInfo *) node;
 
-	Assert(temp >= 0);
-	return temp;
+		return cost_qual_eval_walker((Node *) restrictinfo->clause, total);
+	}
+	/* Otherwise, recurse. */
+	return expression_tree_walker(node, cost_qual_eval_walker,
+								  (void *) total);
 }
 
+
 /*
  * set_baserel_size_estimates
  *		Set the size estimates for the given base relation.
@@ -457,6 +745,7 @@ cost_hashjoin(Path *outer_path,
  *	rows: the estimated number of output tuples (after applying
  *	      restriction clauses).
  *	width: the estimated average output tuple width in bytes.
+ *	baserestrictcost: estimated cost of evaluating baserestrictinfo clauses.
  */
 void
 set_baserel_size_estimates(Query *root, RelOptInfo *rel)
@@ -468,7 +757,14 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel)
 		restrictlist_selectivity(root,
 								 rel->baserestrictinfo,
 								 lfirsti(rel->relids));
-	Assert(rel->rows >= 0);
+	/*
+	 * Force estimate to be at least one row, to make explain output look
+	 * better and to avoid possible divide-by-zero when interpolating cost.
+	 */
+	if (rel->rows < 1.0)
+		rel->rows = 1.0;
+
+	rel->baserestrictcost = cost_qual_eval(rel->baserestrictinfo);
 
 	set_rel_width(root, rel);
 }
@@ -513,7 +809,12 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
 									 restrictlist,
 									 0);
 
-	Assert(temp >= 0);
+	/*
+	 * Force estimate to be at least one row, to make explain output look
+	 * better and to avoid possible divide-by-zero when interpolating cost.
+	 */
+	if (temp < 1.0)
+		temp = 1.0;
 	rel->rows = temp;
 
 	/*
@@ -582,9 +883,3 @@ page_size(double tuples, int width)
 {
 	return ceil(relation_byte_size(tuples, width) / BLCKSZ);
 }
-
-static double
-base_log(double x, double b)
-{
-	return log(x) / log(b);
-}
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 4c2b0109bc0..edb16ce0d6d 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.79 2000/02/05 18:26:09 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.80 2000/02/15 20:49:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -83,7 +83,8 @@ static List *index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index,
 static bool useful_for_mergejoin(RelOptInfo *rel, IndexOptInfo *index,
 								 List *joininfo_list);
 static bool useful_for_ordering(Query *root, RelOptInfo *rel,
-								IndexOptInfo *index);
+								IndexOptInfo *index,
+								ScanDirection scandir);
 static bool match_index_to_operand(int indexkey, Var *operand,
 								   RelOptInfo *rel, IndexOptInfo *index);
 static bool function_index_operand(Expr *funcOpnd, RelOptInfo *rel,
@@ -106,6 +107,8 @@ static bool string_lessthan(const char * str1, const char * str2,
 /*
  * create_index_paths()
  *	  Generate all interesting index paths for the given relation.
+ *	  Candidate paths are added to the rel's pathlist (using add_path).
+ *	  Additional IndexPath nodes may also be added to rel's innerjoin list.
  *
  * To be considered for an index scan, an index must match one or more
  * restriction clauses or join clauses from the query's qual condition,
@@ -120,29 +123,26 @@ static bool string_lessthan(const char * str1, const char * str2,
  * in its join clauses.  In that context, values for the other rels'
  * attributes are available and fixed during any one scan of the indexpath.
  *
- * This routine's return value is a list of plain IndexPaths for each
- * index the routine deems potentially interesting for the current query
+ * An IndexPath is generated and submitted to add_path() for each index
+ * this routine deems potentially interesting for the current query
  * (at most one IndexPath per index on the given relation).  An innerjoin
  * path is also generated for each interesting combination of outer join
- * relations.  The innerjoin paths are *not* in the return list, but are
- * appended to the "innerjoin" list of the relation itself.
+ * relations.  The innerjoin paths are *not* passed to add_path(), but are
+ * appended to the "innerjoin" list of the relation for later consideration
+ * in nested-loop joins.
  *
  * 'rel' is the relation for which we want to generate index paths
  * 'indices' is a list of available indexes for 'rel'
  * 'restrictinfo_list' is a list of restrictinfo nodes for 'rel'
  * 'joininfo_list' is a list of joininfo nodes for 'rel'
- *
- * Returns a list of IndexPath access path descriptors.  Additional
- * IndexPath nodes may also be added to the rel->innerjoin list.
  */
-List *
+void
 create_index_paths(Query *root,
 				   RelOptInfo *rel,
 				   List *indices,
 				   List *restrictinfo_list,
 				   List *joininfo_list)
 {
-	List	   *retval = NIL;
 	List	   *ilist;
 
 	foreach(ilist, indices)
@@ -189,9 +189,9 @@ create_index_paths(Query *root,
 													restrictinfo_list);
 
 		if (restrictclauses != NIL)
-			retval = lappend(retval,
-							 create_index_path(root, rel, index,
-											   restrictclauses));
+			add_path(rel, (Path *) create_index_path(root, rel, index,
+													 restrictclauses,
+													 NoMovementScanDirection));
 
 		/*
 		 * 3. If this index can be used for a mergejoin, then create an
@@ -205,10 +205,22 @@ create_index_paths(Query *root,
 		if (restrictclauses == NIL)
 		{
 			if (useful_for_mergejoin(rel, index, joininfo_list) ||
-				useful_for_ordering(root, rel, index))
-				retval = lappend(retval,
-								 create_index_path(root, rel, index, NIL));
+				useful_for_ordering(root, rel, index, ForwardScanDirection))
+				add_path(rel, (Path *)
+						 create_index_path(root, rel, index,
+										   NIL,
+										   ForwardScanDirection));
 		}
+		/*
+		 * Currently, backwards scan is never considered except for the case
+		 * of matching a query result ordering.  Possibly should consider
+		 * it in other places?
+		 */
+		if (useful_for_ordering(root, rel, index, BackwardScanDirection))
+			add_path(rel, (Path *)
+					 create_index_path(root, rel, index,
+									   NIL,
+									   BackwardScanDirection));
 
 		/*
 		 * 4. Create an innerjoin index path for each combination of
@@ -231,8 +243,6 @@ create_index_paths(Query *root,
 												   joinouterrelids));
 		}
 	}
-
-	return retval;
 }
 
 
@@ -892,39 +902,26 @@ useful_for_mergejoin(RelOptInfo *rel,
  *	  Determine whether the given index can produce an ordering matching
  *	  the order that is wanted for the query result.
  *
- * We check to see whether either forward or backward scan direction can
- * match the specified pathkeys.
- *
  * 'rel' is the relation for which 'index' is defined
+ * 'scandir' is the contemplated scan direction
  */
 static bool
 useful_for_ordering(Query *root,
 					RelOptInfo *rel,
-					IndexOptInfo *index)
+					IndexOptInfo *index,
+					ScanDirection scandir)
 {
 	List	   *index_pathkeys;
 
 	if (root->query_pathkeys == NIL)
 		return false;			/* no special ordering requested */
 
-	index_pathkeys = build_index_pathkeys(root, rel, index);
+	index_pathkeys = build_index_pathkeys(root, rel, index, scandir);
 
 	if (index_pathkeys == NIL)
 		return false;			/* unordered index */
 
-	if (pathkeys_contained_in(root->query_pathkeys, index_pathkeys))
-		return true;
-
-	/* caution: commute_pathkeys destructively modifies its argument;
-	 * safe because we just built the index_pathkeys for local use here.
-	 */
-	if (commute_pathkeys(index_pathkeys))
-	{
-		if (pathkeys_contained_in(root->query_pathkeys, index_pathkeys))
-			return true;		/* useful as a reverse-order path */
-	}
-
-	return false;
+	return pathkeys_contained_in(root->query_pathkeys, index_pathkeys);
 }
 
 /****************************************************************************
@@ -1433,7 +1430,12 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index,
 
 		pathnode->path.pathtype = T_IndexScan;
 		pathnode->path.parent = rel;
-		pathnode->path.pathkeys = build_index_pathkeys(root, rel, index);
+		/*
+		 * There's no point in marking the path with any pathkeys, since
+		 * it will only ever be used as the inner path of a nestloop,
+		 * and so its ordering does not matter.
+		 */
+		pathnode->path.pathkeys = NIL;
 
 		indexquals = get_actual_clauses(clausegroup);
 		/* expand special operators to indexquals the executor can handle */
@@ -1446,11 +1448,13 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index,
 		pathnode->indexid = lconsi(index->indexoid, NIL);
 		pathnode->indexqual = lcons(indexquals, NIL);
 
+		/* We don't actually care what order the index scans in ... */
+		pathnode->indexscandir = NoMovementScanDirection;
+
 		/* joinrelids saves the rels needed on the outer side of the join */
 		pathnode->joinrelids = lfirst(outerrelids_list);
 
-		pathnode->path.path_cost = cost_index(root, rel, index, indexquals,
-											  true);
+		cost_index(&pathnode->path, root, rel, index, indexquals, true);
 
 		path_list = lappend(path_list, pathnode);
 		outerrelids_list = lnext(outerrelids_list);
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index f8912a1a547..091e2e40c79 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.51 2000/02/07 04:40:59 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.52 2000/02/15 20:49:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -27,24 +27,21 @@
 #include "parser/parsetree.h"
 #include "utils/lsyscache.h"
 
+static void sort_inner_and_outer(Query *root, RelOptInfo *joinrel,
+								 RelOptInfo *outerrel, RelOptInfo *innerrel,
+								 List *restrictlist, List *mergeclause_list);
+static void match_unsorted_outer(Query *root, RelOptInfo *joinrel,
+								 RelOptInfo *outerrel, RelOptInfo *innerrel,
+								 List *restrictlist, List *mergeclause_list);
+#ifdef NOT_USED
+static void match_unsorted_inner(Query *root, RelOptInfo *joinrel,
+								 RelOptInfo *outerrel, RelOptInfo *innerrel,
+								 List *restrictlist, List *mergeclause_list);
+#endif
+static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
+								 RelOptInfo *outerrel, RelOptInfo *innerrel,
+								 List *restrictlist);
 static Path *best_innerjoin(List *join_paths, List *outer_relid);
-static List *sort_inner_and_outer(RelOptInfo *joinrel,
-								  RelOptInfo *outerrel,
-								  RelOptInfo *innerrel,
-								  List *restrictlist,
-								  List *mergeclause_list);
-static List *match_unsorted_outer(RelOptInfo *joinrel, RelOptInfo *outerrel,
-								  RelOptInfo *innerrel, List *restrictlist,
-								  List *outerpath_list, Path *cheapest_inner,
-								  Path *best_innerjoin,
-								  List *mergeclause_list);
-static List *match_unsorted_inner(RelOptInfo *joinrel, RelOptInfo *outerrel,
-								  RelOptInfo *innerrel, List *restrictlist,
-								  List *innerpath_list,
-								  List *mergeclause_list);
-static List *hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
-								  RelOptInfo *outerrel, RelOptInfo *innerrel,
-								  List *restrictlist);
 static Selectivity estimate_disbursion(Query *root, Var *var);
 static List *select_mergejoin_clauses(RelOptInfo *joinrel,
 									  RelOptInfo *outerrel,
@@ -70,15 +67,9 @@ add_paths_to_joinrel(Query *root,
 					 RelOptInfo *innerrel,
 					 List *restrictlist)
 {
-	Path	   *bestinnerjoin;
 	List	   *mergeclause_list = NIL;
 
 	/*
-	 * Get the best inner join for match_unsorted_outer().
-	 */
-	bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids);
-
-	/*
 	 * Find potential mergejoin clauses.
 	 */
 	if (enable_mergejoin)
@@ -91,84 +82,41 @@ add_paths_to_joinrel(Query *root,
 	 * 1. Consider mergejoin paths where both relations must be
 	 * explicitly sorted.
 	 */
-	add_pathlist(joinrel, sort_inner_and_outer(joinrel,
-											   outerrel,
-											   innerrel,
-											   restrictlist,
-											   mergeclause_list));
+	sort_inner_and_outer(root, joinrel, outerrel, innerrel,
+						 restrictlist, mergeclause_list);
 
 	/*
 	 * 2. Consider paths where the outer relation need not be
 	 * explicitly sorted. This includes both nestloops and
 	 * mergejoins where the outer path is already ordered.
 	 */
-	add_pathlist(joinrel, match_unsorted_outer(joinrel,
-											   outerrel,
-											   innerrel,
-											   restrictlist,
-											   outerrel->pathlist,
-											   innerrel->cheapestpath,
-											   bestinnerjoin,
-											   mergeclause_list));
+	match_unsorted_outer(root, joinrel, outerrel, innerrel,
+						 restrictlist, mergeclause_list);
 
+#ifdef NOT_USED
 	/*
 	 * 3. Consider paths where the inner relation need not be
 	 * explicitly sorted.  This includes mergejoins only
 	 * (nestloops were already built in match_unsorted_outer).
+	 *
+	 * Diked out as redundant 2/13/2000 -- tgl.  There isn't any
+	 * really significant difference between the inner and outer
+	 * side of a mergejoin, so match_unsorted_inner creates no paths
+	 * that aren't equivalent to those made by match_unsorted_outer
+	 * when add_paths_to_joinrel() is invoked with the two rels given
+	 * in the other order.
 	 */
-	add_pathlist(joinrel, match_unsorted_inner(joinrel,
-											   outerrel,
-											   innerrel,
-											   restrictlist,
-											   innerrel->pathlist,
-											   mergeclause_list));
+	match_unsorted_inner(root, joinrel, outerrel, innerrel,
+						 restrictlist, mergeclause_list);
+#endif
 
 	/*
 	 * 4. Consider paths where both outer and inner relations must be
 	 * hashed before being joined.
 	 */
 	if (enable_hashjoin)
-		add_pathlist(joinrel, hash_inner_and_outer(root,
-												   joinrel,
-												   outerrel,
-												   innerrel,
-												   restrictlist));
-}
-
-/*
- * best_innerjoin
- *	  Find the cheapest index path that has already been identified by
- *	  indexable_joinclauses() as being a possible inner path for the given
- *	  outer relation(s) in a nestloop join.
- *
- * 'join_paths' is a list of potential inner indexscan join paths
- * 'outer_relids' is the relid list of the outer join relation
- *
- * Returns the pathnode of the best path, or NULL if there's no
- * usable path.
- */
-static Path *
-best_innerjoin(List *join_paths, Relids outer_relids)
-{
-	Path	   *cheapest = (Path *) NULL;
-	List	   *join_path;
-
-	foreach(join_path, join_paths)
-	{
-		Path	   *path = (Path *) lfirst(join_path);
-
-		Assert(IsA(path, IndexPath));
-
-		/* path->joinrelids is the set of base rels that must be part of
-		 * outer_relids in order to use this inner path, because those
-		 * rels are used in the index join quals of this inner path.
-		 */
-		if (is_subseti(((IndexPath *) path)->joinrelids, outer_relids) &&
-			(cheapest == NULL ||
-			 path_is_cheaper(path, cheapest)))
-			cheapest = path;
-	}
-	return cheapest;
+		hash_inner_and_outer(root, joinrel, outerrel, innerrel,
+							 restrictlist);
 }
 
 /*
@@ -183,17 +131,15 @@ best_innerjoin(List *join_paths, Relids outer_relids)
  *		clauses that apply to this join
  * 'mergeclause_list' is a list of RestrictInfo nodes for available
  *		mergejoin clauses in this join
- *
- * Returns a list of mergejoin paths.
  */
-static List *
-sort_inner_and_outer(RelOptInfo *joinrel,
+static void
+sort_inner_and_outer(Query *root,
+					 RelOptInfo *joinrel,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 List *restrictlist,
 					 List *mergeclause_list)
 {
-	List	   *path_list = NIL;
 	List	   *i;
 
 	/*
@@ -223,7 +169,6 @@ sort_inner_and_outer(RelOptInfo *joinrel,
 		List		   *outerkeys;
 		List		   *innerkeys;
 		List		   *merge_pathkeys;
-		MergePath	   *path_node;
 
 		/* Make a mergeclause list with this guy first. */
 		curclause_list = lcons(restrictinfo,
@@ -231,31 +176,37 @@ sort_inner_and_outer(RelOptInfo *joinrel,
 									   listCopy(mergeclause_list)));
 		/* Build sort pathkeys for both sides.
 		 *
-		 * Note: it's possible that the cheapest path will already be
-		 * sorted properly --- create_mergejoin_path will detect that case
-		 * and suppress an explicit sort step.
+		 * Note: it's possible that the cheapest paths will already be
+		 * sorted properly.  create_mergejoin_path will detect that case
+		 * and suppress an explicit sort step, so we needn't do so here.
 		 */
-		outerkeys = make_pathkeys_for_mergeclauses(curclause_list,
+		outerkeys = make_pathkeys_for_mergeclauses(root,
+												   curclause_list,
 												   outerrel->targetlist);
-		innerkeys = make_pathkeys_for_mergeclauses(curclause_list,
+		innerkeys = make_pathkeys_for_mergeclauses(root,
+												   curclause_list,
 												   innerrel->targetlist);
 		/* Build pathkeys representing output sort order. */
 		merge_pathkeys = build_join_pathkeys(outerkeys,
 											 joinrel->targetlist,
-											 curclause_list);
-		/* And now we can make the path. */
-		path_node = create_mergejoin_path(joinrel,
-										  outerrel->cheapestpath,
-										  innerrel->cheapestpath,
-										  restrictlist,
-										  merge_pathkeys,
-										  get_actual_clauses(curclause_list),
-										  outerkeys,
-										  innerkeys);
+											 root->equi_key_list);
 
-		path_list = lappend(path_list, path_node);
+		/*
+		 * And now we can make the path.  We only consider the cheapest-
+		 * total-cost input paths, since we are assuming here that a sort
+		 * is required.  We will consider cheapest-startup-cost input paths
+		 * later, and only if they don't need a sort.
+		 */
+		add_path(joinrel, (Path *)
+				 create_mergejoin_path(joinrel,
+									   outerrel->cheapest_total_path,
+									   innerrel->cheapest_total_path,
+									   restrictlist,
+									   merge_pathkeys,
+									   get_actual_clauses(curclause_list),
+									   outerkeys,
+									   innerkeys));
 	}
-	return path_list;
 }
 
 /*
@@ -266,74 +217,56 @@ sort_inner_and_outer(RelOptInfo *joinrel,
  *	  only outer paths that are already ordered well enough for merging).
  *
  * We always generate a nestloop path for each available outer path.
- * If an indexscan inner path exists that is compatible with this outer rel
- * and cheaper than the cheapest general-purpose inner path, then we use
- * the indexscan inner path; else we use the cheapest general-purpose inner.
+ * In fact we may generate as many as three: one on the cheapest-total-cost
+ * inner path, one on the cheapest-startup-cost inner path (if different),
+ * and one on the best inner-indexscan path (if any).
  *
  * We also consider mergejoins if mergejoin clauses are available.  We have
- * two ways to generate the inner path for a mergejoin: use the cheapest
- * inner path (sorting it if it's not suitably ordered already), or using an
- * inner path that is already suitably ordered for the merge.  If the
- * cheapest inner path is suitably ordered, then by definition it's the one
- * to use.  Otherwise, we look for ordered paths that are cheaper than the
- * cheapest inner + sort costs.  If we have several mergeclauses, it could be
- * that there is no inner path (or only a very expensive one) for the full
- * list of mergeclauses, but better paths exist if we truncate the
- * mergeclause list (thereby discarding some sort key requirements).  So, we
- * consider truncations of the mergeclause list as well as the full list.
- * In any case, we find the cheapest suitable path and generate a single
- * output mergejoin path.  (Since all the possible mergejoins will have
- * identical output pathkeys, there is no need to keep any but the cheapest.)
+ * two ways to generate the inner path for a mergejoin: sort the cheapest
+ * inner path, or use an inner path that is already suitably ordered for the
+ * merge.  If we have several mergeclauses, it could be that there is no inner
+ * path (or only a very expensive one) for the full list of mergeclauses, but
+ * better paths exist if we truncate the mergeclause list (thereby discarding
+ * some sort key requirements).  So, we consider truncations of the
+ * mergeclause list as well as the full list.  (Ideally we'd consider all
+ * subsets of the mergeclause list, but that seems way too expensive.)
  *
  * 'joinrel' is the join relation
  * 'outerrel' is the outer join relation
  * 'innerrel' is the inner join relation
  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
  *		clauses that apply to this join
- * 'outerpath_list' is the list of possible outer paths
- * 'cheapest_inner' is the cheapest inner path
- * 'best_innerjoin' is the best inner index path (if any)
  * 'mergeclause_list' is a list of RestrictInfo nodes for available
  *		mergejoin clauses in this join
- *
- * Returns a list of possible join path nodes.
  */
-static List *
-match_unsorted_outer(RelOptInfo *joinrel,
+static void
+match_unsorted_outer(Query *root,
+					 RelOptInfo *joinrel,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 List *restrictlist,
-					 List *outerpath_list,
-					 Path *cheapest_inner,
-					 Path *best_innerjoin,
 					 List *mergeclause_list)
 {
-	List	   *path_list = NIL;
-	Path	   *nestinnerpath;
+	Path	   *bestinnerjoin;
 	List	   *i;
 
 	/*
-	 * We only use the best innerjoin indexpath if it is cheaper
-	 * than the cheapest general-purpose inner path.
+	 * Get the best innerjoin indexpath (if any) for this outer rel.
+	 * It's the same for all outer paths.
 	 */
-	if (best_innerjoin &&
-		path_is_cheaper(best_innerjoin, cheapest_inner))
-		nestinnerpath = best_innerjoin;
-	else
-		nestinnerpath = cheapest_inner;
+	bestinnerjoin = best_innerjoin(innerrel->innerjoin, outerrel->relids);
 
-	foreach(i, outerpath_list)
+	foreach(i, outerrel->pathlist)
 	{
 		Path	   *outerpath = (Path *) lfirst(i);
-		List	   *mergeclauses;
 		List	   *merge_pathkeys;
+		List	   *mergeclauses;
 		List	   *innersortkeys;
-		Path	   *mergeinnerpath;
-		int			mergeclausecount;
+		List	   *trialsortkeys;
+		Path	   *cheapest_startup_inner;
+		Path	   *cheapest_total_inner;
+		int			clausecnt;
 
-		/* Look for useful mergeclauses (if any) */
-		mergeclauses = find_mergeclauses_for_pathkeys(outerpath->pathkeys,
-													  mergeclause_list);
 		/*
 		 * The result will have this sort order (even if it is implemented
 		 * as a nestloop, and even if some of the mergeclauses are implemented
@@ -341,91 +274,137 @@ match_unsorted_outer(RelOptInfo *joinrel,
 		 */
 		merge_pathkeys = build_join_pathkeys(outerpath->pathkeys,
 											 joinrel->targetlist,
-											 mergeclauses);
+											 root->equi_key_list);
+
+		/*
+		 * Always consider a nestloop join with this outer and cheapest-
+		 * total-cost inner.  Consider nestloops using the cheapest-
+		 * startup-cost inner as well, and the best innerjoin indexpath.
+		 */
+		add_path(joinrel, (Path *)
+				 create_nestloop_path(joinrel,
+									  outerpath,
+									  innerrel->cheapest_total_path,
+									  restrictlist,
+									  merge_pathkeys));
+		if (innerrel->cheapest_startup_path != innerrel->cheapest_total_path)
+			add_path(joinrel, (Path *)
+					 create_nestloop_path(joinrel,
+										  outerpath,
+										  innerrel->cheapest_startup_path,
+										  restrictlist,
+										  merge_pathkeys));
+		if (bestinnerjoin != NULL)
+			add_path(joinrel, (Path *)
+					 create_nestloop_path(joinrel,
+										  outerpath,
+										  bestinnerjoin,
+										  restrictlist,
+										  merge_pathkeys));
 
-		/* Always consider a nestloop join with this outer and best inner. */
-		path_list = lappend(path_list,
-							create_nestloop_path(joinrel,
-												 outerpath,
-												 nestinnerpath,
-												 restrictlist,
-												 merge_pathkeys));
+		/* Look for useful mergeclauses (if any) */
+		mergeclauses = find_mergeclauses_for_pathkeys(outerpath->pathkeys,
+													  mergeclause_list);
 
 		/* Done with this outer path if no chance for a mergejoin */
 		if (mergeclauses == NIL)
 			continue;
 
 		/* Compute the required ordering of the inner path */
-		innersortkeys = make_pathkeys_for_mergeclauses(mergeclauses,
+		innersortkeys = make_pathkeys_for_mergeclauses(root,
+													   mergeclauses,
 													   innerrel->targetlist);
 
-		/* Set up on the assumption that we will use the cheapest_inner */
-		mergeinnerpath = cheapest_inner;
-		mergeclausecount = length(mergeclauses);
-
-		/* If the cheapest_inner doesn't need to be sorted, it is the winner
-		 * by definition.
+		/*
+		 * Generate a mergejoin on the basis of sorting the cheapest inner.
+		 * Since a sort will be needed, only cheapest total cost matters.
 		 */
-		if (pathkeys_contained_in(innersortkeys,
-								  cheapest_inner->pathkeys))
-		{
-			/* cheapest_inner is the winner */
-			innersortkeys = NIL; /* we do not need to sort it... */
-		}
-		else
-		{
-			/* look for a presorted path that's cheaper */
-			List	   *trialsortkeys = listCopy(innersortkeys);
-			Cost		cheapest_cost;
-			int			clausecount;
+		add_path(joinrel, (Path *)
+				 create_mergejoin_path(joinrel,
+									   outerpath,
+									   innerrel->cheapest_total_path,
+									   restrictlist,
+									   merge_pathkeys,
+									   get_actual_clauses(mergeclauses),
+									   NIL,
+									   innersortkeys));
 
-			cheapest_cost = cheapest_inner->path_cost +
-				cost_sort(innersortkeys, innerrel->rows, innerrel->width);
+		/*
+		 * Look for presorted inner paths that satisfy the mergeclause list
+		 * or any truncation thereof.  Here, we consider both cheap startup
+		 * cost and cheap total cost.
+		 */
+		trialsortkeys = listCopy(innersortkeys); /* modifiable copy */
+		cheapest_startup_inner = NULL;
+		cheapest_total_inner = NULL;
 
-			for (clausecount = mergeclausecount;
-				 clausecount > 0;
-				 clausecount--)
+		for (clausecnt = length(mergeclauses); clausecnt > 0; clausecnt--)
+		{
+			Path	   *innerpath;
+
+			/* Look for an inner path ordered well enough to merge with
+			 * the first 'clausecnt' mergeclauses.  NB: trialsortkeys list
+			 * is modified destructively, which is why we made a copy...
+			 */
+			trialsortkeys = ltruncate(clausecnt, trialsortkeys);
+			innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+													   trialsortkeys,
+													   TOTAL_COST);
+			if (innerpath != NULL &&
+				(cheapest_total_inner == NULL ||
+				 compare_path_costs(innerpath, cheapest_total_inner,
+									TOTAL_COST) < 0))
 			{
-				Path	   *trialinnerpath;
-
-				/* Look for an inner path ordered well enough to merge with
-				 * the first 'clausecount' mergeclauses.  NB: trialsortkeys
-				 * is modified destructively, which is why we made a copy...
-				 */
-				trialinnerpath =
-					get_cheapest_path_for_pathkeys(innerrel->pathlist,
-												   ltruncate(clausecount,
-															 trialsortkeys),
-												   false);
-				if (trialinnerpath != NULL &&
-					trialinnerpath->path_cost < cheapest_cost)
+				/* Found a cheap (or even-cheaper) sorted path */
+				List   *newclauses;
+
+				newclauses = ltruncate(clausecnt,
+									   get_actual_clauses(mergeclauses));
+				add_path(joinrel, (Path *)
+						 create_mergejoin_path(joinrel,
+											   outerpath,
+											   innerpath,
+											   restrictlist,
+											   merge_pathkeys,
+											   newclauses,
+											   NIL,
+											   NIL));
+				cheapest_total_inner = innerpath;
+			}
+			/* Same on the basis of cheapest startup cost ... */
+			innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
+													   trialsortkeys,
+													   STARTUP_COST);
+			if (innerpath != NULL &&
+				(cheapest_startup_inner == NULL ||
+				 compare_path_costs(innerpath, cheapest_startup_inner,
+									STARTUP_COST) < 0))
+			{
+				/* Found a cheap (or even-cheaper) sorted path */
+				if (innerpath != cheapest_total_inner)
 				{
-					/* Found a cheaper (or even-cheaper) sorted path */
-					cheapest_cost = trialinnerpath->path_cost;
-					mergeinnerpath = trialinnerpath;
-					mergeclausecount = clausecount;
-					innersortkeys = NIL; /* we will not need to sort it... */
+					List   *newclauses;
+
+					newclauses = ltruncate(clausecnt,
+										   get_actual_clauses(mergeclauses));
+					add_path(joinrel, (Path *)
+							 create_mergejoin_path(joinrel,
+												   outerpath,
+												   innerpath,
+												   restrictlist,
+												   merge_pathkeys,
+												   newclauses,
+												   NIL,
+												   NIL));
 				}
+				cheapest_startup_inner = innerpath;
 			}
 		}
-
-		/* Finally, we can build the mergejoin path */
-		mergeclauses = ltruncate(mergeclausecount,
-								 get_actual_clauses(mergeclauses));
-		path_list = lappend(path_list,
-							create_mergejoin_path(joinrel,
-												  outerpath,
-												  mergeinnerpath,
-												  restrictlist,
-												  merge_pathkeys,
-												  mergeclauses,
-												  NIL,
-												  innersortkeys));
 	}
-
-	return path_list;
 }
 
+#ifdef NOT_USED
+
 /*
  * match_unsorted_inner
  *	  Generate mergejoin paths that use an explicit sort of the outer path
@@ -436,86 +415,105 @@ match_unsorted_outer(RelOptInfo *joinrel,
  * 'innerrel' is the inner join relation
  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
  *		clauses that apply to this join
- * 'innerpath_list' is the list of possible inner join paths
  * 'mergeclause_list' is a list of RestrictInfo nodes for available
  *		mergejoin clauses in this join
- *
- * Returns a list of possible merge paths.
  */
-static List *
-match_unsorted_inner(RelOptInfo *joinrel,
+static void
+match_unsorted_inner(Query *root,
+					 RelOptInfo *joinrel,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 List *restrictlist,
-					 List *innerpath_list,
 					 List *mergeclause_list)
 {
-	List	   *path_list = NIL;
 	List	   *i;
 
-	foreach(i, innerpath_list)
+	foreach(i, innerrel->pathlist)
 	{
 		Path	   *innerpath = (Path *) lfirst(i);
 		List	   *mergeclauses;
+		List	   *outersortkeys;
+		List	   *merge_pathkeys;
+		Path	   *totalouterpath;
+		Path	   *startupouterpath;
 
 		/* Look for useful mergeclauses (if any) */
 		mergeclauses = find_mergeclauses_for_pathkeys(innerpath->pathkeys,
 													  mergeclause_list);
+		if (mergeclauses == NIL)
+			continue;
 
-		if (mergeclauses)
-		{
-			List	   *outersortkeys;
-			Path	   *mergeouterpath;
-			List	   *merge_pathkeys;
-
-			/* Compute the required ordering of the outer path */
-			outersortkeys =
-				make_pathkeys_for_mergeclauses(mergeclauses,
-											   outerrel->targetlist);
-
-			/* Look for an outer path already ordered well enough to merge */
-			mergeouterpath =
-				get_cheapest_path_for_pathkeys(outerrel->pathlist,
-											   outersortkeys,
-											   false);
-
-			/* Should we use the mergeouter, or sort the cheapest outer? */
-			if (mergeouterpath != NULL &&
-				mergeouterpath->path_cost <=
-				(outerrel->cheapestpath->path_cost +
-				 cost_sort(outersortkeys, outerrel->rows, outerrel->width)))
-			{
-				/* Use mergeouterpath */
-				outersortkeys = NIL;	/* no explicit sort step */
-			}
-			else
-			{
-				/* Use outerrel->cheapestpath, with the outersortkeys */
-				mergeouterpath = outerrel->cheapestpath;
-			}
+		/* Compute the required ordering of the outer path */
+		outersortkeys = make_pathkeys_for_mergeclauses(root,
+													   mergeclauses,
+													   outerrel->targetlist);
+
+		/*
+		 * Generate a mergejoin on the basis of sorting the cheapest outer.
+		 * Since a sort will be needed, only cheapest total cost matters.
+		 */
+		merge_pathkeys = build_join_pathkeys(outersortkeys,
+											 joinrel->targetlist,
+											 root->equi_key_list);
+		add_path(joinrel, (Path *)
+				 create_mergejoin_path(joinrel,
+									   outerrel->cheapest_total_path,
+									   innerpath,
+									   restrictlist,
+									   merge_pathkeys,
+									   get_actual_clauses(mergeclauses),
+									   outersortkeys,
+									   NIL));
+		/*
+		 * Now generate mergejoins based on already-sufficiently-ordered
+		 * outer paths.  There's likely to be some redundancy here with paths
+		 * already generated by merge_unsorted_outer ... but since
+		 * merge_unsorted_outer doesn't consider all permutations of the
+		 * mergeclause list, it may fail to notice that this particular
+		 * innerpath could have been used with this outerpath.
+		 */
+		totalouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist,
+														outersortkeys,
+														TOTAL_COST);
+		if (totalouterpath == NULL)
+			continue;			/* there won't be a startup-cost path either */
 
-			/* Compute pathkeys the result will have */
-			merge_pathkeys = build_join_pathkeys(
-				outersortkeys ? outersortkeys : mergeouterpath->pathkeys,
-				joinrel->targetlist,
-				mergeclauses);
-
-			mergeclauses = get_actual_clauses(mergeclauses);
-			path_list = lappend(path_list,
-								create_mergejoin_path(joinrel,
-													  mergeouterpath,
-													  innerpath,
-													  restrictlist,
-													  merge_pathkeys,
-													  mergeclauses,
-													  outersortkeys,
-													  NIL));
+		merge_pathkeys = build_join_pathkeys(totalouterpath->pathkeys,
+											 joinrel->targetlist,
+											 root->equi_key_list);
+		add_path(joinrel, (Path *)
+				 create_mergejoin_path(joinrel,
+									   totalouterpath,
+									   innerpath,
+									   restrictlist,
+									   merge_pathkeys,
+									   get_actual_clauses(mergeclauses),
+									   NIL,
+									   NIL));
+
+		startupouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist,
+														  outersortkeys,
+														  STARTUP_COST);
+		if (startupouterpath != NULL && startupouterpath != totalouterpath)
+		{
+			merge_pathkeys = build_join_pathkeys(startupouterpath->pathkeys,
+												 joinrel->targetlist,
+												 root->equi_key_list);
+			add_path(joinrel, (Path *)
+					 create_mergejoin_path(joinrel,
+										   startupouterpath,
+										   innerpath,
+										   restrictlist,
+										   merge_pathkeys,
+										   get_actual_clauses(mergeclauses),
+										   NIL,
+										   NIL));
 		}
 	}
-
-	return path_list;
 }
 
+#endif
+
 /*
  * hash_inner_and_outer
  *	  Create hashjoin join paths by explicitly hashing both the outer and
@@ -526,17 +524,14 @@ match_unsorted_inner(RelOptInfo *joinrel,
  * 'innerrel' is the inner join relation
  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
  *		clauses that apply to this join
- *
- * Returns a list of hashjoin paths.
  */
-static List *
+static void
 hash_inner_and_outer(Query *root,
 					 RelOptInfo *joinrel,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 List *restrictlist)
 {
-	List	   *hpath_list = NIL;
 	Relids		outerrelids = outerrel->relids;
 	Relids		innerrelids = innerrel->relids;
 	List	   *i;
@@ -558,7 +553,6 @@ hash_inner_and_outer(Query *root,
 				   *right,
 				   *inner;
 		Selectivity	innerdisbursion;
-		HashPath   *hash_path;
 
 		if (restrictinfo->hashjoinoperator == InvalidOid)
 			continue;			/* not hashjoinable */
@@ -581,17 +575,66 @@ hash_inner_and_outer(Query *root,
 		/* estimate disbursion of inner var for costing purposes */
 		innerdisbursion = estimate_disbursion(root, inner);
 
-		hash_path = create_hashjoin_path(joinrel,
-										 outerrel->cheapestpath,
-										 innerrel->cheapestpath,
-										 restrictlist,
-										 lcons(clause, NIL),
-										 innerdisbursion);
-
-		hpath_list = lappend(hpath_list, hash_path);
+		/*
+		 * We consider both the cheapest-total-cost and cheapest-startup-cost
+		 * outer paths.  There's no need to consider any but the cheapest-
+		 * total-cost inner path, however.
+		 */
+		add_path(joinrel, (Path *)
+				 create_hashjoin_path(joinrel,
+									  outerrel->cheapest_total_path,
+									  innerrel->cheapest_total_path,
+									  restrictlist,
+									  lcons(clause, NIL),
+									  innerdisbursion));
+		if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
+			add_path(joinrel, (Path *)
+					 create_hashjoin_path(joinrel,
+										  outerrel->cheapest_startup_path,
+										  innerrel->cheapest_total_path,
+										  restrictlist,
+										  lcons(clause, NIL),
+										  innerdisbursion));
 	}
+}
+
+/*
+ * best_innerjoin
+ *	  Find the cheapest index path that has already been identified by
+ *	  indexable_joinclauses() as being a possible inner path for the given
+ *	  outer relation(s) in a nestloop join.
+ *
+ * We compare indexpaths on total_cost only, assuming that they will all have
+ * zero or negligible startup_cost.  We might have to think harder someday...
+ *
+ * 'join_paths' is a list of potential inner indexscan join paths
+ * 'outer_relids' is the relid list of the outer join relation
+ *
+ * Returns the pathnode of the best path, or NULL if there's no
+ * usable path.
+ */
+static Path *
+best_innerjoin(List *join_paths, Relids outer_relids)
+{
+	Path	   *cheapest = (Path *) NULL;
+	List	   *join_path;
+
+	foreach(join_path, join_paths)
+	{
+		Path	   *path = (Path *) lfirst(join_path);
+
+		Assert(IsA(path, IndexPath));
 
-	return hpath_list;
+		/* path->joinrelids is the set of base rels that must be part of
+		 * outer_relids in order to use this inner path, because those
+		 * rels are used in the index join quals of this inner path.
+		 */
+		if (is_subseti(((IndexPath *) path)->joinrelids, outer_relids) &&
+			(cheapest == NULL ||
+			 compare_path_costs(path, cheapest, TOTAL_COST) < 0))
+			cheapest = path;
+	}
+	return cheapest;
 }
 
 /*
diff --git a/src/backend/optimizer/path/orindxpath.c b/src/backend/optimizer/path/orindxpath.c
index 9eb0484fc2f..6226100cfc7 100644
--- a/src/backend/optimizer/path/orindxpath.c
+++ b/src/backend/optimizer/path/orindxpath.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.36 2000/02/05 18:26:09 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.37 2000/02/15 20:49:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,6 +19,7 @@
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/internal.h"
+#include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/plancat.h"
 #include "optimizer/restrictinfo.h"
@@ -27,14 +28,13 @@
 
 static void best_or_subclause_indices(Query *root, RelOptInfo *rel,
 									  List *subclauses, List *indices,
-									  List **indexquals,
-									  List **indexids,
-									  Cost *cost);
+									  IndexPath *pathnode);
 static void best_or_subclause_index(Query *root, RelOptInfo *rel,
 									Expr *subclause, List *indices,
 									List **retIndexQual,
 									Oid *retIndexid,
-									Cost *retCost);
+									Cost *retStartupCost,
+									Cost *retTotalCost);
 
 
 /*
@@ -45,14 +45,13 @@ static void best_or_subclause_index(Query *root, RelOptInfo *rel,
  * 'rel' is the relation entry for which the paths are to be created
  * 'clauses' is the list of available restriction clause nodes
  *
- * Returns a list of index path nodes.
- *
+ * Returns nothing, but adds paths to rel->pathlist via add_path().
  */
-List *
+void
 create_or_index_paths(Query *root,
-					  RelOptInfo *rel, List *clauses)
+					  RelOptInfo *rel,
+					  List *clauses)
 {
-	List	   *path_list = NIL;
 	List	   *clist;
 
 	foreach(clist, clauses)
@@ -86,17 +85,6 @@ create_or_index_paths(Query *root,
 				 * best available index for each subclause.
 				 */
 				IndexPath  *pathnode = makeNode(IndexPath);
-				List	   *indexquals;
-				List	   *indexids;
-				Cost		cost;
-
-				best_or_subclause_indices(root,
-										  rel,
-										  clausenode->clause->args,
-										  clausenode->subclauseindices,
-										  &indexquals,
-										  &indexids,
-										  &cost);
 
 				pathnode->path.pathtype = T_IndexScan;
 				pathnode->path.parent = rel;
@@ -108,17 +96,21 @@ create_or_index_paths(Query *root,
 				 */
 				pathnode->path.pathkeys = NIL;
 
-				pathnode->indexid = indexids;
-				pathnode->indexqual = indexquals;
+				/* We don't actually care what order the index scans in ... */
+				pathnode->indexscandir = NoMovementScanDirection;
+
 				pathnode->joinrelids = NIL;	/* no join clauses here */
-				pathnode->path.path_cost = cost;
 
-				path_list = lappend(path_list, pathnode);
+				best_or_subclause_indices(root,
+										  rel,
+										  clausenode->clause->args,
+										  clausenode->subclauseindices,
+										  pathnode);
+
+				add_path(rel, (Path *) pathnode);
 			}
 		}
 	}
-
-	return path_list;
 }
 
 /*
@@ -128,53 +120,68 @@ create_or_index_paths(Query *root,
  *	  indices.	The cost is the sum of the individual index costs, since
  *	  the executor will perform a scan for each subclause of the 'or'.
  *
- * This routine also creates the indexquals and indexids lists that will
- * be needed by the executor.  The indexquals list has one entry for each
+ * This routine also creates the indexqual and indexid lists that will
+ * be needed by the executor.  The indexqual list has one entry for each
  * scan of the base rel, which is a sublist of indexqual conditions to
  * apply in that scan.  The implicit semantics are AND across each sublist
  * of quals, and OR across the toplevel list (note that the executor
- * takes care not to return any single tuple more than once).  The indexids
- * list gives the index to be used in each scan.
+ * takes care not to return any single tuple more than once).  The indexid
+ * list gives the OID of the index to be used in each scan.
  *
  * 'rel' is the node of the relation on which the indexes are defined
  * 'subclauses' are the subclauses of the 'or' clause
  * 'indices' is a list of sublists of the IndexOptInfo nodes that matched
  *		each subclause of the 'or' clause
- * '*indexquals' gets the constructed indexquals for the path (a list
+ * 'pathnode' is the IndexPath node being built.
+ *
+ * Results are returned by setting these fields of the passed pathnode:
+ * 'indexqual' gets the constructed indexquals for the path (a list
  *		of sublists of clauses, one sublist per scan of the base rel)
- * '*indexids' gets a list of the index OIDs for each scan of the rel
- * '*cost' gets the total cost of the path
+ * 'indexid' gets a list of the index OIDs for each scan of the rel
+ * 'startup_cost' and 'total_cost' get the complete path costs.
+ *
+ * 'startup_cost' is the startup cost for the first index scan only;
+ * startup costs for later scans will be paid later on, so they just
+ * get reflected in total_cost.
+ *
+ * NOTE: we choose each scan on the basis of its total cost, ignoring startup
+ * cost.  This is reasonable as long as all index types have zero or small
+ * startup cost, but we might have to work harder if any index types with
+ * nontrivial startup cost are ever invented.
  */
 static void
 best_or_subclause_indices(Query *root,
 						  RelOptInfo *rel,
 						  List *subclauses,
 						  List *indices,
-						  List **indexquals,	/* return value */
-						  List **indexids,		/* return value */
-						  Cost *cost)			/* return value */
+						  IndexPath *pathnode)
 {
 	List	   *slist;
 
-	*indexquals = NIL;
-	*indexids = NIL;
-	*cost = (Cost) 0.0;
+	pathnode->indexqual = NIL;
+	pathnode->indexid = NIL;
+	pathnode->path.startup_cost = 0;
+	pathnode->path.total_cost = 0;
 
 	foreach(slist, subclauses)
 	{
 		Expr	   *subclause = lfirst(slist);
 		List	   *best_indexqual;
 		Oid			best_indexid;
-		Cost		best_cost;
+		Cost		best_startup_cost;
+		Cost		best_total_cost;
 
 		best_or_subclause_index(root, rel, subclause, lfirst(indices),
-								&best_indexqual, &best_indexid, &best_cost);
+								&best_indexqual, &best_indexid,
+								&best_startup_cost, &best_total_cost);
 
 		Assert(best_indexid != InvalidOid);
 
-		*indexquals = lappend(*indexquals, best_indexqual);
-		*indexids = lappendi(*indexids, best_indexid);
-		*cost += best_cost;
+		pathnode->indexqual = lappend(pathnode->indexqual, best_indexqual);
+		pathnode->indexid = lappendi(pathnode->indexid, best_indexid);
+		if (slist == subclauses)		/* first scan? */
+			pathnode->path.startup_cost = best_startup_cost;
+		pathnode->path.total_cost += best_total_cost;
 
 		indices = lnext(indices);
 	}
@@ -182,16 +189,17 @@ best_or_subclause_indices(Query *root,
 
 /*
  * best_or_subclause_index
- *	  Determines which is the best index to be used with a subclause of
- *	  an 'or' clause by estimating the cost of using each index and selecting
- *	  the least expensive.
+ *	  Determines which is the best index to be used with a subclause of an
+ *	  'or' clause by estimating the cost of using each index and selecting
+ *	  the least expensive (considering total cost only, for now).
  *
  * 'rel' is the node of the relation on which the index is defined
  * 'subclause' is the OR subclause being considered
  * 'indices' is a list of IndexOptInfo nodes that match the subclause
  * '*retIndexQual' gets a list of the indexqual conditions for the best index
  * '*retIndexid' gets the OID of the best index
- * '*retCost' gets the cost of a scan with that index
+ * '*retStartupCost' gets the startup cost of a scan with that index
+ * '*retTotalCost' gets the total cost of a scan with that index
  */
 static void
 best_or_subclause_index(Query *root,
@@ -200,7 +208,8 @@ best_or_subclause_index(Query *root,
 						List *indices,
 						List **retIndexQual,	/* return value */
 						Oid *retIndexid,		/* return value */
-						Cost *retCost)			/* return value */
+						Cost *retStartupCost,	/* return value */
+						Cost *retTotalCost)		/* return value */
 {
 	bool		first_time = true;
 	List	   *ilist;
@@ -208,27 +217,28 @@ best_or_subclause_index(Query *root,
 	/* if we don't match anything, return zeros */
 	*retIndexQual = NIL;
 	*retIndexid = InvalidOid;
-	*retCost = 0.0;
+	*retStartupCost = 0;
+	*retTotalCost = 0;
 
 	foreach(ilist, indices)
 	{
 		IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
 		List	   *indexqual;
-		Cost		subcost;
+		Path		subclause_path;
 
 		Assert(IsA(index, IndexOptInfo));
 
 		/* Convert this 'or' subclause to an indexqual list */
 		indexqual = extract_or_indexqual_conditions(rel, index, subclause);
 
-		subcost = cost_index(root, rel, index, indexqual,
-							 false);
+		cost_index(&subclause_path, root, rel, index, indexqual, false);
 
-		if (first_time || subcost < *retCost)
+		if (first_time || subclause_path.total_cost < *retTotalCost)
 		{
 			*retIndexQual = indexqual;
 			*retIndexid = index->indexoid;
-			*retCost = subcost;
+			*retStartupCost = subclause_path.startup_cost;
+			*retTotalCost = subclause_path.total_cost;
 			first_time = false;
 		}
 	}
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 5aeda1e154e..b578e33f5c8 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.18 2000/01/26 05:56:34 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.19 2000/02/15 20:49:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,6 +17,7 @@
 #include "nodes/makefuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/joininfo.h"
+#include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/tlist.h"
 #include "optimizer/var.h"
@@ -25,9 +26,9 @@
 #include "utils/lsyscache.h"
 
 static PathKeyItem *makePathKeyItem(Node *key, Oid sortop);
-static Var *find_indexkey_var(int indexkey, List *tlist);
-static List *build_join_pathkey(List *pathkeys, List *join_rel_tlist,
-								List *joinclauses);
+static List *make_canonical_pathkey(Query *root, PathKeyItem *item);
+static Var *find_indexkey_var(Query *root, RelOptInfo *rel,
+							  AttrNumber varattno);
 
 
 /*--------------------
@@ -50,50 +51,122 @@ static List *build_join_pathkey(List *pathkeys, List *join_rel_tlist,
  *	Note that a multi-pass indexscan (OR clause scan) has NIL pathkeys since
  *	we can say nothing about the overall order of its result.  Also, an
  *	indexscan on an unordered type of index generates NIL pathkeys.  However,
- *	we can always create a pathkey by doing an explicit sort.
- *
- *	Multi-relation RelOptInfo Path's are more complicated.  Mergejoins are
- *	only performed with equijoins ("=").  Because of this, the resulting
- *	multi-relation path actually has more than one primary key.  For example,
- *	a mergejoin using a clause "tab1.col1 = tab2.col1" would generate pathkeys
- *	of ( (tab1.col1/sortop1 tab2.col1/sortop2) ), indicating that the major
- *	sort order of the Path can be taken to be *either* tab1.col1 or tab2.col1.
- *	They are equal, so they are both primary sort keys.  This allows future
- *	joins to use either var as a pre-sorted key to prevent upper Mergejoins
- *	from having to re-sort the Path.  This is why pathkeys is a List of Lists.
- *
- *	Note that while the order of the top list is meaningful (primary vs.
- *	secondary sort key), the order of each sublist is arbitrary.  No code
- *	working with pathkeys should generate a result that depends on the order
- *	of a pathkey sublist.
+ *	we can always create a pathkey by doing an explicit sort.  The pathkeys
+ *	for a sort plan's output just represent the sort key fields and the
+ *	ordering operators used.
+ *
+ *	Things get more interesting when we consider joins.  Suppose we do a
+ *	mergejoin between A and B using the mergeclause A.X = B.Y.  The output
+ *	of the mergejoin is sorted by X --- but it is also sorted by Y.  We
+ *	represent this fact by listing both keys in a single pathkey sublist:
+ *	( (A.X/xsortop B.Y/ysortop) ).  This pathkey asserts that the major
+ *	sort order of the Path can be taken to be *either* A.X or B.Y.
+ *	They are equal, so they are both primary sort keys.  By doing this,
+ *	we allow future joins to use either var as a pre-sorted key, so upper
+ *	Mergejoins may be able to avoid having to re-sort the Path.  This is
+ *	why pathkeys is a List of Lists.
  *
  *	We keep a sortop associated with each PathKeyItem because cross-data-type
- *	mergejoins are possible; for example int4=int8 is mergejoinable.  In this
- *	case we need to remember that the left var is ordered by int4lt while
- *	the right var is ordered by int8lt.  So the different members of each
- *	sublist could have different sortops.
- *
- *	When producing the pathkeys for a merge or nestloop join, we can keep
- *	all of the keys of the outer path, since the ordering of the outer path
- *	will be preserved in the result.  We add to each pathkey sublist any inner
- *	vars that are equijoined to any of the outer vars in the sublist.  In the
- *	nestloop case we have to be careful to consider only equijoin operators;
- *	the nestloop's join clauses might include non-equijoin operators.
- *	(Currently, we do this by considering only mergejoinable operators while
- *	making the pathkeys, since we have no separate marking for operators that
- *	are equijoins but aren't mergejoinable.)
+ *	mergejoins are possible; for example int4 = int8 is mergejoinable.
+ *	In this case we need to remember that the left var is ordered by int4lt
+ *	while the right var is ordered by int8lt.  So the different members of
+ *	each sublist could have different sortops.
+ *
+ *	Note that while the order of the top list is meaningful (primary vs.
+ *	secondary sort key), the order of each sublist is arbitrary.  Each sublist
+ *	should be regarded as a set of equivalent keys, with no significance
+ *	to the list order.
+ *
+ *	With a little further thought, it becomes apparent that pathkeys for
+ *	joins need not only come from mergejoins.  For example, if we do a
+ *	nestloop join between outer relation A and inner relation B, then any
+ *	pathkeys relevant to A are still valid for the join result: we have
+ *	not altered the order of the tuples from A.  Even more interesting,
+ *	if there was a mergeclause (more formally, an "equijoin clause") A.X=B.Y,
+ *	and A.X was a pathkey for the outer relation A, then we can assert that
+ *	B.Y is a pathkey for the join result; X was ordered before and still is,
+ *	and the joined values of Y are equal to the joined values of X, so Y
+ *	must now be ordered too.  This is true even though we used no mergejoin.
+ *
+ *	More generally, whenever we have an equijoin clause A.X = B.Y and a
+ *	pathkey A.X, we can add B.Y to that pathkey if B is part of the joined
+ *	relation the pathkey is for, *no matter how we formed the join*.
+ *
+ *	In short, then: when producing the pathkeys for a merge or nestloop join,
+ *	we can keep all of the keys of the outer path, since the ordering of the
+ *	outer path will be preserved in the result.  Furthermore, we can add to
+ *	each pathkey sublist any inner vars that are equijoined to any of the
+ *	outer vars in the sublist; this works regardless of whether we are
+ *	implementing the join using that equijoin clause as a mergeclause,
+ *	or merely enforcing the clause after-the-fact as a qpqual filter.
  *
  *	Although Hashjoins also work only with equijoin operators, it is *not*
  *	safe to consider the output of a Hashjoin to be sorted in any particular
  *	order --- not even the outer path's order.  This is true because the
  *	executor might have to split the join into multiple batches.  Therefore
- *	a Hashjoin is always given NIL pathkeys.
+ *	a Hashjoin is always given NIL pathkeys.  (Also, we need to use only
+ *	mergejoinable operators when deducing which inner vars are now sorted,
+ *	because a mergejoin operator tells us which left- and right-datatype
+ *	sortops can be considered equivalent, whereas a hashjoin operator
+ *	doesn't imply anything about sort order.)
  *
  *	Pathkeys are also useful to represent an ordering that we wish to achieve,
  *	since they are easily compared to the pathkeys of a potential candidate
  *	path.  So, SortClause lists are turned into pathkeys lists for use inside
  *	the optimizer.
  *
+ *	OK, now for how it *really* works:
+ *
+ *	We did implement pathkeys just as described above, and found that the
+ *	planner spent a huge amount of time comparing pathkeys, because the
+ *	representation of pathkeys as unordered lists made it expensive to decide
+ *	whether two were equal or not.  So, we've modified the representation
+ *	as described next.
+ *
+ *	If we scan the WHERE clause for equijoin clauses (mergejoinable clauses)
+ *	during planner startup, we can construct lists of equivalent pathkey items
+ *	for the query.  There could be more than two items per equivalence set;
+ *	for example, WHERE A.X = B.Y AND B.Y = C.Z AND D.R = E.S creates the
+ *	equivalence sets { A.X B.Y C.Z } and { D.R E.S } (plus associated sortops).
+ *	Any pathkey item that belongs to an equivalence set implies that all the
+ *	other items in its set apply to the relation too, or at least all the ones
+ *	that are for fields present in the relation.  (Some of the items in the
+ *	set might be for as-yet-unjoined relations.)  Furthermore, any multi-item
+ *	pathkey sublist that appears at any stage of planning the query *must* be
+ *	a subset of one or another of these equivalence sets; there's no way we'd
+ *	have put two items in the same pathkey sublist unless they were equijoined
+ *	in WHERE.
+ *
+ *	Now suppose that we allow a pathkey sublist to contain pathkey items for
+ *	vars that are not yet part of the pathkey's relation.  This introduces
+ *	no logical difficulty, because such items can easily be seen to be
+ *	irrelevant; we just mandate that they be ignored.  But having allowed
+ *	this, we can declare (by fiat) that any multiple-item pathkey sublist
+ *	must be equal() to the appropriate equivalence set.  In effect, whenever
+ *	we make a pathkey sublist that mentions any var appearing in an
+ *	equivalence set, we instantly add all the other vars equivalenced to it,
+ *	whether they appear yet in the pathkey's relation or not.  And we also
+ *	mandate that the pathkey sublist appear in the same order as the
+ *	equivalence set it comes from.  (In practice, we simply return a pointer
+ *	to the relevant equivalence set without building any new sublist at all.)
+ *	This makes comparing pathkeys very simple and fast, and saves a lot of
+ *	work and memory space for pathkey construction as well.
+ *
+ *	Note that pathkey sublists having just one item still exist, and are
+ *	not expected to be equal() to any equivalence set.  This occurs when
+ *	we describe a sort order that involves a var that's not mentioned in
+ *	any equijoin clause of the WHERE.  We could add singleton sets containing
+ *	such vars to the query's list of equivalence sets, but there's little
+ *	point in doing so.
+ *
+ *	By the way, it's OK and even useful for us to build equivalence sets
+ *	that mention multiple vars from the same relation.  For example, if
+ *	we have WHERE A.X = A.Y and we are scanning A using an index on X,
+ *	we can legitimately conclude that the path is sorted by Y as well;
+ *	and this could be handy if Y is the variable used in other join clauses
+ *	or ORDER BY.  So, any WHERE clause with a mergejoinable operator can
+ *	contribute to an equivalence set, even if it's not a join clause.
+ *
  *	-- bjm & tgl
  *--------------------
  */
@@ -113,6 +186,129 @@ makePathKeyItem(Node *key, Oid sortop)
 	return item;
 }
 
+/*
+ * add_equijoined_keys
+ *	  The given clause has a mergejoinable operator, so its two sides
+ *	  can be considered equal after restriction clause application; in
+ *	  particular, any pathkey mentioning one side (with the correct sortop)
+ *	  can be expanded to include the other as well.  Record the vars and
+ *	  associated sortops in the query's equi_key_list for future use.
+ *
+ * The query's equi_key_list field points to a list of sublists of PathKeyItem
+ * nodes, where each sublist is a set of two or more vars+sortops that have
+ * been identified as logically equivalent (and, therefore, we may consider
+ * any two in a set to be equal).  As described above, we will subsequently
+ * use direct pointers to one of these sublists to represent any pathkey
+ * that involves an equijoined variable.
+ *
+ * This code would actually work fine with expressions more complex than
+ * a single Var, but currently it won't see any because check_mergejoinable
+ * won't accept such clauses as mergejoinable.
+ */
+void
+add_equijoined_keys(Query *root, RestrictInfo *restrictinfo)
+{
+	Expr	   *clause = restrictinfo->clause;
+	PathKeyItem *item1 = makePathKeyItem((Node *) get_leftop(clause),
+										 restrictinfo->left_sortop);
+	PathKeyItem *item2 = makePathKeyItem((Node *) get_rightop(clause),
+										 restrictinfo->right_sortop);
+	List	   *newset,
+			   *cursetlink;
+
+	/* We might see a clause X=X; don't make a single-element list from it */
+	if (equal(item1, item2))
+		return;
+	/*
+	 * Our plan is to make a two-element set, then sweep through the existing
+	 * equijoin sets looking for matches to item1 or item2.  When we find one,
+	 * we remove that set from equi_key_list and union it into our new set.
+	 * When done, we add the new set to the front of equi_key_list.
+	 *
+	 * This is a standard UNION-FIND problem, for which there exist better
+	 * data structures than simple lists.  If this code ever proves to be
+	 * a bottleneck then it could be sped up --- but for now, simple is
+	 * beautiful.
+	 */
+	newset = lcons(item1, lcons(item2, NIL));
+
+	foreach(cursetlink, root->equi_key_list)
+	{
+		List	   *curset = lfirst(cursetlink);
+
+		if (member(item1, curset) || member(item2, curset))
+		{
+			/* Found a set to merge into our new set */
+			newset = LispUnion(newset, curset);
+			/* Remove old set from equi_key_list.  NOTE this does not change
+			 * lnext(cursetlink), so the outer foreach doesn't break.
+			 */
+			root->equi_key_list = lremove(curset, root->equi_key_list);
+			freeList(curset);	/* might as well recycle old cons cells */
+		}
+	}
+
+	root->equi_key_list = lcons(newset, root->equi_key_list);
+}
+
+/*
+ * make_canonical_pathkey
+ *	  Given a PathKeyItem, find the equi_key_list subset it is a member of,
+ *	  if any.  If so, return a pointer to that sublist, which is the
+ *	  canonical representation (for this query) of that PathKeyItem's
+ *	  equivalence set.  If it is not found, return a single-element list
+ *	  containing the PathKeyItem (when the item has no equivalence peers,
+ *	  we just allow it to be a standalone list).
+ *
+ * Note that this function must not be used until after we have completed
+ * scanning the WHERE clause for equijoin operators.
+ */
+static List *
+make_canonical_pathkey(Query *root, PathKeyItem *item)
+{
+	List	   *cursetlink;
+
+	foreach(cursetlink, root->equi_key_list)
+	{
+		List	   *curset = lfirst(cursetlink);
+
+		if (member(item, curset))
+			return curset;
+	}
+	return lcons(item, NIL);
+}
+
+/*
+ * canonicalize_pathkeys
+ *	   Convert a not-necessarily-canonical pathkeys list to canonical form.
+ *
+ * Note that this function must not be used until after we have completed
+ * scanning the WHERE clause for equijoin operators.
+ */
+List *
+canonicalize_pathkeys(Query *root, List *pathkeys)
+{
+	List	   *new_pathkeys = NIL;
+	List	   *i;
+
+	foreach(i, pathkeys)
+	{
+		List		   *pathkey = (List *) lfirst(i);
+		PathKeyItem	   *item;
+
+		/*
+		 * It's sufficient to look at the first entry in the sublist;
+		 * if there are more entries, they're already part of an
+		 * equivalence set by definition.
+		 */
+		Assert(pathkey != NIL);
+		item = (PathKeyItem *) lfirst(pathkey);
+		new_pathkeys = lappend(new_pathkeys,
+							   make_canonical_pathkey(root, item));
+	}
+	return new_pathkeys;
+}
+
 /****************************************************************************
  *		PATHKEY COMPARISONS
  ****************************************************************************/
@@ -126,15 +322,21 @@ makePathKeyItem(Node *key, Oid sortop)
  *	  it contains all the keys of the other plus more.  For example, either
  *	  ((A) (B)) or ((A B)) is better than ((A)).
  *
- *	This gets called a lot, so it is optimized.
+ *	  Because we actually only expect to see canonicalized pathkey sublists,
+ *	  we don't have to do the full two-way-subset-inclusion test on each
+ *	  pair of sublists that is implied by the above statement.  Instead we
+ *	  just do an equal().  In the normal case where multi-element sublists
+ *	  are pointers into the root's equi_key_list, equal() will be very fast:
+ *	  it will recognize pointer equality when the sublists are the same,
+ *	  and will fail at the first sublist element when they are not.
+ *
+ * Yes, this gets called enough to be worth coding it this tensely.
  */
 PathKeysComparison
 compare_pathkeys(List *keys1, List *keys2)
 {
 	List	   *key1,
 			   *key2;
-	bool		key1_subsetof_key2 = true,
-				key2_subsetof_key1 = true;
 
 	for (key1 = keys1, key2 = keys2;
 		 key1 != NIL && key2 != NIL;
@@ -142,36 +344,12 @@ compare_pathkeys(List *keys1, List *keys2)
 	{
 		List	   *subkey1 = lfirst(key1);
 		List	   *subkey2 = lfirst(key2);
-		List	   *i;
 
-		/* We have to do this the hard way since the ordering of the subkey
-		 * lists is arbitrary.
+		/* We will never have two subkeys where one is a subset of the other,
+		 * because of the canonicalization explained above.  Either they are
+		 * equal or they ain't.
 		 */
-		if (key1_subsetof_key2)
-		{
-			foreach(i, subkey1)
-			{
-				if (! member(lfirst(i), subkey2))
-				{
-					key1_subsetof_key2 = false;
-					break;
-				}
-			}
-		}
-
-		if (key2_subsetof_key1)
-		{
-			foreach(i, subkey2)
-			{
-				if (! member(lfirst(i), subkey1))
-				{
-					key2_subsetof_key1 = false;
-					break;
-				}
-			}
-		}
-
-		if (!key1_subsetof_key2 && !key2_subsetof_key1)
+		if (! equal(subkey1, subkey2))
 			return PATHKEYS_DIFFERENT; /* no need to keep looking */
 	}
 
@@ -180,18 +358,11 @@ compare_pathkeys(List *keys1, List *keys2)
 	 * of the other list are not NIL --- no pathkey list should ever have
 	 * a NIL sublist.)
 	 */
-	if (key1 != NIL)
-		key1_subsetof_key2 = false;
-	if (key2 != NIL)
-		key2_subsetof_key1 = false;
-
-	if (key1_subsetof_key2 && key2_subsetof_key1)
+	if (key1 == NIL && key2 == NIL)
 		return PATHKEYS_EQUAL;
-	if (key1_subsetof_key2)
-		return PATHKEYS_BETTER2;
-	if (key2_subsetof_key1)
-		return PATHKEYS_BETTER1;
-	return PATHKEYS_DIFFERENT;
+	if (key1 != NIL)
+		return PATHKEYS_BETTER1; /* key1 is longer */
+	return PATHKEYS_BETTER2;	/* key2 is longer */
 }
 
 /*
@@ -215,16 +386,16 @@ pathkeys_contained_in(List *keys1, List *keys2)
 
 /*
  * get_cheapest_path_for_pathkeys
- *	  Find the cheapest path in 'paths' that satisfies the given pathkeys.
- *	  Return NULL if no such path.
+ *	  Find the cheapest path (according to the specified criterion) that
+ *	  satisfies the given pathkeys.  Return NULL if no such path.
  *
- * 'paths' is a list of possible paths (either inner or outer)
- * 'pathkeys' represents a required ordering
- * if 'indexpaths_only' is true, only IndexPaths will be considered.
+ * 'paths' is a list of possible paths that all generate the same relation
+ * 'pathkeys' represents a required ordering (already canonicalized!)
+ * 'cost_criterion' is STARTUP_COST or TOTAL_COST
  */
 Path *
 get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
-							   bool indexpaths_only)
+							   CostSelector cost_criterion)
 {
 	Path	   *matched_path = NULL;
 	List	   *i;
@@ -233,15 +404,55 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
 	{
 		Path	   *path = (Path *) lfirst(i);
 
-		if (indexpaths_only && ! IsA(path, IndexPath))
+		/*
+		 * Since cost comparison is a lot cheaper than pathkey comparison,
+		 * do that first.  (XXX is that still true?)
+		 */
+		if (matched_path != NULL &&
+			compare_path_costs(matched_path, path, cost_criterion) <= 0)
 			continue;
 
 		if (pathkeys_contained_in(pathkeys, path->pathkeys))
-		{
-			if (matched_path == NULL ||
-				path->path_cost < matched_path->path_cost)
-				matched_path = path;
-		}
+			matched_path = path;
+	}
+	return matched_path;
+}
+
+/*
+ * get_cheapest_fractional_path_for_pathkeys
+ *	  Find the cheapest path (for retrieving a specified fraction of all
+ *	  the tuples) that satisfies the given pathkeys.
+ *	  Return NULL if no such path.
+ *
+ * See compare_fractional_path_costs() for the interpretation of the fraction
+ * parameter.
+ *
+ * 'paths' is a list of possible paths that all generate the same relation
+ * 'pathkeys' represents a required ordering (already canonicalized!)
+ * 'fraction' is the fraction of the total tuples expected to be retrieved
+ */
+Path *
+get_cheapest_fractional_path_for_pathkeys(List *paths,
+										  List *pathkeys,
+										  double fraction)
+{
+	Path	   *matched_path = NULL;
+	List	   *i;
+
+	foreach(i, paths)
+	{
+		Path	   *path = (Path *) lfirst(i);
+
+		/*
+		 * Since cost comparison is a lot cheaper than pathkey comparison,
+		 * do that first.
+		 */
+		if (matched_path != NULL &&
+			compare_fractional_path_costs(matched_path, path, fraction) <= 0)
+			continue;
+
+		if (pathkeys_contained_in(pathkeys, path->pathkeys))
+			matched_path = path;
 	}
 	return matched_path;
 }
@@ -255,18 +466,22 @@ get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
  *	  Build a pathkeys list that describes the ordering induced by an index
  *	  scan using the given index.  (Note that an unordered index doesn't
  *	  induce any ordering; such an index will have no sortop OIDS in
- *	  its "ordering" field.)
+ *	  its "ordering" field, and we will return NIL.)
  *
- * Vars in the resulting pathkeys list are taken from the rel's targetlist.
- * If we can't find the indexkey in the targetlist, we assume that the
- * ordering of that key is not interesting.
+ * If 'scandir' is BackwardScanDirection, attempt to build pathkeys
+ * representing a backwards scan of the index.  Return NIL if can't do it.
  */
 List *
-build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index)
+build_index_pathkeys(Query *root,
+					 RelOptInfo *rel,
+					 IndexOptInfo *index,
+					 ScanDirection scandir)
 {
 	List	   *retval = NIL;
 	int		   *indexkeys = index->indexkeys;
 	Oid		   *ordering = index->ordering;
+	PathKeyItem *item;
+	Oid			sortop;
 
 	if (!indexkeys || indexkeys[0] == 0 ||
 		!ordering || ordering[0] == InvalidOid)
@@ -275,8 +490,6 @@ build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index)
 	if (index->indproc)
 	{
 		/* Functional index: build a representation of the function call */
-		int			relid = lfirsti(rel->relids);
-		Oid			reloid = getrelid(relid, root->rtable);
 		Func	   *funcnode = makeNode(Func);
 		List	   *funcargs = NIL;
 
@@ -291,43 +504,42 @@ build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index)
 
 		while (*indexkeys != 0)
 		{
-			int			varattno = *indexkeys;
-			Oid			vartypeid = get_atttype(reloid, varattno);
-			int32		type_mod = get_atttypmod(reloid, varattno);
-
 			funcargs = lappend(funcargs,
-							   makeVar(relid, varattno, vartypeid,
-									   type_mod, 0));
+							   find_indexkey_var(root, rel, *indexkeys));
 			indexkeys++;
 		}
 
+		sortop = *ordering;
+		if (ScanDirectionIsBackward(scandir))
+		{
+			sortop = get_commutator(sortop);
+			if (sortop == InvalidOid)
+				return NIL;		/* oops, no reverse sort operator? */
+		}
+
 		/* Make a one-sublist pathkeys list for the function expression */
-		retval = lcons(lcons(
-			makePathKeyItem((Node *) make_funcclause(funcnode, funcargs),
-							*ordering),
-			NIL), NIL);
+		item = makePathKeyItem((Node *) make_funcclause(funcnode, funcargs),
+							   sortop);
+		retval = lcons(make_canonical_pathkey(root, item), NIL);
 	}
 	else
 	{
 		/* Normal non-functional index */
-		List	   *rel_tlist = rel->targetlist;
-
 		while (*indexkeys != 0 && *ordering != InvalidOid)
 		{
-			Var		*relvar = find_indexkey_var(*indexkeys, rel_tlist);
+			Var		*relvar = find_indexkey_var(root, rel, *indexkeys);
 
-			/* If we can find no tlist entry for the n'th sort key,
-			 * then we're done generating pathkeys; any subsequent sort keys
-			 * no longer apply, since we can't represent the ordering properly
-			 * even if there are tlist entries for them.
-			 */
-			if (!relvar)
-				break;
-			/* OK, make a one-element sublist for this sort key */
-			retval = lappend(retval,
-							 lcons(makePathKeyItem((Node *) relvar,
-												   *ordering),
-								   NIL));
+			sortop = *ordering;
+			if (ScanDirectionIsBackward(scandir))
+			{
+				sortop = get_commutator(sortop);
+				if (sortop == InvalidOid)
+					break;		/* oops, no reverse sort operator? */
+			}
+
+			/* OK, make a sublist for this sort key */
+			item = makePathKeyItem((Node *) relvar, sortop);
+			retval = lappend(retval, make_canonical_pathkey(root, item));
 
 			indexkeys++;
 			ordering++;
@@ -338,21 +550,37 @@ build_index_pathkeys(Query *root, RelOptInfo *rel, IndexOptInfo *index)
 }
 
 /*
- * Find a var in a relation's targetlist that matches an indexkey attrnum.
+ * Find or make a Var node for the specified attribute of the rel.
+ *
+ * We first look for the var in the rel's target list, because that's
+ * easy and fast.  But the var might not be there (this should normally
+ * only happen for vars that are used in WHERE restriction clauses,
+ * but not in join clauses or in the SELECT target list).  In that case,
+ * gin up a Var node the hard way.
  */
 static Var *
-find_indexkey_var(int indexkey, List *tlist)
+find_indexkey_var(Query *root, RelOptInfo *rel, AttrNumber varattno)
 {
 	List	   *temp;
+	int			relid;
+	Oid			reloid,
+				vartypeid;
+	int32		type_mod;
 
-	foreach(temp, tlist)
+	foreach(temp, rel->targetlist)
 	{
 		Var	   *tle_var = get_expr(lfirst(temp));
 
-		if (IsA(tle_var, Var) && tle_var->varattno == indexkey)
+		if (IsA(tle_var, Var) && tle_var->varattno == varattno)
 			return tle_var;
 	}
-	return NULL;
+
+	relid = lfirsti(rel->relids);
+	reloid = getrelid(relid, root->rtable);
+	vartypeid = get_atttype(reloid, varattno);
+	type_mod = get_atttypmod(reloid, varattno);
+
+	return makeVar(relid, varattno, vartypeid, type_mod, 0);
 }
 
 /*
@@ -360,164 +588,33 @@ find_indexkey_var(int indexkey, List *tlist)
  *	  Build the path keys for a join relation constructed by mergejoin or
  *	  nestloop join.  These keys should include all the path key vars of the
  *	  outer path (since the join will retain the ordering of the outer path)
- *	  plus any vars of the inner path that are mergejoined to the outer vars.
+ *	  plus any vars of the inner path that are equijoined to the outer vars.
  *
- *	  Per the discussion at the top of this file, mergejoined inner vars
+ *	  Per the discussion at the top of this file, equijoined inner vars
  *	  can be considered path keys of the result, just the same as the outer
- *	  vars they were joined with.
- *
- *	  We can also use inner path vars as pathkeys of a nestloop join, but we
- *	  must be careful that we only consider equijoin clauses and not general
- *	  join clauses.  For example, "t1.a < t2.b" might be a join clause of a
- *	  nestloop, but it doesn't result in b acquiring the ordering of a!
- *	  joinpath.c handles that problem by only passing this routine clauses
- *	  that are marked mergejoinable, even if a nestloop join is being built.
- *	  Therefore we only have 't1.a = t2.b' style clauses, and can expect that
- *	  the inner var will acquire the outer's ordering no matter which join
- *	  method is actually used.
- *
- *	  We drop pathkeys that are not vars of the join relation's tlist,
- *	  on the assumption that they are not interesting to higher levels.
- *	  (Is this correct??  To support expression pathkeys we might want to
- *	  check that all vars mentioned in the key are in the tlist, instead.)
- *
- * All vars in the result are taken from the join relation's tlist,
- * not from the given pathkeys or joinclauses.
+ *	  vars they were joined with; furthermore, it doesn't matter what kind
+ *	  of join algorithm is actually used.
  *
  * 'outer_pathkeys' is the list of the outer path's path keys
  * 'join_rel_tlist' is the target list of the join relation
- * 'joinclauses' is the list of mergejoinable clauses to consider (note this
- *		is a list of RestrictInfos, not just bare qual clauses); can be NIL
+ * 'equi_key_list' is the query's list of pathkeyitem equivalence sets
  *
  * Returns the list of new path keys.
- *
  */
 List *
 build_join_pathkeys(List *outer_pathkeys,
 					List *join_rel_tlist,
-					List *joinclauses)
+					List *equi_key_list)
 {
-	List	   *final_pathkeys = NIL;
-	List	   *i;
-
-	foreach(i, outer_pathkeys)
-	{
-		List	   *outer_pathkey = lfirst(i);
-		List	   *new_pathkey;
-
-		new_pathkey = build_join_pathkey(outer_pathkey, join_rel_tlist,
-										 joinclauses);
-		/* if we can find no sortable vars for the n'th sort key,
-		 * then we're done generating pathkeys; any subsequent sort keys
-		 * no longer apply, since we can't represent the ordering properly.
-		 */
-		if (new_pathkey == NIL)
-			break;
-		final_pathkeys = lappend(final_pathkeys, new_pathkey);
-	}
-	return final_pathkeys;
-}
-
-/*
- * build_join_pathkey
- *	  Generate an individual pathkey sublist, consisting of the outer vars
- *	  already mentioned in 'pathkey' plus any inner vars that are joined to
- *	  them (and thus can now also be considered path keys, per discussion
- *	  at the top of this file).
- *
- *	  Note that each returned pathkey uses the var node found in
- *	  'join_rel_tlist' rather than the input pathkey or joinclause var node.
- *	  (Is this important?)
- *
- * Returns a new pathkey (list of PathKeyItems).
- */
-static List *
-build_join_pathkey(List *pathkey,
-				   List *join_rel_tlist,
-				   List *joinclauses)
-{
-	List	   *new_pathkey = NIL;
-	List	   *i,
-			   *j;
-
-	foreach(i, pathkey)
-	{
-		PathKeyItem *key = (PathKeyItem *) lfirst(i);
-		Node	   *tlist_key;
-
-		Assert(key && IsA(key, PathKeyItem));
-
-		tlist_key = matching_tlist_expr(key->key, join_rel_tlist);
-		if (tlist_key)
-			new_pathkey = lcons(makePathKeyItem(tlist_key,
-												key->sortop),
-								new_pathkey);
-
-		foreach(j, joinclauses)
-		{
-			RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
-			Expr	   *joinclause = restrictinfo->clause;
-			/* We assume the clause is a binary opclause... */
-			Node	   *l = (Node *) get_leftop(joinclause);
-			Node	   *r = (Node *) get_rightop(joinclause);
-			Node	   *other_var = NULL;
-			Oid			other_sortop = InvalidOid;
-
-			if (equal(key->key, l))
-			{
-				other_var = r;
-				other_sortop = restrictinfo->right_sortop;
-			}
-			else if (equal(key->key, r))
-			{
-				other_var = l;
-				other_sortop = restrictinfo->left_sortop;
-			}
-
-			if (other_var && other_sortop)
-			{
-				tlist_key = matching_tlist_expr(other_var, join_rel_tlist);
-				if (tlist_key)
-					new_pathkey = lcons(makePathKeyItem(tlist_key,
-														other_sortop),
-										new_pathkey);
-			}
-		}
-	}
-
-	return new_pathkey;
-}
-
-/*
- * commute_pathkeys
- *		Attempt to commute the operators in a set of pathkeys, producing
- *		pathkeys that describe the reverse sort order (DESC instead of ASC).
- *		Returns TRUE if successful (all the operators have commutators).
- *
- * CAUTION: given pathkeys are modified in place, even if not successful!!
- * Usually, caller should have just built or copied the pathkeys list to
- * ensure there are no unwanted side-effects.
- */
-bool
-commute_pathkeys(List *pathkeys)
-{
-	List	   *i;
-
-	foreach(i, pathkeys)
-	{
-		List	   *pathkey = lfirst(i);
-		List	   *j;
-
-		foreach(j, pathkey)
-		{
-			PathKeyItem	   *key = lfirst(j);
-
-			key->sortop = get_commutator(key->sortop);
-			if (key->sortop == InvalidOid)
-				return false;
-		}
-	}
-	return true;				/* successful */
+	/*
+	 * This used to be quite a complex bit of code, but now that all
+	 * pathkey sublists start out life canonicalized, we don't have to
+	 * do a darn thing here!  The inner-rel vars we used to need to add
+	 * are *already* part of the outer pathkey!
+	 *
+	 * I'd remove the routine entirely, but maybe someday we'll need it...
+	 */
+	return outer_pathkeys;
 }
 
 /****************************************************************************
@@ -529,11 +626,18 @@ commute_pathkeys(List *pathkeys)
  *		Generate a pathkeys list that represents the sort order specified
  *		by a list of SortClauses (GroupClauses will work too!)
  *
+ * NB: the result is NOT in canonical form, but must be passed through
+ * canonicalize_pathkeys() before it can be used for comparisons or
+ * labeling relation sort orders.  (We do things this way because
+ * union_planner needs to be able to construct requested pathkeys before
+ * the pathkey equivalence sets have been created for the query.)
+ *
  * 'sortclauses' is a list of SortClause or GroupClause nodes
  * 'tlist' is the targetlist to find the referenced tlist entries in
  */
 List *
-make_pathkeys_for_sortclauses(List *sortclauses, List *tlist)
+make_pathkeys_for_sortclauses(List *sortclauses,
+							  List *tlist)
 {
 	List	   *pathkeys = NIL;
 	List	   *i;
@@ -546,7 +650,11 @@ make_pathkeys_for_sortclauses(List *sortclauses, List *tlist)
 
 		sortkey = get_sortgroupclause_expr(sortcl, tlist);
 		pathkey = makePathKeyItem(sortkey, sortcl->sortop);
-		/* pathkey becomes a one-element sublist */
+		/*
+		 * The pathkey becomes a one-element sublist, for now;
+		 * canonicalize_pathkeys() might replace it with a longer
+		 * sublist later.
+		 */
 		pathkeys = lappend(pathkeys, lcons(pathkey, NIL));
 	}
 	return pathkeys;
@@ -599,6 +707,7 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos)
 		{
 			PathKeyItem	   *keyitem = lfirst(j);
 			Node		   *key = keyitem->key;
+			Oid				keyop = keyitem->sortop;
 			List		   *k;
 
 			foreach(k, restrictinfos)
@@ -607,8 +716,10 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos)
 
 				Assert(restrictinfo->mergejoinoperator != InvalidOid);
 
-				if ((equal(key, get_leftop(restrictinfo->clause)) ||
-					 equal(key, get_rightop(restrictinfo->clause))) &&
+				if (((keyop == restrictinfo->left_sortop &&
+					  equal(key, get_leftop(restrictinfo->clause))) ||
+					 (keyop == restrictinfo->right_sortop &&
+					  equal(key, get_rightop(restrictinfo->clause)))) &&
 					! member(restrictinfo, mergeclauses))
 				{
 					matched_restrictinfo = restrictinfo;
@@ -645,7 +756,7 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos)
  * 'mergeclauses' is a list of RestrictInfos for mergejoin clauses
  *			that will be used in a merge join.
  * 'tlist' is a relation target list for either the inner or outer
- *			side of the proposed join rel.
+ *			side of the proposed join rel.  (Not actually needed anymore)
  *
  * Returns a pathkeys list that can be applied to the indicated relation.
  *
@@ -654,7 +765,9 @@ find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos)
  * just make the keys, eh?
  */
 List *
-make_pathkeys_for_mergeclauses(List *mergeclauses, List *tlist)
+make_pathkeys_for_mergeclauses(Query *root,
+							   List *mergeclauses,
+							   List *tlist)
 {
 	List	   *pathkeys = NIL;
 	List	   *i;
@@ -664,32 +777,24 @@ make_pathkeys_for_mergeclauses(List *mergeclauses, List *tlist)
 		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
 		Node	   *key;
 		Oid			sortop;
+		PathKeyItem *item;
 
 		Assert(restrictinfo->mergejoinoperator != InvalidOid);
 
 		/*
 		 * Find the key and sortop needed for this mergeclause.
 		 *
-		 * We can use either side of the mergeclause, since we haven't yet
-		 * committed to which side will be inner.
+		 * Both sides of the mergeclause should appear in one of the
+		 * query's pathkey equivalence classes, so it doesn't matter
+		 * which one we use here.
 		 */
-		key = matching_tlist_expr((Node *) get_leftop(restrictinfo->clause),
-								  tlist);
+		key = (Node *) get_leftop(restrictinfo->clause);
 		sortop = restrictinfo->left_sortop;
-		if (! key)
-		{
-			key = matching_tlist_expr((Node *) get_rightop(restrictinfo->clause),
-									  tlist);
-			sortop = restrictinfo->right_sortop;
-		}
-		if (! key)
-			elog(ERROR, "make_pathkeys_for_mergeclauses: can't find key");
 		/*
 		 * Add a pathkey sublist for this sort item
 		 */
-		pathkeys = lappend(pathkeys,
-						   lcons(makePathKeyItem(key, sortop),
-								 NIL));
+		item = makePathKeyItem(key, sortop);
+		pathkeys = lappend(pathkeys, make_canonical_pathkey(root, item));
 	}
 
 	return pathkeys;
diff --git a/src/backend/optimizer/path/tidpath.c b/src/backend/optimizer/path/tidpath.c
index ab0427ef322..1e7dc43473b 100644
--- a/src/backend/optimizer/path/tidpath.c
+++ b/src/backend/optimizer/path/tidpath.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.4 2000/02/07 04:40:59 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/tidpath.c,v 1.5 2000/02/15 20:49:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,7 +36,7 @@
 #include "parser/parsetree.h"
 #include "utils/lsyscache.h"
 
-static List	*create_tidscan_joinpaths(RelOptInfo *);
+static void create_tidscan_joinpaths(RelOptInfo *rel);
 static List	*TidqualFromRestrictinfo(List *relids, List *restrictinfo);
 static bool	isEvaluable(int varno, Node *node);
 static Node	*TidequalClause(int varno, Expr *node);
@@ -234,61 +234,54 @@ TidqualFromRestrictinfo(List *relids, List *restrictinfo)
 
 /*
  * create_tidscan_joinpaths
- *	  Creates a path corresponding to a tid_direct scan, returning the
- *	  pathnode.
+ *	  Create innerjoin paths if there are suitable joinclauses.
  *
+ * XXX does this actually work?
  */
-List *
+static void
 create_tidscan_joinpaths(RelOptInfo *rel)
 {
 	List		*rlst = NIL,
 				*lst;
-	TidPath		*pathnode = (TidPath *) NULL;
-	List		*restinfo,
-				*tideval;
 
 	foreach (lst, rel->joininfo)
 	{
-		JoinInfo   *joininfo = (JoinInfo *)lfirst(lst);
+		JoinInfo   *joininfo = (JoinInfo *) lfirst(lst);
+		List		*restinfo,
+					*tideval;
 
 		restinfo = joininfo->jinfo_restrictinfo;
 		tideval = TidqualFromRestrictinfo(rel->relids, restinfo);
 		if (length(tideval) == 1)
 		{
-			pathnode = makeNode(TidPath);
+			TidPath		*pathnode = makeNode(TidPath);
 
 			pathnode->path.pathtype = T_TidScan;
 			pathnode->path.parent = rel;
 			pathnode->path.pathkeys = NIL;
-			pathnode->path.path_cost = cost_tidscan(rel, tideval);
 			pathnode->tideval = tideval;
 			pathnode->unjoined_relids = joininfo->unjoined_relids;
+
+			cost_tidscan(&pathnode->path, rel, tideval);
+
 			rlst = lappend(rlst, pathnode);
 		}
 	}
 	rel->innerjoin = nconc(rel->innerjoin, rlst);
-	return rlst;
 }
 
 /*
  * create_tidscan_paths
- *	  Creates a path corresponding to a tid direct scan, returning the
- *	  pathnode List.
- *
+ *	  Creates paths corresponding to tid direct scans of the given rel.
+ *	  Candidate paths are added to the rel's pathlist (using add_path).
  */
-List *
+void
 create_tidscan_paths(Query *root, RelOptInfo *rel)
 {
-	List	*rlst = NIL;
-	TidPath	*pathnode = (TidPath *) NULL;
 	List	*tideval = TidqualFromRestrictinfo(rel->relids,
 											   rel->baserestrictinfo);
 	
 	if (tideval)
-		pathnode = create_tidscan_path(rel, tideval);
-	if (pathnode)
-		rlst = lcons(pathnode, rlst);
+		add_path(rel, (Path *) create_tidscan_path(rel, tideval));
 	create_tidscan_joinpaths(rel);
-
-	return rlst;
 }