5 files changed, 91 insertions, 77 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 4b050d62ec6..a5074296065 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -3,14 +3,19 @@
  * costsize.c
  *	  Routines to compute (and set) relation sizes and path costs
  *
- * Path costs are measured in units of disk accesses: one sequential page
- * fetch has cost 1.  All else is scaled relative to a page fetch, using
- * the scaling parameters
+ * Path costs are measured in arbitrary units established by these basic
+ * parameters:
  *
+ *	seq_page_cost		Cost of a sequential page fetch
  *	random_page_cost	Cost of a non-sequential page fetch
  *	cpu_tuple_cost		Cost of typical CPU time to process a tuple
  *	cpu_index_tuple_cost  Cost of typical CPU time to process an index tuple
- *	cpu_operator_cost	Cost of CPU time to process a typical WHERE operator
+ *	cpu_operator_cost	Cost of CPU time to execute an operator or function
+ *
+ * We expect that the kernel will typically do some amount of read-ahead
+ * optimization; this in conjunction with seek costs means that seq_page_cost
+ * is normally considerably less than random_page_cost.  (However, if the
+ * database is fully cached in RAM, it is reasonable to set them equal.)
  *
  * We also use a rough estimate "effective_cache_size" of the number of
  * disk pages in Postgres + OS-level disk cache.  (We can't simply use
@@ -49,7 +54,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.155 2006/03/05 15:58:28 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.156 2006/06/05 02:49:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -85,12 +90,14 @@
 	 (path)->parent->rows)
 
 
-double		effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
+double		seq_page_cost = DEFAULT_SEQ_PAGE_COST;
 double		random_page_cost = DEFAULT_RANDOM_PAGE_COST;
 double		cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
 double		cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
 double		cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
 
+double		effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
+
 Cost		disable_cost = 100000000.0;
 
 bool		enable_seqscan = true;
@@ -156,14 +163,8 @@ cost_seqscan(Path *path, PlannerInfo *root,
 
 	/*
 	 * disk costs
-	 *
-	 * The cost of reading a page sequentially is 1.0, by definition. Note
-	 * that the Unix kernel will typically do some amount of read-ahead
-	 * optimization, so that this cost is less than the true cost of reading a
-	 * page from disk.	We ignore that issue here, but must take it into
-	 * account when estimating the cost of non-sequential accesses!
 	 */
-	run_cost += baserel->pages; /* sequential fetches with cost 1.0 */
+	run_cost += seq_page_cost * baserel->pages;
 
 	/* CPU costs */
 	startup_cost += baserel->baserestrictcost.startup;
@@ -194,20 +195,21 @@ cost_seqscan(Path *path, PlannerInfo *root,
  * with the entirely ad-hoc equations (writing relsize for
  * relpages/effective_cache_size):
  *	if relsize >= 1:
- *		random_page_cost - (random_page_cost-1)/2 * (1/relsize)
+ *		random_page_cost - (random_page_cost-seq_page_cost)/2 * (1/relsize)
  *	if relsize < 1:
- *		1 + ((random_page_cost-1)/2) * relsize ** 2
- * These give the right asymptotic behavior (=> 1.0 as relpages becomes
- * small, => random_page_cost as it becomes large) and meet in the middle
- * with the estimate that the cache is about 50% effective for a relation
- * of the same size as effective_cache_size.  (XXX this is probably all
- * wrong, but I haven't been able to find any theory about how effective
+ *		seq_page_cost + ((random_page_cost-seq_page_cost)/2) * relsize ** 2
+ * These give the right asymptotic behavior (=> seq_page_cost as relpages
+ * becomes small, => random_page_cost as it becomes large) and meet in the
+ * middle with the estimate that the cache is about 50% effective for a
+ * relation of the same size as effective_cache_size.  (XXX this is probably
+ * all wrong, but I haven't been able to find any theory about how effective
  * a disk cache should be presumed to be.)
  */
 static Cost
 cost_nonsequential_access(double relpages)
 {
 	double		relsize;
+	double		random_delta;
 
 	/* don't crash on bad input data */
 	if (relpages <= 0.0 || effective_cache_size <= 0.0)
@@ -215,19 +217,17 @@ cost_nonsequential_access(double relpages)
 
 	relsize = relpages / effective_cache_size;
 
+	random_delta = (random_page_cost - seq_page_cost) * 0.5;
 	if (relsize >= 1.0)
-		return random_page_cost - (random_page_cost - 1.0) * 0.5 / relsize;
+		return random_page_cost - random_delta / relsize;
 	else
-		return 1.0 + (random_page_cost - 1.0) * 0.5 * relsize * relsize;
+		return seq_page_cost + random_delta * relsize * relsize;
 }
 
 /*
  * cost_index
  *	  Determines and returns the cost of scanning a relation using an index.
  *
- *	  NOTE: an indexscan plan node can actually represent several passes,
- *	  but here we consider the cost of just one pass.
- *
  * 'index' is the index to be used
  * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
  * 'is_injoin' is T if we are considering using the index scan as the inside
@@ -327,9 +327,9 @@ cost_index(IndexPath *path, PlannerInfo *root,
 	 * be just sT.	What's more, these will be sequential fetches, not the
 	 * random fetches that occur in the uncorrelated case.	So, depending on
 	 * the extent of correlation, we should estimate the actual I/O cost
-	 * somewhere between s * T * 1.0 and PF * random_cost.	We currently
-	 * interpolate linearly between these two endpoints based on the
-	 * correlation squared (XXX is that appropriate?).
+	 * somewhere between s * T * seq_page_cost and PF * random_page_cost.
+	 * We currently interpolate linearly between these two endpoints based on
+	 * the correlation squared (XXX is that appropriate?).
 	 *
 	 * In any case the number of tuples fetched is Ns.
 	 *----------
@@ -346,8 +346,10 @@ cost_index(IndexPath *path, PlannerInfo *root,
 	{
 		pages_fetched =
 			(2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
-		if (pages_fetched > T)
+		if (pages_fetched >= T)
 			pages_fetched = T;
+		else
+			pages_fetched = ceil(pages_fetched);
 	}
 	else
 	{
@@ -364,6 +366,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
 			pages_fetched =
 				b + (tuples_fetched - lim) * (T - b) / T;
 		}
+		pages_fetched = ceil(pages_fetched);
 	}
 
 	/*
@@ -373,7 +376,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
 	 * rather than using cost_nonsequential_access, since we've already
 	 * accounted for caching effects by using the Mackert model.
 	 */
-	min_IO_cost = ceil(indexSelectivity * T);
+	min_IO_cost = ceil(indexSelectivity * T) * seq_page_cost;
 	max_IO_cost = pages_fetched * random_page_cost;
 
 	/*
@@ -461,19 +464,21 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
 
 	T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
 	pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
-	if (pages_fetched > T)
+	if (pages_fetched >= T)
 		pages_fetched = T;
+	else
+		pages_fetched = ceil(pages_fetched);
 
 	/*
 	 * For small numbers of pages we should charge random_page_cost apiece,
 	 * while if nearly all the table's pages are being read, it's more
-	 * appropriate to charge 1.0 apiece.  The effect is nonlinear, too. For
-	 * lack of a better idea, interpolate like this to determine the cost per
-	 * page.
+	 * appropriate to charge seq_page_cost apiece.  The effect is nonlinear,
+	 * too. For lack of a better idea, interpolate like this to determine the
+	 * cost per page.
 	 */
 	if (pages_fetched >= 2.0)
 		cost_per_page = random_page_cost -
-			(random_page_cost - 1.0) * sqrt(pages_fetched / T);
+			(random_page_cost - seq_page_cost) * sqrt(pages_fetched / T);
 	else
 		cost_per_page = random_page_cost;
 
@@ -833,9 +838,9 @@ cost_sort(Path *path, PlannerInfo *root,
 		else
 			log_runs = 1.0;
 		npageaccesses = 2.0 * npages * log_runs;
-		/* Assume half are sequential (cost 1), half are not */
+		/* Assume half are sequential, half are not */
 		startup_cost += npageaccesses *
-			(1.0 + cost_nonsequential_access(npages)) * 0.5;
+			(seq_page_cost + cost_nonsequential_access(npages)) * 0.5;
 	}
 
 	/*
@@ -871,8 +876,8 @@ cost_material(Path *path,
 		double		npages = ceil(nbytes / BLCKSZ);
 
 		/* We'll write during startup and read during retrieval */
-		startup_cost += npages;
-		run_cost += npages;
+		startup_cost += seq_page_cost * npages;
+		run_cost += seq_page_cost * npages;
 	}
 
 	/*
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 5aeadce0917..c75ceef3732 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.205 2006/05/02 11:28:55 teodor Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.206 2006/06/05 02:49:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -4555,9 +4555,9 @@ genericcostestimate(PlannerInfo *root,
 	 * Compute the index access cost.
 	 *
 	 * Disk cost: our generic assumption is that the index pages will be read
-	 * sequentially, so they have cost 1.0 each, not random_page_cost.
+	 * sequentially, so they cost seq_page_cost each, not random_page_cost.
 	 */
-	*indexTotalCost = numIndexPages;
+	*indexTotalCost = seq_page_cost * numIndexPages;
 
 	/*
 	 * CPU cost: any complex expressions in the indexquals will need to be
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 1ff2e0c8318..9ef561d4c12 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.320 2006/05/21 20:10:42 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.321 2006/06/05 02:49:58 tgl Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -1595,57 +1595,63 @@ static struct config_int ConfigureNamesInt[] =
 static struct config_real ConfigureNamesReal[] =
 {
 	{
-		{"effective_cache_size", PGC_USERSET, QUERY_TUNING_COST,
-			gettext_noop("Sets the planner's assumption about size of the disk cache."),
-			gettext_noop("That is, the portion of the kernel's disk cache that "
-						 "will be used for PostgreSQL data files. This is measured in disk "
-						 "pages, which are normally 8 kB each.")
+		{"seq_page_cost", PGC_USERSET, QUERY_TUNING_COST,
+			gettext_noop("Sets the planner's estimate of the cost of a "
+						 "sequentially fetched disk page."),
+			NULL
 		},
-		&effective_cache_size,
-		DEFAULT_EFFECTIVE_CACHE_SIZE, 1, DBL_MAX, NULL, NULL
+		&seq_page_cost,
+		DEFAULT_SEQ_PAGE_COST, 0, DBL_MAX, NULL, NULL
 	},
 	{
 		{"random_page_cost", PGC_USERSET, QUERY_TUNING_COST,
-			gettext_noop("Sets the planner's estimate of the cost of a nonsequentially "
-						 "fetched disk page."),
-			gettext_noop("This is measured as a multiple of the cost of a "
-			  "sequential page fetch. A higher value makes it more likely a "
-						 "sequential scan will be used, a lower value makes it more likely an "
-						 "index scan will be used.")
+			gettext_noop("Sets the planner's estimate of the cost of a "
+						 "nonsequentially fetched disk page."),
+			NULL
 		},
 		&random_page_cost,
 		DEFAULT_RANDOM_PAGE_COST, 0, DBL_MAX, NULL, NULL
 	},
 	{
 		{"cpu_tuple_cost", PGC_USERSET, QUERY_TUNING_COST,
-			gettext_noop("Sets the planner's estimate of the cost of processing each tuple (row)."),
-			gettext_noop("This is measured as a fraction of the cost of a "
-						 "sequential page fetch.")
+			gettext_noop("Sets the planner's estimate of the cost of "
+						 "processing each tuple (row)."),
+			NULL
 		},
 		&cpu_tuple_cost,
 		DEFAULT_CPU_TUPLE_COST, 0, DBL_MAX, NULL, NULL
 	},
 	{
 		{"cpu_index_tuple_cost", PGC_USERSET, QUERY_TUNING_COST,
-			gettext_noop("Sets the planner's estimate of processing cost for each "
-						 "index tuple (row) during index scan."),
-			gettext_noop("This is measured as a fraction of the cost of a "
-						 "sequential page fetch.")
+			gettext_noop("Sets the planner's estimate of the cost of "
+						 "processing each index entry during an index scan."),
+			NULL
 		},
 		&cpu_index_tuple_cost,
 		DEFAULT_CPU_INDEX_TUPLE_COST, 0, DBL_MAX, NULL, NULL
 	},
 	{
 		{"cpu_operator_cost", PGC_USERSET, QUERY_TUNING_COST,
-			gettext_noop("Sets the planner's estimate of processing cost of each operator in WHERE."),
-			gettext_noop("This is measured as a fraction of the cost of a sequential "
-						 "page fetch.")
+			gettext_noop("Sets the planner's estimate of the cost of "
+						 "processing each operator or function call."),
+			NULL
 		},
 		&cpu_operator_cost,
 		DEFAULT_CPU_OPERATOR_COST, 0, DBL_MAX, NULL, NULL
 	},
 
 	{
+		{"effective_cache_size", PGC_USERSET, QUERY_TUNING_COST,
+			gettext_noop("Sets the planner's assumption about size of the disk cache."),
+			gettext_noop("That is, the portion of the kernel's disk cache that "
+						 "will be used for PostgreSQL data files. This is measured in disk "
+						 "pages, which are normally 8 kB each.")
+		},
+		&effective_cache_size,
+		DEFAULT_EFFECTIVE_CACHE_SIZE, 1, DBL_MAX, NULL, NULL
+	},
+
+	{
 		{"geqo_selection_bias", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("GEQO: selective pressure within the population."),
 			NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d7f32256d11..0499223d110 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -175,12 +175,12 @@
 
 # - Planner Cost Constants -
 
+#seq_page_cost = 1.0			# measured on an arbitrary scale
+#random_page_cost = 4.0			# same scale as above
+#cpu_tuple_cost = 0.01			# same scale as above
+#cpu_index_tuple_cost = 0.001		# same scale as above
+#cpu_operator_cost = 0.0025		# same scale as above
 #effective_cache_size = 1000		# typically 8KB each
-#random_page_cost = 4			# units are one sequential page fetch 
-					# cost
-#cpu_tuple_cost = 0.01			# (same)
-#cpu_index_tuple_cost = 0.001		# (same)
-#cpu_operator_cost = 0.0025		# (same)
 
 # - Genetic Query Optimizer -
 
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 25301eb41de..5d9a6c821c7 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.73 2006/03/05 15:58:57 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/cost.h,v 1.74 2006/06/05 02:49:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,12 +21,14 @@
 /* defaults for costsize.c's Cost parameters */
 /* NB: cost-estimation code should use the variables, not these constants! */
 /* If you change these, update backend/utils/misc/postgresql.sample.conf */
-#define DEFAULT_EFFECTIVE_CACHE_SIZE  1000.0	/* measured in pages */
+#define DEFAULT_SEQ_PAGE_COST  1.0
 #define DEFAULT_RANDOM_PAGE_COST  4.0
 #define DEFAULT_CPU_TUPLE_COST	0.01
 #define DEFAULT_CPU_INDEX_TUPLE_COST 0.001
 #define DEFAULT_CPU_OPERATOR_COST  0.0025
 
+#define DEFAULT_EFFECTIVE_CACHE_SIZE  1000.0	/* measured in pages */
+
 
 /*
  * prototypes for costsize.c
@@ -34,11 +36,12 @@
  */
 
 /* parameter variables and flags */
-extern double effective_cache_size;
-extern double random_page_cost;
-extern double cpu_tuple_cost;
+extern DLLIMPORT double seq_page_cost;
+extern DLLIMPORT double random_page_cost;
+extern DLLIMPORT double cpu_tuple_cost;
 extern DLLIMPORT double cpu_index_tuple_cost;
-extern double cpu_operator_cost;
+extern DLLIMPORT double cpu_operator_cost;
+extern double effective_cache_size;
 extern Cost disable_cost;
 extern bool enable_seqscan;
 extern bool enable_indexscan;