Diffstat (limited to 'src/backend/optimizer/path/costsize.c')
-rw-r--r--	src/backend/optimizer/path/costsize.c	43
1 file changed, 23 insertions, 20 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index a19dd92c826..29b23948dfe 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.97 2002/12/26 23:38:42 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.98 2002/12/30 15:21:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -85,7 +85,8 @@
 bool		enable_mergejoin = true;
 bool		enable_hashjoin = true;
 
-static Selectivity estimate_hash_bucketsize(Query *root, Var *var);
+static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
+						 int nbuckets);
 static bool cost_qual_eval_walker(Node *node, Cost *total);
 static Selectivity approx_selectivity(Query *root, List *quals);
 static void set_rel_width(Query *root, RelOptInfo *rel);
@@ -882,7 +883,9 @@ cost_hashjoin(Path *path, Query *root,
 											outer_path->parent->width);
 	double		innerbytes = relation_byte_size(inner_path->parent->rows,
 											inner_path->parent->width);
-	long		hashtablebytes = SortMem * 1024L;
+	int			virtualbuckets;
+	int			physicalbuckets;
+	int			numbatches;
 	Selectivity innerbucketsize;
 	List	   *hcl;
 
@@ -898,6 +901,13 @@
 	startup_cost += cpu_operator_cost * inner_path->parent->rows;
 	run_cost += cpu_operator_cost * outer_path->parent->rows;
 
+	/* Get hash table size that executor would use for inner relation */
+	ExecChooseHashTableSize(inner_path->parent->rows,
+							inner_path->parent->width,
+							&virtualbuckets,
+							&physicalbuckets,
+							&numbatches);
+
 	/*
 	 * Determine bucketsize fraction for inner relation.  We use the
 	 * smallest bucketsize estimated for any individual hashclause;
@@ -931,7 +941,8 @@
 			if (thisbucketsize < 0)
 			{
 				/* not cached yet */
-				thisbucketsize = estimate_hash_bucketsize(root, right);
+				thisbucketsize = estimate_hash_bucketsize(root, right,
+														  virtualbuckets);
 				restrictinfo->right_bucketsize = thisbucketsize;
 			}
 		}
@@ -943,7 +954,8 @@
 			if (thisbucketsize < 0)
 			{
 				/* not cached yet */
-				thisbucketsize = estimate_hash_bucketsize(root, left);
+				thisbucketsize = estimate_hash_bucketsize(root, left,
+														  virtualbuckets);
 				restrictinfo->left_bucketsize = thisbucketsize;
 			}
 		}
@@ -982,7 +994,7 @@
 	 * should be nice and sequential...).  Writing the inner rel counts as
 	 * startup cost, all the rest as run cost.
 	 */
-	if (innerbytes > hashtablebytes)
+	if (numbatches)
 	{
 		double		outerpages = page_size(outer_path->parent->rows,
 										   outer_path->parent->width);
@@ -1019,7 +1031,7 @@
  * smart enough to figure out how the restrict clauses might change the
  * distribution, so this will have to do for now.
  *
- * We can get the number of buckets the executor will use for the given
+ * We are passed the number of buckets the executor will use for the given
  * input relation.  If the data were perfectly distributed, with the same
  * number of tuples going into each available bucket, then the bucketsize
  * fraction would be 1/nbuckets.  But this happy state of affairs will occur
@@ -1039,13 +1051,10 @@
  * inner rel is well-dispersed (or the alternatives seem much worse).
  */
 static Selectivity
-estimate_hash_bucketsize(Query *root, Var *var)
+estimate_hash_bucketsize(Query *root, Var *var, int nbuckets)
 {
 	Oid			relid;
 	RelOptInfo *rel;
-	int			virtualbuckets;
-	int			physicalbuckets;
-	int			numbatches;
 	HeapTuple	tuple;
 	Form_pg_statistic stats;
 	double		estfract,
@@ -1071,12 +1080,6 @@ estimate_hash_bucketsize(Query *root, Var *var)
 	if (rel->tuples <= 0.0 || rel->rows <= 0.0)
 		return 0.1;				/* ensure we can divide below */
 
-	/* Get hash table size that executor would use for this relation */
-	ExecChooseHashTableSize(rel->rows, rel->width,
-							&virtualbuckets,
-							&physicalbuckets,
-							&numbatches);
-
 	tuple = SearchSysCache(STATRELATT,
 						   ObjectIdGetDatum(relid),
 						   Int16GetDatum(var->varattno),
@@ -1093,7 +1096,7 @@
 			case ObjectIdAttributeNumber:
 			case SelfItemPointerAttributeNumber:
 				/* these are unique, so buckets should be well-distributed */
-				return 1.0 / (double) virtualbuckets;
+				return 1.0 / (double) nbuckets;
 			case TableOidAttributeNumber:
 				/* hashing this is a terrible idea... */
 				return 1.0;
@@ -1134,8 +1137,8 @@
 	 * the number of buckets is less than the expected number of distinct
 	 * values; otherwise it is 1/ndistinct.
 	 */
-	if (ndistinct > (double) virtualbuckets)
-		estfract = 1.0 / (double) virtualbuckets;
+	if (ndistinct > (double) nbuckets)
+		estfract = 1.0 / (double) nbuckets;
 	else
 		estfract = 1.0 / ndistinct;
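
The substantive change in cost_hashjoin() is the batching test: rather than comparing the inner relation's byte size against its own SortMem * 1024 budget, the planner now calls ExecChooseHashTableSize(), the executor's own sizing routine, and charges the extra batch I/O exactly when it reports a nonzero numbatches, so planner and executor can no longer disagree about when batching occurs. The standalone C sketch below illustrates the shape of that decision; toy_choose_hash_table_size() is a hypothetical stand-in, not the real executor routine (which also chooses the virtual and physical bucket counts), and the workload figures in main() are assumed.

#include <stdio.h>

/* Toy stand-in for ExecChooseHashTableSize(): report zero batches when
 * the inner relation fits in the memory budget, else a rough batch
 * count.  Purely illustrative. */
static void
toy_choose_hash_table_size(double rows, int width, long mem_bytes,
                           int *numbatches)
{
    double      inner_bytes = rows * width;

    if (inner_bytes > (double) mem_bytes)
        *numbatches = (int) (inner_bytes / mem_bytes) + 1;
    else
        *numbatches = 0;
}

int
main(void)
{
    int         numbatches;

    /* assumed workload: 100000 rows of 100 bytes vs. a 1MB budget */
    toy_choose_hash_table_size(100000.0, 100, 1024L * 1024L, &numbatches);

    if (numbatches)             /* the planner's new test, per the patch */
        printf("charge batch I/O (%d batches)\n", numbatches);
    else
        printf("hash table fits in memory\n");
    return 0;
}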
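
The estimate_hash_bucketsize() edits are mostly mechanical (the locally computed virtualbuckets becomes the nbuckets parameter), but the rule in the final hunk is the heart of the estimate: the bucketsize fraction is capped at 1/nbuckets only while the distinct values outnumber the buckets; once nbuckets >= ndistinct, each value can have a bucket to itself and the fraction is 1/ndistinct. A minimal sketch of just that rule follows, with the function name assumed and the real function's further statistical corrections omitted.

#include <stdio.h>

/* Core rule from the patched comments: expected fraction of inner
 * tuples landing in any one tuple's hash bucket.  Illustrative only. */
static double
bucketsize_fraction(double ndistinct, int nbuckets)
{
    if (ndistinct > (double) nbuckets)
        return 1.0 / (double) nbuckets; /* bucket count is the limit */
    return 1.0 / ndistinct;             /* distinct values are */
}

int
main(void)
{
    /* assumed: 10000 virtual buckets, varying ndistinct */
    printf("%g\n", bucketsize_fraction(50000.0, 10000));   /* 0.0001 */
    printf("%g\n", bucketsize_fraction(100.0, 10000));     /* 0.01 */
    return 0;
}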