diff options
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r-- | src/backend/optimizer/path/Makefile.inc | 21 | ||||
-rw-r--r-- | src/backend/optimizer/path/allpaths.c | 351 | ||||
-rw-r--r-- | src/backend/optimizer/path/clausesel.c | 331 | ||||
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 456 | ||||
-rw-r--r-- | src/backend/optimizer/path/hashutils.c | 120 | ||||
-rw-r--r-- | src/backend/optimizer/path/indxpath.c | 1206 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinpath.c | 623 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinrels.c | 528 | ||||
-rw-r--r-- | src/backend/optimizer/path/joinutils.c | 432 | ||||
-rw-r--r-- | src/backend/optimizer/path/mergeutils.c | 122 | ||||
-rw-r--r-- | src/backend/optimizer/path/orindxpath.c | 271 | ||||
-rw-r--r-- | src/backend/optimizer/path/predmig.c | 773 | ||||
-rw-r--r-- | src/backend/optimizer/path/prune.c | 203 | ||||
-rw-r--r-- | src/backend/optimizer/path/xfunc.c | 1360 |
14 files changed, 6797 insertions, 0 deletions
diff --git a/src/backend/optimizer/path/Makefile.inc b/src/backend/optimizer/path/Makefile.inc new file mode 100644 index 00000000000..6bb014b0a90 --- /dev/null +++ b/src/backend/optimizer/path/Makefile.inc @@ -0,0 +1,21 @@ +#------------------------------------------------------------------------- +# +# Makefile.inc-- +# Makefile for optimizer/path +# +# Copyright (c) 1994, Regents of the University of California +# +# +# IDENTIFICATION +# $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:35 scrappy Exp $ +# +#------------------------------------------------------------------------- + +SUBSRCS= allpaths.c clausesel.c costsize.c hashutils.c indxpath.c \ + joinpath.c joinrels.c joinutils.c mergeutils.c orindxpath.c \ + prune.c + +# not ready yet: predmig.c xfunc.c + + + diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c new file mode 100644 index 00000000000..0ed2139095d --- /dev/null +++ b/src/backend/optimizer/path/allpaths.c @@ -0,0 +1,351 @@ +/*------------------------------------------------------------------------- + * + * allpaths.c-- + * Routines to find possible search paths for processing a query + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.1.1.1 1996/07/09 06:21:35 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include <string.h> + +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "optimizer/internal.h" + +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/clauses.h" +#include "optimizer/xfunc.h" +#include "optimizer/cost.h" + +#include "commands/creatinh.h" + +static void find_rel_paths(Query *root, List *rels); +static List *find_join_paths(Query *root, List *outer_rels, int levels_left); + +/* + * find-paths-- + * Finds all possible access paths for executing a query, returning the + * top level list of relation entries. + * + * 'rels' is the list of single relation entries appearing in the query + */ +List * +find_paths(Query *root, List *rels) +{ + int levels_left; + + /* + * Set the number of join (not nesting) levels yet to be processed. + */ + levels_left = length(rels); + + if (levels_left <= 0) + return NIL; + + /* + * Find the base relation paths. + */ + find_rel_paths(root, rels); + + if (levels_left <= 1) { + /* + * Unsorted single relation, no more processing is required. + */ + return (rels); + }else { + /* + * this means that joins or sorts are required. + * set selectivities of clauses that have not been set + * by an index. + */ + set_rest_relselec(root, rels); + + return(find_join_paths(root, rels, levels_left-1)); + } +} + +/* + * find-rel-paths-- + * Finds all paths available for scanning each relation entry in + * 'rels'. Sequential scan and any available indices are considered + * if possible(indices are not considered for lower nesting levels). + * All unique paths are attached to the relation's 'pathlist' field. + * + * MODIFIES: rels + */ +static void +find_rel_paths(Query *root, List *rels) +{ + List *temp; + Rel *rel; + List *lastpath; + + foreach(temp, rels) { + List *sequential_scan_list; + List *rel_index_scan_list; + List *or_index_scan_list; + + rel = (Rel *)lfirst(temp); + sequential_scan_list = lcons(create_seqscan_path(rel), + NIL); + + rel_index_scan_list = + find_index_paths(root, + rel, + find_relation_indices(root,rel), + rel->clauseinfo, + rel->joininfo); + + or_index_scan_list = + create_or_index_paths(root, rel, rel->clauseinfo); + + rel->pathlist = add_pathlist(rel, + sequential_scan_list, + append(rel_index_scan_list, + or_index_scan_list)); + + /* The unordered path is always the last in the list. + * If it is not the cheapest path, prune it. + */ + lastpath = rel->pathlist; + while(lnext(lastpath)!=NIL) + lastpath=lnext(lastpath); + prune_rel_path(rel, (Path*)lfirst(lastpath)); + /* + * if there is a qualification of sequential scan the selec. + * value is not set -- so set it explicitly -- Sunita + */ + set_rest_selec(root, rel->clauseinfo); + rel->size = compute_rel_size(rel); + rel->width = compute_rel_width(rel); + } + return; +} + +/* + * find-join-paths-- + * Find all possible joinpaths for a query by successively finding ways + * to join single relations into join relations. + * + * if BushyPlanFlag is set, bushy tree plans will be generated: + * Find all possible joinpaths(bushy trees) for a query by systematically + * finding ways to join relations(both original and derived) together. + * + * 'outer-rels' is the current list of relations for which join paths + * are to be found, i.e., he current list of relations that + * have already been derived. + * 'levels-left' is the current join level being processed, where '1' is + * the "last" level + * + * Returns the final level of join relations, i.e., the relation that is + * the result of joining all the original relations togehter. + */ +static List * +find_join_paths(Query *root, List *outer_rels, int levels_left) +{ + List *x; + List *new_rels; + Rel *rel; + + /* + * Determine all possible pairs of relations to be joined at this level. + * Determine paths for joining these relation pairs and modify 'new-rels' + * accordingly, then eliminate redundant join relations. + */ + new_rels = find_join_rels(root, outer_rels); + + find_all_join_paths(root, new_rels); + + new_rels = prune_joinrels(new_rels); + +#if 0 + /* + ** for each expensive predicate in each path in each distinct rel, + ** consider doing pullup -- JMH + */ + if (XfuncMode != XFUNC_NOPULL && XfuncMode != XFUNC_OFF) + foreach(x, new_rels) + xfunc_trypullup((Rel*)lfirst(x)); +#endif + + prune_rel_paths(new_rels); + + if(BushyPlanFlag) { + /* + * In case of bushy trees + * if there is still a join between a join relation and another + * relation, add a new joininfo that involves the join relation + * to the joininfo list of the other relation + */ + add_new_joininfos(root, new_rels,outer_rels); + } + + foreach(x, new_rels) { + rel = (Rel*)lfirst(x); + rel->size = compute_rel_size(rel); + rel->width = compute_rel_width(rel); + +/*#define OPTIMIZER_DEBUG*/ +#ifdef OPTIMIZER_DEBUG + printf("levels left: %d\n", levels_left); + debug_print_rel(root, rel); +#endif + } + + if(BushyPlanFlag) { + /* + * prune rels that have been completely incorporated into + * new join rels + */ + outer_rels = prune_oldrels(outer_rels); + /* + * merge join rels if then contain the same list of base rels + */ + outer_rels = merge_joinrels(new_rels,outer_rels); + root->join_relation_list_ = outer_rels; + } + else { + root->join_relation_list_ = new_rels; + } + + if(levels_left == 1) { + if(BushyPlanFlag) + return(final_join_rels(outer_rels)); + else + return(new_rels); + } else { + if(BushyPlanFlag) + return(find_join_paths(root, outer_rels, levels_left - 1)); + else + return(find_join_paths(root, new_rels, levels_left - 1)); + } +} + +/***************************************************************************** + * + *****************************************************************************/ + +static void +print_joinclauses(Query *root, List *clauses) +{ + List *l; + extern void print_expr(Node *expr, List *rtable); /* in print.c */ + + foreach(l, clauses) { + CInfo *c = lfirst(l); + + print_expr((Node*)c->clause, root->rtable); + if (lnext(l)) printf(" "); + } +} + +void +print_path(Query *root, Path *path, int indent) +{ + char *ptype = NULL; + JoinPath *jp; + bool join; + int i; + + for(i=0; i < indent; i++) + printf("\t"); + + switch(nodeTag(path)) { + case T_Path: + ptype = "SeqScan"; join=false; break; + case T_IndexPath: + ptype = "IdxScan"; join=false; break; + case T_JoinPath: + ptype = "Nestloop"; join=true; break; + case T_MergePath: + ptype = "MergeJoin"; join=true; break; + case T_HashPath: + ptype = "HashJoin"; join=true; break; + default: + break; + } + if (join) { + int size = path->parent->size; + jp = (JoinPath*)path; + printf("%s size=%d cost=%f\n", ptype, size, path->path_cost); + switch(nodeTag(path)) { + case T_MergePath: + case T_HashPath: + for(i=0; i < indent+1; i++) + printf("\t"); + printf(" clauses=("); + print_joinclauses(root, + ((JoinPath*)path)->pathclauseinfo); + printf(")\n"); + + if (nodeTag(path)==T_MergePath) { + MergePath *mp = (MergePath*)path; + if (mp->outersortkeys || mp->innersortkeys) { + for(i=0; i < indent+1; i++) + printf("\t"); + printf(" sortouter=%d sortinner=%d\n", + ((mp->outersortkeys)?1:0), + ((mp->innersortkeys)?1:0)); + } + } + break; + default: + break; + } + print_path(root, jp->outerjoinpath, indent+1); + print_path(root, jp->innerjoinpath, indent+1); + } else { + int size = path->parent->size; + int relid = lfirsti(path->parent->relids); + printf("%s(%d) size=%d cost=%f", + ptype, relid, size, path->path_cost); + + if (nodeTag(path)==T_IndexPath) { + List *k, *l; + + printf(" keys="); + foreach (k, path->keys) { + printf("("); + foreach (l, lfirst(k)) { + Var *var = lfirst(l); + printf("%d.%d", var->varnoold, var->varoattno); + if (lnext(l)) printf(", "); + } + printf(")"); + if (lnext(k)) printf(", "); + } + } + printf("\n"); + } +} + +#ifdef OPTIMIZER_DEBUG +static void +debug_print_rel(Query *root, Rel *rel) +{ + List *l; + + printf("("); + foreach(l, rel->relids) { + printf("%d ", lfirsti(l)); + } + printf("): size=%d width=%d\n", rel->size, rel->width); + + printf("\tpath list:\n"); + foreach (l, rel->pathlist) { + print_path(root, lfirst(l), 1); + } + printf("\tcheapest path:\n"); + print_path(root, rel->cheapestpath, 1); +} +#endif /* OPTIMIZER_DEBUG */ diff --git a/src/backend/optimizer/path/clausesel.c b/src/backend/optimizer/path/clausesel.c new file mode 100644 index 00000000000..634e1130794 --- /dev/null +++ b/src/backend/optimizer/path/clausesel.c @@ -0,0 +1,331 @@ +/*------------------------------------------------------------------------- + * + * clausesel.c-- + * Routines to compute and set clause selectivities + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.1.1.1 1996/07/09 06:21:35 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "optimizer/internal.h" +#include "optimizer/clauses.h" +#include "optimizer/clauseinfo.h" +#include "optimizer/cost.h" +#include "optimizer/plancat.h" + +#include "parser/parsetree.h" /* for getrelid() */ + +#include "catalog/pg_proc.h" +#include "catalog/pg_operator.h" + +#include "utils/elog.h" +#include "utils/lsyscache.h" + +static Cost compute_selec(Query *root, List *clauses, List *or_selectivities); + +/**************************************************************************** + * ROUTINES TO SET CLAUSE SELECTIVITIES + ****************************************************************************/ + +/* + * set_clause_selectivities - + * Sets the selectivity field for each of clause in 'clauseinfo-list' + * to 'new-selectivity'. If the selectivity has already been set, reset + * it only if the new one is better. + * + * Returns nothing of interest. + * + */ +void +set_clause_selectivities(List *clauseinfo_list, Cost new_selectivity) +{ + List *temp; + CInfo *clausenode; + Cost cost_clause; + + foreach (temp,clauseinfo_list) { + clausenode = (CInfo*)lfirst(temp); + cost_clause = clausenode->selectivity; + if ( FLOAT_IS_ZERO(cost_clause) || new_selectivity < cost_clause) { + clausenode->selectivity = new_selectivity; + } + } +} + +/* + * product_selec - + * Multiplies the selectivities of each clause in 'clauseinfo-list'. + * + * Returns a flonum corresponding to the selectivity of 'clauseinfo-list'. + */ +Cost +product_selec(List *clauseinfo_list) +{ + Cost result = 1.0; + if (clauseinfo_list!=NIL) { + List *xclausenode = NIL; + Cost temp; + + foreach(xclausenode,clauseinfo_list) { + temp = ((CInfo *)lfirst(xclausenode))->selectivity; + result = result * temp; + } + } + return(result); +} + +/* + * set_rest_relselec - + * Scans through clauses on each relation and assigns a selectivity to + * those clauses that haven't been assigned a selectivity by an index. + * + * Returns nothing of interest. + * MODIFIES: selectivities of the various rel's clauseinfo + * slots. + */ +void +set_rest_relselec(Query *root, List *rel_list) +{ + Rel *rel; + List *x; + + foreach (x,rel_list) { + rel = (Rel*)lfirst(x); + set_rest_selec(root, rel->clauseinfo); + } +} + +/* + * set_rest_selec - + * Sets the selectivity fields for those clauses within a single + * relation's 'clauseinfo-list' that haven't already been set. + * + * Returns nothing of interest. + * + */ +void +set_rest_selec(Query *root, List *clauseinfo_list) +{ + List *temp = NIL; + CInfo *clausenode = (CInfo*)NULL; + Cost cost_clause; + + foreach (temp,clauseinfo_list) { + clausenode = (CInfo*)lfirst(temp); + cost_clause = clausenode->selectivity; + + /* + * Check to see if the selectivity of this clause or any 'or' + * subclauses (if any) haven't been set yet. + */ + if (valid_or_clause(clausenode) || FLOAT_IS_ZERO(cost_clause)) { + clausenode->selectivity = + compute_clause_selec(root, + (Node*)clausenode->clause, + lcons(makeFloat(cost_clause), NIL)); + } + } +} + +/**************************************************************************** + * ROUTINES TO COMPUTE SELECTIVITIES + ****************************************************************************/ + +/* + * compute_clause_selec - + * Given a clause, this routine will compute the selectivity of the + * clause by calling 'compute_selec' with the appropriate parameters + * and possibly use that return value to compute the real selectivity + * of a clause. + * + * 'or-selectivities' are selectivities that have already been assigned + * to subclauses of an 'or' clause. + * + * Returns a flonum corresponding to the clause selectivity. + * + */ +Cost +compute_clause_selec(Query *root, Node *clause, List *or_selectivities) +{ + if (!is_opclause (clause)) { + /* if it's not an operator clause, then it is a boolean clause -jolly*/ + /* + * Boolean variables get a selectivity of 1/2. + */ + return(0.1); + } else if (not_clause (clause)) { + /* + * 'not' gets "1.0 - selectivity-of-inner-clause". + */ + return (1.000000 - compute_selec(root, + lcons(get_notclausearg((Expr*)clause), + NIL), + or_selectivities)); + } else if (or_clause(clause)) { + /* + * Both 'or' and 'and' clauses are evaluated as described in + * (compute_selec). + */ + return (compute_selec(root, + ((Expr*)clause)->args, or_selectivities)); + } else { + return(compute_selec(root, + lcons(clause,NIL),or_selectivities)); + } +} + +/* + * compute_selec - + * Computes the selectivity of a clause. + * + * If there is more than one clause in the argument 'clauses', then the + * desired selectivity is that of an 'or' clause. Selectivities for an + * 'or' clause such as (OR a b) are computed by finding the selectivity + * of a (s1) and b (s2) and computing s1+s2 - s1*s2. + * + * In addition, if the clause is an 'or' clause, individual selectivities + * may have already been assigned by indices to subclauses. These values + * are contained in the list 'or-selectivities'. + * + * Returns the clause selectivity as a flonum. + * + */ +static Cost +compute_selec(Query *root, List *clauses, List *or_selectivities) +{ + Cost s1 = 0; + List *clause = lfirst(clauses); + + if (clauses==NULL) { + s1 = 1.0; + } else if (IsA(clause,Param)) { + /* XXX How're we handling this before?? -ay */ + s1 = 1.0; + } else if (IsA(clause,Const)) { + s1 = ((bool) ((Const*) clause)->constvalue) ? 1.0 : 0.0; + } else if (IsA(clause,Var)) { + Oid relid = getrelid(((Var*)clause)->varno, + root->rtable); + + /* + * we have a bool Var. This is exactly equivalent to the clause: + * reln.attribute = 't' + * so we compute the selectivity as if that is what we have. The + * magic #define constants are a hack. I didn't want to have to + * do system cache look ups to find out all of that info. + */ + + s1 = restriction_selectivity(EqualSelectivityProcedure, + BooleanEqualOperator, + relid, + ((Var*)clause)->varoattno, + "t", + _SELEC_CONSTANT_RIGHT_); + } else if (or_selectivities) { + /* If s1 has already been assigned by an index, use that value. */ + List *this_sel = lfirst(or_selectivities); + + s1 = floatVal(this_sel); + } else if (is_funcclause((Node*)clause)) { + /* this isn't an Oper, it's a Func!! */ + /* + ** This is not an operator, so we guess at the selectivity. + ** THIS IS A HACK TO GET V4 OUT THE DOOR. FUNCS SHOULD BE + ** ABLE TO HAVE SELECTIVITIES THEMSELVES. + ** -- JMH 7/9/92 + */ + s1 = 0.1; + } else if (NumRelids((Node*) clause) == 1) { + /* ...otherwise, calculate s1 from 'clauses'. + * The clause is not a join clause, since there is + * only one relid in the clause. The clause + * selectivity will be based on the operator + * selectivity and operand values. + */ + Oid opno = ((Oper*)((Expr*)clause)->oper)->opno; + RegProcedure oprrest = get_oprrest(opno); + Oid relid; + int relidx; + AttrNumber attno; + Datum constval; + int flag; + + get_relattval((Node*)clause, &relidx, &attno, &constval, &flag); + relid = getrelid(relidx, root->rtable); + + /* if the oprrest procedure is missing for whatever reason, + use a selectivity of 0.5*/ + if (!oprrest) + s1 = (Cost) (0.5); + else + if (attno == InvalidAttrNumber) { + /* attno can be Invalid if the clause had a function in it, + i.e. WHERE myFunc(f) = 10 */ + /* this should be FIXED somehow to use function selectivity */ + s1 = (Cost) (0.5); + } else + s1 = (Cost) restriction_selectivity(oprrest, + opno, + relid, + attno, + (char *)constval, + flag); + + } else { + /* The clause must be a join clause. The clause + * selectivity will be based on the relations to be + * scanned and the attributes they are to be joined + * on. + */ + Oid opno = ((Oper*)((Expr*)clause)->oper)->opno; + RegProcedure oprjoin = get_oprjoin (opno); + int relid1, relid2; + AttrNumber attno1, attno2; + + get_rels_atts((Node*)clause, &relid1, &attno1, &relid2, &attno2); + relid1 = getrelid(relid1, root->rtable); + relid2 = getrelid(relid2, root->rtable); + + /* if the oprjoin procedure is missing for whatever reason, + use a selectivity of 0.5*/ + if (!oprjoin) + s1 = (Cost) (0.5); + else + s1 = (Cost) join_selectivity(oprjoin, + opno, + relid1, + attno1, + relid2, + attno2); + } + + /* A null clause list eliminates no tuples, so return a selectivity + * of 1.0. If there is only one clause, the selectivity is not + * that of an 'or' clause, but rather that of the single clause. + */ + + if (length (clauses) < 2) { + return(s1); + } else { + /* Compute selectivity of the 'or'ed subclauses. */ + /* Added check for taking lnext(NIL). -- JMH 3/9/92 */ + Cost s2; + + if (or_selectivities != NIL) + s2 = compute_selec(root, lnext(clauses), lnext(or_selectivities)); + else + s2 = compute_selec(root, lnext(clauses), NIL); + return(s1 + s2 - s1 * s2); + } +} + diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c new file mode 100644 index 00000000000..14bbb5f8e4f --- /dev/null +++ b/src/backend/optimizer/path/costsize.c @@ -0,0 +1,456 @@ +/*------------------------------------------------------------------------- + * + * costsize.c-- + * Routines to compute (and set) relation sizes and path costs + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.1.1.1 1996/07/09 06:21:35 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include <math.h> +#ifdef WIN32 +#include <float.h> +#include <limits.h> +#define MAXINT INT_MAX +#else +# if defined(PORTNAME_BSD44_derived) || defined(PORTNAME_bsdi) +# include <machine/limits.h> +# define MAXINT INT_MAX +# else +# include <values.h> +# endif /* !PORTNAME_BSD44_derived */ +#endif /* WIN32 */ + +#include "postgres.h" + +#include "nodes/relation.h" + +#include "optimizer/cost.h" +#include "optimizer/internal.h" +#include "optimizer/keys.h" +#include "optimizer/tlist.h" + +#include "storage/bufmgr.h" /* for BLCKSZ */ + +static int compute_attribute_width(TargetEntry *tlistentry); +static double base_log(double x, double b); + +int _disable_cost_ = 30000000; + +bool _enable_seqscan_ = true; +bool _enable_indexscan_ = true; +bool _enable_sort_ = true; +bool _enable_hash_ = true; +bool _enable_nestloop_ = true; +bool _enable_mergesort_ = true; +bool _enable_hashjoin_ = true; + +/* + * cost_seqscan-- + * Determines and returns the cost of scanning a relation sequentially. + * If the relation is a temporary to be materialized from a query + * embedded within a data field (determined by 'relid' containing an + * attribute reference), then a predetermined constant is returned (we + * have NO IDEA how big the result of a POSTQUEL procedure is going to + * be). + * + * disk = p + * cpu = *CPU-PAGE-WEIGHT* * t + * + * 'relid' is the relid of the relation to be scanned + * 'relpages' is the number of pages in the relation to be scanned + * (as determined from the system catalogs) + * 'reltuples' is the number of tuples in the relation to be scanned + * + * Returns a flonum. + * + */ +Cost +cost_seqscan(int relid, int relpages, int reltuples) +{ + Cost temp = 0; + + if ( !_enable_seqscan_ ) + temp += _disable_cost_; + + if (relid < 0) { + /* + * cost of sequentially scanning a materialized temporary relation + */ + temp += _TEMP_SCAN_COST_; + } else { + temp += relpages; + temp += _CPU_PAGE_WEIGHT_ * reltuples; + } + Assert(temp >= 0); + return(temp); +} + + +/* + * cost_index-- + * Determines and returns the cost of scanning a relation using an index. + * + * disk = expected-index-pages + expected-data-pages + * cpu = *CPU-PAGE-WEIGHT* * + * (expected-index-tuples + expected-data-tuples) + * + * 'indexid' is the index OID + * 'expected-indexpages' is the number of index pages examined in the scan + * 'selec' is the selectivity of the index + * 'relpages' is the number of pages in the main relation + * 'reltuples' is the number of tuples in the main relation + * 'indexpages' is the number of pages in the index relation + * 'indextuples' is the number of tuples in the index relation + * + * Returns a flonum. + * + */ +Cost +cost_index(Oid indexid, + int expected_indexpages, + Cost selec, + int relpages, + int reltuples, + int indexpages, + int indextuples, + bool is_injoin) +{ + Cost temp; + Cost temp2; + + temp = temp2 = (Cost) 0; + + if (!_enable_indexscan_ && !is_injoin) + temp += _disable_cost_; + + /* expected index relation pages */ + temp += expected_indexpages; + + /* about one base relation page */ + temp += Min(relpages,(int)ceil((double)selec*indextuples)); + + /* + * per index tuple + */ + temp2 += selec * indextuples; + temp2 += selec * reltuples; + + temp = temp + (_CPU_PAGE_WEIGHT_ * temp2); + Assert(temp >= 0); + return(temp); +} + +/* + * cost_sort-- + * Determines and returns the cost of sorting a relation by considering + * 1. the cost of doing an external sort: XXX this is probably too low + * disk = (p lg p) + * cpu = *CPU-PAGE-WEIGHT* * (t lg t) + * 2. the cost of reading the sort result into memory (another seqscan) + * unless 'noread' is set + * + * 'keys' is a list of sort keys + * 'tuples' is the number of tuples in the relation + * 'width' is the average tuple width in bytes + * 'noread' is a flag indicating that the sort result can remain on disk + * (i.e., the sort result is the result relation) + * + * Returns a flonum. + * + */ +Cost +cost_sort(List *keys, int tuples, int width, bool noread) +{ + Cost temp = 0; + int npages = page_size (tuples,width); + Cost pages = (Cost)npages; + Cost numTuples = tuples; + + if ( !_enable_sort_ ) + temp += _disable_cost_ ; + if (tuples == 0 || keys==NULL) + { + Assert(temp >= 0); + return(temp); + } + temp += pages * base_log((double)pages, (double)2.0); + + /* + * could be base_log(pages, NBuffers), but we are only doing 2-way merges + */ + temp += _CPU_PAGE_WEIGHT_ * + numTuples * base_log((double)pages,(double)2.0); + + if( !noread ) + temp = temp + cost_seqscan(_TEMP_RELATION_ID_, npages, tuples); + Assert(temp >= 0); + + return(temp); +} + + +/* + * cost_result-- + * Determines and returns the cost of writing a relation of 'tuples' + * tuples of 'width' bytes out to a result relation. + * + * Returns a flonum. + * + */ +Cost +cost_result(int tuples, int width) +{ + Cost temp =0; + temp = temp + page_size(tuples,width); + temp = temp + _CPU_PAGE_WEIGHT_ * tuples; + Assert(temp >= 0); + return(temp); +} + +/* + * cost_nestloop-- + * Determines and returns the cost of joining two relations using the + * nested loop algorithm. + * + * 'outercost' is the (disk+cpu) cost of scanning the outer relation + * 'innercost' is the (disk+cpu) cost of scanning the inner relation + * 'outertuples' is the number of tuples in the outer relation + * + * Returns a flonum. + * + */ +Cost +cost_nestloop(Cost outercost, + Cost innercost, + int outertuples, + int innertuples, + int outerpages, + bool is_indexjoin) +{ + Cost temp =0; + + if ( !_enable_nestloop_ ) + temp += _disable_cost_; + temp += outercost; + temp += outertuples * innercost; + Assert(temp >= 0); + + return(temp); +} + +/* + * cost_mergesort-- + * 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the + * outer and inner relations + * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used + * to sort the outer and inner relations + * 'outertuples' and 'innertuples' are the number of tuples in the outer + * and inner relations + * 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes) + * of the tuples of the outer and inner relations + * + * Returns a flonum. + * + */ +Cost +cost_mergesort(Cost outercost, + Cost innercost, + List *outersortkeys, + List *innersortkeys, + int outersize, + int innersize, + int outerwidth, + int innerwidth) +{ + Cost temp = 0; + + if ( !_enable_mergesort_ ) + temp += _disable_cost_; + + temp += outercost; + temp += innercost; + temp += cost_sort(outersortkeys,outersize,outerwidth,false); + temp += cost_sort(innersortkeys,innersize,innerwidth,false); + temp += _CPU_PAGE_WEIGHT_ * (outersize + innersize); + Assert(temp >= 0); + + return(temp); +} + +/* + * cost_hashjoin-- XXX HASH + * 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the + * outer and inner relations + * 'outerkeys' and 'innerkeys' are lists of the keys to be used + * to hash the outer and inner relations + * 'outersize' and 'innersize' are the number of tuples in the outer + * and inner relations + * 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes) + * of the tuples of the outer and inner relations + * + * Returns a flonum. + */ +Cost +cost_hashjoin(Cost outercost, + Cost innercost, + List *outerkeys, + List *innerkeys, + int outersize, + int innersize, + int outerwidth, + int innerwidth) +{ + Cost temp = 0; + int outerpages = page_size (outersize,outerwidth); + int innerpages = page_size (innersize,innerwidth); + int nrun = ceil((double)outerpages/(double)NBuffers); + + if (outerpages < innerpages) + return _disable_cost_; + if ( !_enable_hashjoin_ ) + temp += _disable_cost_; +/* temp += outercost + (nrun + 1) * innercost; */ + /* + the innercost shouldn't be used it. Instead the + cost of hashing the innerpath should be used + + ASSUME innercost is 1 for now -- a horrible hack + - jolly + */ + temp += outercost + (nrun + 1); + + temp += _CPU_PAGE_WEIGHT_ * (outersize + nrun * innersize); + Assert(temp >= 0); + + return(temp); +} + +/* + * compute-rel-size-- + * Computes the size of each relation in 'rel-list' (after applying + * restrictions), by multiplying the selectivity of each restriction + * by the original size of the relation. + * + * Sets the 'size' field for each relation entry with this computed size. + * + * Returns the size. + */ +int compute_rel_size(Rel *rel) +{ + Cost temp; + int temp1; + + temp = rel->tuples * product_selec(rel->clauseinfo); + Assert(temp >= 0); + if (temp >= (MAXINT - 1)) { + temp1 = MAXINT; + } else { + temp1 = ceil((double) temp); + } + Assert(temp1 >= 0); + Assert(temp1 <= MAXINT); + return(temp1); +} + +/* + * compute-rel-width-- + * Computes the width in bytes of a tuple from 'rel'. + * + * Returns the width of the tuple as a fixnum. + */ +int +compute_rel_width(Rel *rel) +{ + return (compute_targetlist_width(get_actual_tlist(rel->targetlist))); +} + +/* + * compute-targetlist-width-- + * Computes the width in bytes of a tuple made from 'targetlist'. + * + * Returns the width of the tuple as a fixnum. + */ +int +compute_targetlist_width(List *targetlist) +{ + List *temp_tl; + int tuple_width = 0; + + foreach (temp_tl, targetlist) { + tuple_width = tuple_width + + compute_attribute_width(lfirst(temp_tl)); + } + return(tuple_width); +} + +/* + * compute-attribute-width-- + * Given a target list entry, find the size in bytes of the attribute. + * + * If a field is variable-length, it is assumed to be at least the size + * of a TID field. + * + * Returns the width of the attribute as a fixnum. + */ +static int +compute_attribute_width(TargetEntry *tlistentry) +{ + int width = get_typlen(tlistentry->resdom->restype); + if (width < 0) + return(_DEFAULT_ATTRIBUTE_WIDTH_); + else + return(width); +} + +/* + * compute-joinrel-size-- + * Computes the size of the join relation 'joinrel'. + * + * Returns a fixnum. + */ +int +compute_joinrel_size(JoinPath *joinpath) +{ + Cost temp = 1.0; + int temp1 = 0; + + temp *= ((Path*)joinpath->outerjoinpath)->parent->size; + temp *= ((Path*)joinpath->innerjoinpath)->parent->size; + + temp = temp * product_selec(joinpath->pathclauseinfo); + if (temp >= (MAXINT -1)) { + temp1 = MAXINT; + } else { + /* should be ceil here, we don't want joinrel size's of one, do we? */ + temp1 = ceil((double)temp); + } + Assert(temp1 >= 0); + + return(temp1); +} + +/* + * page-size-- + * Returns an estimate of the number of pages covered by a given + * number of tuples of a given width (size in bytes). + */ +int page_size(int tuples, int width) +{ + int temp =0; + + temp = ceil((double)(tuples * (width + sizeof(HeapTupleData))) + / BLCKSZ); + Assert(temp >= 0); + return(temp); +} + +static double +base_log(double x, double b) +{ + return(log(x)/log(b)); +} diff --git a/src/backend/optimizer/path/hashutils.c b/src/backend/optimizer/path/hashutils.c new file mode 100644 index 00000000000..cdbd9b6d901 --- /dev/null +++ b/src/backend/optimizer/path/hashutils.c @@ -0,0 +1,120 @@ +/*------------------------------------------------------------------------- + * + * hashutils.c-- + * Utilities for finding applicable merge clauses and pathkeys + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/hashutils.c,v 1.1.1.1 1996/07/09 06:21:35 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" +#include "nodes/pg_list.h" +#include "nodes/relation.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/clauses.h" + + +static HInfo *match_hashop_hashinfo(Oid hashop, List *hashinfo_list); + +/* + * group-clauses-by-hashop-- + * If a join clause node in 'clauseinfo-list' is hashjoinable, store + * it within a hashinfo node containing other clause nodes with the same + * hash operator. + * + * 'clauseinfo-list' is the list of clauseinfo nodes + * 'inner-relid' is the relid of the inner join relation + * + * Returns the new list of hashinfo nodes. + * + */ +List * +group_clauses_by_hashop(List *clauseinfo_list, + int inner_relid) +{ + List *hashinfo_list = NIL; + CInfo *clauseinfo = (CInfo*)NULL; + List *i = NIL; + Oid hashjoinop = 0; + + foreach (i,clauseinfo_list) { + clauseinfo = (CInfo*)lfirst(i); + hashjoinop = clauseinfo->hashjoinoperator; + + /* + * Create a new hashinfo node and add it to 'hashinfo-list' if one + * does not yet exist for this hash operator. + */ + if (hashjoinop ) { + HInfo *xhashinfo = (HInfo*)NULL; + Expr *clause = clauseinfo->clause; + Var *leftop = get_leftop(clause); + Var *rightop = get_rightop(clause); + JoinKey *keys = (JoinKey*)NULL; + + xhashinfo = + match_hashop_hashinfo(hashjoinop,hashinfo_list); + + if (inner_relid == leftop->varno){ + keys = makeNode(JoinKey); + keys->outer = rightop; + keys->inner = leftop; + } else { + keys = makeNode(JoinKey); + keys->outer = leftop; + keys->inner = rightop; + } + + if (xhashinfo==NULL) { + xhashinfo = makeNode(HInfo); + xhashinfo->hashop = hashjoinop; + + xhashinfo->jmethod.jmkeys = NIL; + xhashinfo->jmethod.clauses = NIL; + + /* XXX was push */ + hashinfo_list = lappend(hashinfo_list,xhashinfo); + hashinfo_list = nreverse(hashinfo_list); + } + + xhashinfo->jmethod.clauses = + lcons(clause, xhashinfo->jmethod.clauses); + + xhashinfo->jmethod.jmkeys = + lcons(keys, xhashinfo->jmethod.jmkeys); + } + } + return(hashinfo_list); +} + + +/* + * match-hashop-hashinfo-- + * Searches the list 'hashinfo-list' for a hashinfo node whose hash op + * field equals 'hashop'. + * + * Returns the node if it exists. + * + */ +static HInfo * +match_hashop_hashinfo(Oid hashop, List *hashinfo_list) +{ + Oid key = 0; + HInfo *xhashinfo = (HInfo*)NULL; + List *i = NIL; + + foreach( i, hashinfo_list) { + xhashinfo = (HInfo*)lfirst(i); + key = xhashinfo->hashop; + if (hashop == key) { /* found */ + return(xhashinfo); /* should be a hashinfo node ! */ + } + } + return((HInfo*)NIL); +} diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c new file mode 100644 index 00000000000..844571847f9 --- /dev/null +++ b/src/backend/optimizer/path/indxpath.c @@ -0,0 +1,1206 @@ +/*------------------------------------------------------------------------- + * + * indxpath.c-- + * Routines to determine which indices are usable for scanning a + * given relation + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.1.1.1 1996/07/09 06:21:35 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include <math.h> +#include "postgres.h" +#include "access/attnum.h" +#include "access/heapam.h" +#include "access/nbtree.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" + +#include "utils/lsyscache.h" +#include "utils/elog.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/clauses.h" +#include "optimizer/clauseinfo.h" +#include "optimizer/plancat.h" +#include "optimizer/keys.h" +#include "optimizer/cost.h" +#include "optimizer/pathnode.h" +#include "optimizer/xfunc.h" +#include "optimizer/ordering.h" + + +#include "catalog/catname.h" +#include "catalog/pg_amop.h" +#include "catalog/pg_proc.h" + +#include "executor/executor.h" +#include "parser/parsetree.h" /* for getrelid() */ + + +static void match_index_orclauses(Rel *rel, Rel *index, int indexkey, + int xclass, List *clauseinfo_list); +static bool match_index_to_operand(int indexkey, Expr *operand, + Rel *rel, Rel *index); +static List *match_index_orclause(Rel *rel, Rel *index, int indexkey, + int xclass, List *or_clauses, List *other_matching_indices); +static List *group_clauses_by_indexkey(Rel *rel, Rel *index, + int *indexkeys, Oid *classes, List *clauseinfo_list, + bool join); +static CInfo *match_clause_to_indexkey(Rel *rel, Rel *index, int indexkey, + int xclass, CInfo *clauseInfo, bool join); +static bool pred_test(List *predicate_list, List *clauseinfo_list, + List *joininfo_list); +static bool one_pred_test(Expr *predicate, List *clauseinfo_list); +static bool one_pred_clause_expr_test(Expr *predicate, Node *clause); +static bool one_pred_clause_test(Expr *predicate, Node *clause); +static bool clause_pred_clause_test(Expr *predicate, Node *clause); +static List *indexable_joinclauses(Rel *rel, Rel *index, List *joininfo_list); +static List *index_innerjoin(Query *root, Rel *rel, + List *clausegroup_list, Rel *index); +static List *create_index_paths(Query *root, Rel *rel, Rel *index, + List *clausegroup_list, bool join); +static List *add_index_paths(List *indexpaths, List *new_indexpaths); +static bool function_index_operand(Expr *funcOpnd, Rel *rel, Rel *index); +static bool SingleAttributeIndex(Rel *index); + +/* If Spyros can use a constant PRS2_BOOL_TYPEID, I can use this */ +#define BOOL_TYPEID ((Oid) 16) + +/* + * find-index-paths-- + * Finds all possible index paths by determining which indices in the + * list 'indices' are usable. + * + * To be usable, an index must match against either a set of + * restriction clauses or join clauses. + * + * Note that the current implementation requires that there exist + * matching clauses for every key in the index (i.e., no partial + * matches are allowed). + * + * If an index can't be used with restriction clauses, but its keys + * match those of the result sort order (according to information stored + * within 'sortkeys'), then the index is also considered. + * + * 'rel' is the relation entry to which these index paths correspond + * 'indices' is a list of possible index paths + * 'clauseinfo-list' is a list of restriction clauseinfo nodes for 'rel' + * 'joininfo-list' is a list of joininfo nodes for 'rel' + * 'sortkeys' is a node describing the result sort order (from + * (find_sortkeys)) + * + * Returns a list of index nodes. + * + */ +List * +find_index_paths (Query *root, + Rel *rel, + List *indices, + List *clauseinfo_list, + List *joininfo_list) +{ + List *scanclausegroups = NIL; + List *scanpaths = NIL; + Rel *index = (Rel *)NULL; + List *joinclausegroups = NIL; + List *joinpaths = NIL; + List *retval = NIL; + extern List *add_index_paths(); + + if(indices == NIL) + return(NULL); + + index = (Rel*)lfirst (indices); + + retval = find_index_paths(root, + rel, + lnext (indices), + clauseinfo_list, + joininfo_list); + + /* If this is a partial index, return if it fails the predicate test */ + if (index->indpred != NIL) + if (!pred_test(index->indpred, clauseinfo_list, joininfo_list)) + return retval; + + /* 1. If this index has only one key, try matching it against + * subclauses of an 'or' clause. The fields of the clauseinfo + * nodes are marked with lists of the matching indices no path + * are actually created. + * + * XXX NOTE: Currently btrees dos not support indices with + * > 1 key, so the following test will always be true for + * now but we have decided not to support index-scans + * on disjunction . -- lp + */ + if (SingleAttributeIndex(index)) + { + match_index_orclauses (rel, + index, + index->indexkeys[0], + index->classlist[0], + clauseinfo_list); + } + + /* + * 2. If the keys of this index match any of the available + * restriction clauses, then create pathnodes corresponding + * to each group of usable clauses. + */ + scanclausegroups = group_clauses_by_indexkey(rel, + index, + index->indexkeys, + index->classlist, + clauseinfo_list, + false); + + scanpaths = NIL; + if (scanclausegroups != NIL) + scanpaths = create_index_paths (root, + rel, + index, + scanclausegroups, + false); + + /* + * 3. If this index can be used with any join clause, then + * create pathnodes for each group of usable clauses. An + * index can be used with a join clause if its ordering is + * useful for a mergejoin, or if the index can possibly be + * used for scanning the inner relation of a nestloop join. + */ + joinclausegroups = indexable_joinclauses(rel,index,joininfo_list); + joinpaths = NIL; + + if (joinclausegroups != NIL) + { + List *new_join_paths = create_index_paths(root, rel, + index, + joinclausegroups, + true); + List *innerjoin_paths = index_innerjoin(root, rel,joinclausegroups,index); + + rel->innerjoin = nconc (rel->innerjoin, innerjoin_paths); + joinpaths = new_join_paths; + } + + /* + * Some sanity checks to make sure that + * the indexpath is valid. + */ + if (joinpaths!=NULL) + retval = add_index_paths(joinpaths,retval); + if (scanpaths!=NULL) + retval = add_index_paths(scanpaths,retval); + + return retval; + +} + + +/**************************************************************************** + * ---- ROUTINES TO MATCH 'OR' CLAUSES ---- + ****************************************************************************/ + + +/* + * match-index-orclauses-- + * Attempt to match an index against subclauses within 'or' clauses. + * If the index does match, then the clause is marked with information + * about the index. + * + * Essentially, this adds 'index' to the list of indices in the + * ClauseInfo field of each of the clauses which it matches. + * + * 'rel' is the node of the relation on which the index is defined. + * 'index' is the index node. + * 'indexkey' is the (single) key of the index + * 'class' is the class of the operator corresponding to 'indexkey'. + * 'clauseinfo-list' is the list of available restriction clauses. + * + * Returns nothing. + * + */ +static void +match_index_orclauses(Rel *rel, + Rel *index, + int indexkey, + int xclass, + List *clauseinfo_list) +{ + CInfo *clauseinfo = (CInfo*)NULL; + List *i = NIL; + + foreach (i, clauseinfo_list) { + clauseinfo = (CInfo*)lfirst(i); + if (valid_or_clause(clauseinfo)) { + + /* Mark the 'or' clause with a list of indices which + * match each of its subclauses. The list is + * generated by adding 'index' to the existing + * list where appropriate. + */ + clauseinfo->indexids = + match_index_orclause (rel,index,indexkey, + xclass, + clauseinfo->clause->args, + clauseinfo->indexids); + } + } +} + +/* + * match_index_operand-- + * Generalize test for a match between an existing index's key + * and the operand on the rhs of a restriction clause. Now check + * for functional indices as well. + */ +static bool +match_index_to_operand(int indexkey, + Expr *operand, + Rel *rel, + Rel *index) +{ + /* + * Normal index. + */ + if (index->indproc == InvalidOid) + return match_indexkey_operand(indexkey, (Var*)operand, rel); + + /* + * functional index check + */ + return (function_index_operand(operand, rel, index)); +} + +/* + * match-index-orclause-- + * Attempts to match an index against the subclauses of an 'or' clause. + * + * A match means that: + * (1) the operator within the subclause can be used with one + * of the index's operator classes, and + * (2) there is a usable key that matches the variable within a + * sargable clause. + * + * 'or-clauses' are the remaining subclauses within the 'or' clause + * 'other-matching-indices' is the list of information on other indices + * that have already been matched to subclauses within this + * particular 'or' clause (i.e., a list previously generated by + * this routine) + * + * Returns a list of the form ((a b c) (d e f) nil (g h) ...) where + * a,b,c are nodes of indices that match the first subclause in + * 'or-clauses', d,e,f match the second subclause, no indices + * match the third, g,h match the fourth, etc. + */ +static List * +match_index_orclause(Rel *rel, + Rel *index, + int indexkey, + int xclass, + List *or_clauses, + List *other_matching_indices) +{ + Node *clause = NULL; + List *matched_indices = other_matching_indices; + List *index_list = NIL; + List *clist; + List *ind; + + if (!matched_indices) + matched_indices = lcons(NIL, NIL); + + for (clist = or_clauses, ind = matched_indices; + clist; + clist = lnext(clist), ind = lnext(ind)) + { + clause = lfirst(clist); + if (is_opclause (clause) && + op_class(((Oper*)((Expr*)clause)->oper)->opno, + xclass, index->relam) && + match_index_to_operand(indexkey, + (Expr*)get_leftop((Expr*)clause), + rel, + index) && + IsA(get_rightop((Expr*)clause),Const)) { + + matched_indices = lcons(index, matched_indices); + index_list = lappend(index_list, + matched_indices); + } + } + return(index_list); + +} + +/**************************************************************************** + * ---- ROUTINES TO CHECK RESTRICTIONS ---- + ****************************************************************************/ + + +/* + * DoneMatchingIndexKeys() - MACRO + * + * Determine whether we should continue matching index keys in a clause. + * Depends on if there are more to match or if this is a functional index. + * In the latter case we stop after the first match since the there can + * be only key (i.e. the function's return value) and the attributes in + * keys list represent the arguments to the function. -mer 3 Oct. 1991 + */ +#define DoneMatchingIndexKeys(indexkeys, index) \ + (indexkeys[0] == 0 || \ + (index->indproc != InvalidOid)) + +/* + * group-clauses-by-indexkey-- + * Determines whether there are clauses which will match each and every + * one of the remaining keys of an index. + * + * 'rel' is the node of the relation corresponding to the index. + * 'indexkeys' are the remaining index keys to be matched. + * 'classes' are the classes of the index operators on those keys. + * 'clauses' is either: + * (1) the list of available restriction clauses on a single + * relation, or + * (2) a list of join clauses between 'rel' and a fixed set of + * relations, + * depending on the value of 'join'. + * 'startlist' is a list of those clause nodes that have matched the keys + * that have already been checked. + * 'join' is a flag indicating that the clauses being checked are join + * clauses. + * + * Returns all possible groups of clauses that will match (given that + * one or more clauses can match any of the remaining keys). + * E.g., if you have clauses A, B, and C, ((A B) (A C)) might be + * returned for an index with 2 keys. + * + */ +static List * +group_clauses_by_indexkey(Rel *rel, + Rel *index, + int *indexkeys, + Oid *classes, + List *clauseinfo_list, + bool join) +{ + List *curCinfo = NIL; + CInfo *matched_clause = (CInfo*)NULL; + List *clausegroup = NIL; + + + if (clauseinfo_list == NIL) + return NIL; + + foreach (curCinfo,clauseinfo_list) { + CInfo *temp = (CInfo*)lfirst(curCinfo); + int *curIndxKey = indexkeys; + Oid *curClass = classes; + + do { + /* + * If we can't find any matching clauses for the first of + * the remaining keys, give up. + */ + matched_clause = match_clause_to_indexkey (rel, + index, + curIndxKey[0], + curClass[0], + temp, + join); + if (!matched_clause) + break; + + clausegroup = lcons(matched_clause, clausegroup); + curIndxKey++; + curClass++; + + } while ( !DoneMatchingIndexKeys(curIndxKey, index) ); + } + + if (clausegroup != NIL) + return(lcons(clausegroup, NIL)); + return NIL; +} + +/* + * IndexScanableClause () MACRO + * + * Generalize condition on which we match a clause with an index. + * Now we can match with functional indices. + */ +#define IndexScanableOperand(opnd, indkeys, rel, index) \ + ((index->indproc == InvalidOid) ? \ + equal_indexkey_var(indkeys,opnd) : \ + function_index_operand((Expr*)opnd,rel,index)) + +/* + * match_clause_to-indexkey-- + * Finds the first of a relation's available restriction clauses that + * matches a key of an index. + * + * To match, the clause must: + * (1) be in the form (op var const) if the clause is a single- + * relation clause, and + * (2) contain an operator which is in the same class as the index + * operator for this key. + * + * If the clause being matched is a join clause, then 'join' is t. + * + * Returns a single clauseinfo node corresponding to the matching + * clause. + * + * NOTE: returns nil if clause is an or_clause. + * + */ +static CInfo * +match_clause_to_indexkey(Rel *rel, + Rel *index, + int indexkey, + int xclass, + CInfo *clauseInfo, + bool join) +{ + Expr *clause = clauseInfo->clause; + Var *leftop, *rightop; + Oid join_op = InvalidOid; + bool isIndexable = false; + + if (or_clause((Node*)clause) || + not_clause((Node*)clause) || single_node((Node*)clause)) + return ((CInfo*)NULL); + + leftop = get_leftop(clause); + rightop = get_rightop(clause); + /* + * If this is not a join clause, check for clauses of the form: + * (operator var/func constant) and (operator constant var/func) + */ + if (!join) + { + Oid restrict_op = InvalidOid; + + /* + * Check for standard s-argable clause + */ + if (IsA(rightop,Const)) + { + restrict_op = ((Oper*)((Expr*)clause)->oper)->opno; + isIndexable = + ( op_class(restrict_op, xclass, index->relam) && + IndexScanableOperand(leftop, + indexkey, + rel, + index) ); + } + + /* + * Must try to commute the clause to standard s-arg format. + */ + else if (IsA(leftop,Const)) + { + restrict_op = + get_commutator(((Oper*)((Expr*)clause)->oper)->opno); + + if ( (restrict_op != InvalidOid) && + op_class(restrict_op, xclass, index->relam) && + IndexScanableOperand(rightop, + indexkey,rel,index) ) + { + isIndexable = true; + /* + * In place list modification. + * (op const var/func) -> (op var/func const) + */ + /* BUG! Old version: + CommuteClause(clause, restrict_op); + */ + CommuteClause((Node*)clause); + } + } + } + /* + * Check for an indexable scan on one of the join relations. + * clause is of the form (operator var/func var/func) + */ + else + { + if (match_index_to_operand(indexkey,(Expr*)rightop,rel,index)) { + + join_op = get_commutator(((Oper*)((Expr*)clause)->oper)->opno); + + } else if (match_index_to_operand(indexkey, + (Expr*)leftop,rel,index)) { + join_op = ((Oper*)((Expr*)clause)->oper)->opno; + } + + if ( join_op && op_class(join_op,xclass,index->relam) && + join_clause_p((Node*)clause)) + { + isIndexable = true; + + /* + * If we're using the operand's commutator we must + * commute the clause. + */ + if (join_op != ((Oper*)((Expr*)clause)->oper)->opno) + CommuteClause((Node*)clause); + } + } + + if (isIndexable) + return(clauseInfo); + + return(NULL); +} + +/**************************************************************************** + * ---- ROUTINES TO DO PARTIAL INDEX PREDICATE TESTS ---- + ****************************************************************************/ + +/* + * pred_test-- + * Does the "predicate inclusion test" for partial indexes. + * + * Recursively checks whether the clauses in clauseinfo_list imply + * that the given predicate is true. + * + * This routine (together with the routines it calls) iterates over + * ANDs in the predicate first, then reduces the qualification + * clauses down to their constituent terms, and iterates over ORs + * in the predicate last. This order is important to make the test + * succeed whenever possible (assuming the predicate has been + * successfully cnfify()-ed). --Nels, Jan '93 + */ +static bool +pred_test(List *predicate_list, List *clauseinfo_list, List *joininfo_list) +{ + List *pred, *items, *item; + + /* + * Note: if Postgres tried to optimize queries by forming equivalence + * classes over equi-joined attributes (i.e., if it recognized that a + * qualification such as "where a.b=c.d and a.b=5" could make use of + * an index on c.d), then we could use that equivalence class info + * here with joininfo_list to do more complete tests for the usability + * of a partial index. For now, the test only uses restriction + * clauses (those in clauseinfo_list). --Nels, Dec '92 + */ + + if (predicate_list == NULL) + return true; /* no predicate: the index is usable */ + if (clauseinfo_list == NULL) + return false; /* no restriction clauses: the test must fail */ + + foreach (pred, predicate_list) { + /* if any clause is not implied, the whole predicate is not implied */ + if (and_clause(lfirst(pred))) { + items = ((Expr*)lfirst(pred))->args; + foreach (item, items) { + if (!one_pred_test(lfirst(item), clauseinfo_list)) + return false; + } + } + else if (!one_pred_test(lfirst(pred), clauseinfo_list)) + return false; + } + return true; +} + + +/* + * one_pred_test-- + * Does the "predicate inclusion test" for one conjunct of a predicate + * expression. + */ +static bool +one_pred_test(Expr *predicate, List *clauseinfo_list) +{ + CInfo *clauseinfo; + List *item; + + Assert(predicate != NULL); + foreach (item, clauseinfo_list) { + clauseinfo = (CInfo *)lfirst(item); + /* if any clause implies the predicate, return true */ + if (one_pred_clause_expr_test(predicate, (Node*)clauseinfo->clause)) + return true; + } + return false; +} + + +/* + * one_pred_clause_expr_test-- + * Does the "predicate inclusion test" for a general restriction-clause + * expression. + */ +static bool +one_pred_clause_expr_test(Expr *predicate, Node *clause) +{ + List *items, *item; + + if (is_opclause(clause)) + return one_pred_clause_test(predicate, clause); + else if (or_clause(clause)) { + items = ((Expr*)clause)->args; + foreach (item, items) { + /* if any OR item doesn't imply the predicate, clause doesn't */ + if (!one_pred_clause_expr_test(predicate, lfirst(item))) + return false; + } + return true; + }else if (and_clause(clause)) { + items = ((Expr*)clause)->args; + foreach (item, items) { + /* if any AND item implies the predicate, the whole clause does */ + if (one_pred_clause_expr_test(predicate, lfirst(item))) + return true; + } + return false; + }else { + /* unknown clause type never implies the predicate */ + return false; + } +} + + +/* + * one_pred_clause_test-- + * Does the "predicate inclusion test" for one conjunct of a predicate + * expression for a simple restriction clause. + */ +static bool +one_pred_clause_test(Expr *predicate, Node *clause) +{ + List *items, *item; + + if (is_opclause((Node*)predicate)) + return clause_pred_clause_test(predicate, clause); + else if (or_clause((Node*)predicate)) { + items = predicate->args; + foreach (item, items) { + /* if any item is implied, the whole predicate is implied */ + if (one_pred_clause_test(lfirst(item), clause)) + return true; + } + return false; + }else if (and_clause((Node*)predicate)) { + items = predicate->args; + foreach (item, items) { + /* + * if any item is not implied, the whole predicate is not + * implied + */ + if (!one_pred_clause_test(lfirst(item), clause)) + return false; + } + return true; + } + else { + elog(DEBUG, "Unsupported predicate type, index will not be used"); + return false; + } +} + + +/* + * Define an "operator implication table" for btree operators ("strategies"). + * The "strategy numbers" are: (1) < (2) <= (3) = (4) >= (5) > + * + * The interpretation of: + * + * test_op = BT_implic_table[given_op-1][target_op-1] + * + * where test_op, given_op and target_op are strategy numbers (from 1 to 5) + * of btree operators, is as follows: + * + * If you know, for some ATTR, that "ATTR given_op CONST1" is true, and you + * want to determine whether "ATTR target_op CONST2" must also be true, then + * you can use "CONST1 test_op CONST2" as a test. If this test returns true, + * then the target expression must be true; if the test returns false, then + * the target expression may be false. + * + * An entry where test_op==0 means the implication cannot be determined, i.e., + * this test should always be considered false. + */ + +StrategyNumber BT_implic_table[BTMaxStrategyNumber][BTMaxStrategyNumber] = { + {2, 2, 0, 0, 0}, + {1, 2, 0, 0, 0}, + {1, 2, 3, 4, 5}, + {0, 0, 0, 4, 5}, + {0, 0, 0, 4, 4} +}; + + +/* + * clause_pred_clause_test-- + * Use operator class info to check whether clause implies predicate. + * + * Does the "predicate inclusion test" for a "simple clause" predicate + * for a single "simple clause" restriction. Currently, this only handles + * (binary boolean) operators that are in some btree operator class. + * Eventually, rtree operators could also be handled by defining an + * appropriate "RT_implic_table" array. + */ +static bool +clause_pred_clause_test(Expr *predicate, Node *clause) +{ + Var *pred_var, *clause_var; + Const *pred_const, *clause_const; + Oid pred_op, clause_op, test_op; + Oid opclass_id; + StrategyNumber pred_strategy, clause_strategy, test_strategy; + Oper *test_oper; + Expr *test_expr; + bool test_result, isNull; + Relation relation; + HeapScanDesc scan; + HeapTuple tuple; + ScanKeyData entry[3]; + Form_pg_amop form; + + pred_var = (Var*)get_leftop(predicate); + pred_const = (Const*)get_rightop(predicate); + clause_var = (Var*)get_leftop((Expr*)clause); + clause_const = (Const*)get_rightop((Expr*)clause); + + /* Check the basic form; for now, only allow the simplest case */ + if (!is_opclause(clause) || + !IsA(clause_var,Var) || + !IsA(clause_const,Const) || + !IsA(predicate->oper,Oper) || + !IsA(pred_var,Var) || + !IsA(pred_const,Const)) { + return false; + } + + /* + * The implication can't be determined unless the predicate and the clause + * refer to the same attribute. + */ + if (clause_var->varattno != pred_var->varattno) + return false; + + /* Get the operators for the two clauses we're comparing */ + pred_op = ((Oper*)((Expr*)predicate)->oper)->opno; + clause_op = ((Oper*)((Expr*)clause)->oper)->opno; + + + /* + * 1. Find a "btree" strategy number for the pred_op + */ + /* XXX - hardcoded amopid value 403 to find "btree" operator classes */ + ScanKeyEntryInitialize(&entry[0], 0, + Anum_pg_amop_amopid, + ObjectIdEqualRegProcedure, + ObjectIdGetDatum(403)); + + ScanKeyEntryInitialize(&entry[1], 0, + Anum_pg_amop_amopopr, + ObjectIdEqualRegProcedure, + ObjectIdGetDatum(pred_op)); + + relation = heap_openr(AccessMethodOperatorRelationName); + + /* + * The following assumes that any given operator will only be in a single + * btree operator class. This is true at least for all the pre-defined + * operator classes. If it isn't true, then whichever operator class + * happens to be returned first for the given operator will be used to + * find the associated strategy numbers for the test. --Nels, Jan '93 + */ + scan = heap_beginscan(relation, false, NowTimeQual, 2, entry); + tuple = heap_getnext(scan, false, (Buffer *)NULL); + if (! HeapTupleIsValid(tuple)) { + elog(DEBUG, "clause_pred_clause_test: unknown pred_op"); + return false; + } + form = (Form_pg_amop) GETSTRUCT(tuple); + + /* Get the predicate operator's strategy number (1 to 5) */ + pred_strategy = (StrategyNumber)form->amopstrategy; + + /* Remember which operator class this strategy number came from */ + opclass_id = form->amopclaid; + + heap_endscan(scan); + + + /* + * 2. From the same opclass, find a strategy num for the clause_op + */ + ScanKeyEntryInitialize(&entry[1], 0, + Anum_pg_amop_amopclaid, + ObjectIdEqualRegProcedure, + ObjectIdGetDatum(opclass_id)); + + ScanKeyEntryInitialize(&entry[2], 0, + Anum_pg_amop_amopopr, + ObjectIdEqualRegProcedure, + ObjectIdGetDatum(clause_op)); + + scan = heap_beginscan(relation, false, NowTimeQual, 3, entry); + tuple = heap_getnext(scan, false, (Buffer *)NULL); + if (! HeapTupleIsValid(tuple)) { + elog(DEBUG, "clause_pred_clause_test: unknown clause_op"); + return false; + } + form = (Form_pg_amop) GETSTRUCT(tuple); + + /* Get the restriction clause operator's strategy number (1 to 5) */ + clause_strategy = (StrategyNumber)form->amopstrategy; + heap_endscan(scan); + + + /* + * 3. Look up the "test" strategy number in the implication table + */ + + test_strategy = BT_implic_table[clause_strategy-1][pred_strategy-1]; + if (test_strategy == 0) + return false; /* the implication cannot be determined */ + + + /* + * 4. From the same opclass, find the operator for the test strategy + */ + + ScanKeyEntryInitialize(&entry[2], 0, + Anum_pg_amop_amopstrategy, + Integer16EqualRegProcedure, + Int16GetDatum(test_strategy)); + + scan = heap_beginscan(relation, false, NowTimeQual, 3, entry); + tuple = heap_getnext(scan, false, (Buffer *)NULL); + if (! HeapTupleIsValid(tuple)) { + elog(DEBUG, "clause_pred_clause_test: unknown test_op"); + return false; + } + form = (Form_pg_amop) GETSTRUCT(tuple); + + /* Get the test operator */ + test_op = form->amopopr; + heap_endscan(scan); + + + /* + * 5. Evaluate the test + */ + test_oper = makeOper(test_op, /* opno */ + InvalidOid, /* opid */ + BOOL_TYPEID, /* opresulttype */ + 0, /* opsize */ + NULL); /* op_fcache */ + (void) replace_opid(test_oper); + + test_expr = make_opclause(test_oper, + copyObject(clause_const), + copyObject(pred_const)); + +#ifndef OMIT_PARTIAL_INDEX + test_result = ExecEvalExpr((Node*)test_expr, NULL, &isNull, NULL); +#endif /* OMIT_PARTIAL_INDEX */ + if (isNull) { + elog(DEBUG, "clause_pred_clause_test: null test result"); + return false; + } + return test_result; +} + + +/**************************************************************************** + * ---- ROUTINES TO CHECK JOIN CLAUSES ---- + ****************************************************************************/ + +/* + * indexable-joinclauses-- + * Finds all groups of join clauses from among 'joininfo-list' that can + * be used in conjunction with 'index'. + * + * The first clause in the group is marked as having the other relation + * in the join clause as its outer join relation. + * + * Returns a list of these clause groups. + * + */ +static List * +indexable_joinclauses(Rel *rel, Rel *index, List *joininfo_list) +{ + JInfo *joininfo = (JInfo*)NULL; + List *cg_list = NIL; + List *i = NIL; + List *clausegroups = NIL; + + foreach(i,joininfo_list) { + joininfo = (JInfo*)lfirst(i); + clausegroups = + group_clauses_by_indexkey (rel, + index, + index->indexkeys, + index->classlist, + joininfo->jinfoclauseinfo, + true); + + if (clausegroups != NIL) { + List *clauses = lfirst(clausegroups); + + ((CInfo*)lfirst(clauses))->cinfojoinid = + joininfo->otherrels; + } + cg_list = nconc(cg_list,clausegroups); + } + return(cg_list); +} + +/**************************************************************************** + * ---- PATH CREATION UTILITIES ---- + ****************************************************************************/ + +/* + * extract_restrict_clauses - + * the list of clause info contains join clauses and restriction clauses. + * This routine returns the restriction clauses only. + */ +static List * +extract_restrict_clauses(List *clausegroup) +{ + List *restrict_cls = NIL; + List *l; + + foreach (l, clausegroup) { + CInfo *cinfo = lfirst(l); + + if (!join_clause_p((Node*)cinfo->clause)) { + restrict_cls = lappend(restrict_cls, cinfo); + } + } + return restrict_cls; +} + +/* + * index-innerjoin-- + * Creates index path nodes corresponding to paths to be used as inner + * relations in nestloop joins. + * + * 'clausegroup-list' is a list of list of clauseinfo nodes which can use + * 'index' on their inner relation. + * + * Returns a list of index pathnodes. + * + */ +static List * +index_innerjoin(Query *root, Rel *rel, List *clausegroup_list, Rel *index) +{ + List *clausegroup = NIL; + List *cg_list = NIL; + List *i = NIL; + IndexPath *pathnode = (IndexPath*)NULL; + Cost temp_selec; + float temp_pages; + + foreach(i,clausegroup_list) { + List *attnos, *values, *flags; + + clausegroup = lfirst(i); + pathnode = makeNode(IndexPath); + + get_joinvars(lfirsti(rel->relids),clausegroup, + &attnos, &values, &flags); + index_selectivity(lfirsti(index->relids), + index->classlist, + get_opnos(clausegroup), + getrelid((int)lfirst(rel->relids), + root->rtable), + attnos, + values, + flags, + length(clausegroup), + &temp_pages, + &temp_selec); + pathnode->path.pathtype = T_IndexScan; + pathnode->path.parent = rel; + pathnode->indexid = index->relids; + pathnode->indexqual = clausegroup; + + pathnode->path.joinid = ((CInfo*)lfirst(clausegroup))->cinfojoinid; + + pathnode->path.path_cost = + cost_index((Oid)lfirst(index->relids), + (int)temp_pages, + temp_selec, + rel->pages, + rel->tuples, + index->pages, + index->tuples, + true); + + /* copy clauseinfo list into path for expensive function processing + -- JMH, 7/7/92 */ + pathnode->path.locclauseinfo = + set_difference(copyObject((Node*)rel->clauseinfo), + clausegroup); + +#if 0 /* fix xfunc */ + /* add in cost for expensive functions! -- JMH, 7/7/92 */ + if (XfuncMode != XFUNC_OFF) { + ((Path*)pathnode)->path_cost += + xfunc_get_path_cost((Path*)pathnode); + } +#endif + cg_list = lappend(cg_list,pathnode); + } + return(cg_list); +} + +/* + * create-index-paths-- + * Creates a list of index path nodes for each group of clauses + * (restriction or join) that can be used in conjunction with an index. + * + * 'rel' is the relation for which 'index' is defined + * 'clausegroup-list' is the list of clause groups (lists of clauseinfo + * nodes) grouped by mergesortorder + * 'join' is a flag indicating whether or not the clauses are join + * clauses + * + * Returns a list of new index path nodes. + * + */ +static List * +create_index_paths(Query *root, + Rel *rel, + Rel *index, + List *clausegroup_list, + bool join) +{ + List *clausegroup = NIL; + List *ip_list = NIL; + List *i = NIL; + List *j = NIL; + IndexPath *temp_path; + + foreach(i, clausegroup_list) { + CInfo *clauseinfo; + List *temp_node = NIL; + bool temp = true; + + clausegroup = lfirst(i); + + foreach (j,clausegroup) { + clauseinfo = (CInfo*)lfirst(j); + if (!(join_clause_p((Node*)clauseinfo->clause) && + equal_path_merge_ordering(index->ordering, + clauseinfo->mergesortorder))) { + temp = false; + } + } + + if (!join || temp) { /* restriction, ordering scan */ + temp_path = create_index_path (root, rel,index,clausegroup,join); + temp_node = + lcons(temp_path, NIL); + ip_list = nconc(ip_list,temp_node); + } + } + return(ip_list); +} + +static List * +add_index_paths(List *indexpaths, List *new_indexpaths) +{ + return append(indexpaths, new_indexpaths); +} + +static bool +function_index_operand(Expr *funcOpnd, Rel *rel, Rel *index) +{ + Oid heapRelid = (Oid)lfirst(rel->relids); + Func *function; + List *funcargs; + int *indexKeys = index->indexkeys; + List *arg; + int i; + + /* + * sanity check, make sure we know what we're dealing with here. + */ + if (funcOpnd==NULL || + nodeTag(funcOpnd)!=T_Expr || funcOpnd->opType!=FUNC_EXPR || + funcOpnd->oper==NULL || indexKeys==NULL) + return false; + + function = (Func*)funcOpnd->oper; + funcargs = funcOpnd->args; + + if (function->funcid != index->indproc) + return false; + + /* + * Check that the arguments correspond to the same arguments used + * to create the functional index. To do this we must check that + * 1. refer to the right relatiion. + * 2. the args have the right attr. numbers in the right order. + * + * + * Check all args refer to the correct relation (i.e. the one with + * the functional index defined on it (rel). To do this we can + * simply compare range table entry numbers, they must be the same. + */ + foreach (arg, funcargs) { + if (heapRelid != ((Var*)lfirst(arg))->varno) + return false; + } + + /* + * check attr numbers and order. + */ + i = 0; + foreach (arg, funcargs) { + + if (indexKeys[i]==0) + return (false); + + if (((Var*)lfirst(arg))->varattno != indexKeys[i]) + return (false); + + i++; + } + + return true; +} + +static bool +SingleAttributeIndex(Rel *index) +{ + /* + * return false for now as I don't know if we support index scans + * on disjunction and the code doesn't work + */ + return (false); + +#if 0 + /* + * Non-functional indices. + */ + if (index->indproc == InvalidOid) + return (index->indexkeys[0] != 0 && + index->indexkeys[1] == 0); + + /* + * We have a functional index which is a single attr index + */ + return true; +#endif +} diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c new file mode 100644 index 00000000000..e727388715c --- /dev/null +++ b/src/backend/optimizer/path/joinpath.c @@ -0,0 +1,623 @@ +/*------------------------------------------------------------------------- + * + * joinpath.c-- + * Routines to find all possible paths for processing a set of joins + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.1.1.1 1996/07/09 06:21:35 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include <math.h> + +#include "storage/buf_internals.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/plannodes.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" +#include "optimizer/keys.h" +#include "optimizer/cost.h" /* for _enable_{hashjoin, _enable_mergesort} */ + +static Path *best_innerjoin(List *join_paths, List *outer_relid); +static List *sort_inner_and_outer(Rel *joinrel, Rel *outerrel, Rel *innerrel, + List *mergeinfo_list); +static List *match_unsorted_outer(Rel *joinrel, Rel *outerrel, Rel *innerrel, + List *outerpath_list, Path *cheapest_inner, Path *best_innerjoin, + List *mergeinfo_list); +static List *match_unsorted_inner(Rel *joinrel, Rel *outerrel, Rel *innerrel, + List *innerpath_list, List *mergeinfo_list); +static bool EnoughMemoryForHashjoin(Rel *hashrel); +static List *hash_inner_and_outer(Rel *joinrel, Rel *outerrel, Rel *innerrel, + List *hashinfo_list); + +/* + * find-all-join-paths-- + * Creates all possible ways to process joins for each of the join + * relations in the list 'joinrels.' Each unique path will be included + * in the join relation's 'pathlist' field. + * + * In postgres, n-way joins are handled left-only(permuting clauseless + * joins doesn't usually win much). + * + * if BushyPlanFlag is true, bushy tree plans will be generated + * + * 'joinrels' is the list of relation entries to be joined + * + * Modifies the pathlist field of the appropriate rel node to contain + * the unique join paths. + * If bushy trees are considered, may modify the relid field of the + * join rel nodes to flatten the lists. + * + * Returns nothing of interest. (?) + * It does a destructive modification. + */ +void +find_all_join_paths(Query *root, List *joinrels) +{ + List *mergeinfo_list = NIL; + List *hashinfo_list = NIL; + List *temp_list = NIL; + List *path = NIL; + + while (joinrels != NIL) { + Rel *joinrel = (Rel *)lfirst(joinrels); + List *innerrelids; + List *outerrelids; + Rel *innerrel; + Rel *outerrel; + Path *bestinnerjoin; + List *pathlist = NIL; + + innerrelids = lsecond(joinrel->relids); + outerrelids = lfirst(joinrel->relids); + + /* + * base relation id is an integer and join relation relid is a + * list of integers. + */ + innerrel = (length(innerrelids)==1)? + get_base_rel(root, lfirsti(innerrelids)) : get_join_rel(root,innerrelids); + outerrel = (length(outerrelids)==1)? + get_base_rel(root, lfirsti(outerrelids)) : get_join_rel(root, outerrelids); + + bestinnerjoin = best_innerjoin(innerrel->innerjoin, + outerrel->relids); + if( _enable_mergesort_ ) { + mergeinfo_list = + group_clauses_by_order(joinrel->clauseinfo, + lfirsti(innerrel->relids)); + } + + if( _enable_hashjoin_ ) { + hashinfo_list = + group_clauses_by_hashop(joinrel->clauseinfo, + lfirsti(innerrel->relids)); + } + + /* need to flatten the relids list */ + joinrel->relids = intAppend(outerrelids, innerrelids); + + /* + * 1. Consider mergesort paths where both relations must be + * explicitly sorted. + */ + pathlist = sort_inner_and_outer(joinrel,outerrel, + innerrel,mergeinfo_list); + + /* + * 2. Consider paths where the outer relation need not be explicitly + * sorted. This may include either nestloops and mergesorts where + * the outer path is already ordered. + */ + pathlist = + add_pathlist(joinrel, pathlist, + match_unsorted_outer(joinrel, + outerrel, + innerrel, + outerrel->pathlist, + (Path*)innerrel->cheapestpath, + bestinnerjoin, + mergeinfo_list)); + + /* + * 3. Consider paths where the inner relation need not be explicitly + * sorted. This may include nestloops and mergesorts the actual + * nestloop nodes were constructed in (match-unsorted-outer). + */ + pathlist = + add_pathlist(joinrel,pathlist, + match_unsorted_inner(joinrel,outerrel, + innerrel, + innerrel->pathlist, + mergeinfo_list)); + + /* + * 4. Consider paths where both outer and inner relations must be + * hashed before being joined. + */ + + pathlist = + add_pathlist(joinrel, pathlist, + hash_inner_and_outer(joinrel,outerrel, + innerrel,hashinfo_list)); + + joinrel->pathlist = pathlist; + + /* + * 'OuterJoinCost is only valid when calling (match-unsorted-inner) + * with the same arguments as the previous invokation of + * (match-unsorted-outer), so clear the field before going on. + */ + temp_list = innerrel->pathlist; + foreach(path, temp_list) { + + /* + * XXX + * + * This gross hack is to get around an apparent optimizer bug on + * Sparc (or maybe it is a bug of ours?) that causes really wierd + * behavior. + */ + if (IsA_JoinPath(path)) { + ((Path*)lfirst(path))->outerjoincost = (Cost) 0; + } + + /* do it iff it is a join path, which is not always + true, esp since the base level */ + } + + joinrels = lnext(joinrels); + } +} + +/* + * best-innerjoin-- + * Find the cheapest index path that has already been identified by + * (indexable_joinclauses) as being a possible inner path for the given + * outer relation in a nestloop join. + * + * 'join-paths' is a list of join nodes + * 'outer-relid' is the relid of the outer join relation + * + * Returns the pathnode of the selected path. + */ +static Path * +best_innerjoin(List *join_paths, List *outer_relids) +{ + Path *cheapest = (Path*)NULL; + List *join_path; + + foreach(join_path, join_paths) { + Path *path = (Path *)lfirst(join_path); + + if (intMember(lfirsti(path->joinid), outer_relids) + && ((cheapest==NULL || + path_is_cheaper((Path*)lfirst(join_path),cheapest)))) { + + cheapest = (Path*)lfirst(join_path); + } + } + return(cheapest); +} + +/* + * sort-inner-and-outer-- + * Create mergesort join paths by explicitly sorting both the outer and + * inner join relations on each available merge ordering. + * + * 'joinrel' is the join relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'mergeinfo-list' is a list of nodes containing info on(mergesortable) + * clauses for joining the relations + * + * Returns a list of mergesort paths. + */ +static List * +sort_inner_and_outer(Rel *joinrel, + Rel *outerrel, + Rel *innerrel, + List *mergeinfo_list) +{ + List *ms_list = NIL; + MInfo *xmergeinfo = (MInfo*)NULL; + MergePath *temp_node = (MergePath*)NULL; + List *i; + List *outerkeys = NIL; + List *innerkeys = NIL; + List *merge_pathkeys = NIL; + + foreach(i, mergeinfo_list) { + xmergeinfo = (MInfo *)lfirst(i); + + outerkeys = + extract_path_keys(xmergeinfo->jmethod.jmkeys, + outerrel->targetlist, + OUTER); + + innerkeys = + extract_path_keys(xmergeinfo->jmethod.jmkeys, + innerrel->targetlist, + INNER); + + merge_pathkeys = + new_join_pathkeys(outerkeys, joinrel->targetlist, + xmergeinfo->jmethod.clauses); + + temp_node = + create_mergesort_path(joinrel, + outerrel->size, + innerrel->size, + outerrel->width, + innerrel->width, + (Path*)outerrel->cheapestpath, + (Path*)innerrel->cheapestpath, + merge_pathkeys, + xmergeinfo->m_ordering, + xmergeinfo->jmethod.clauses, + outerkeys, + innerkeys); + + ms_list = lappend(ms_list, temp_node); + } + return(ms_list); +} + +/* + * match-unsorted-outer-- + * Creates possible join paths for processing a single join relation + * 'joinrel' by employing either iterative substitution or + * mergesorting on each of its possible outer paths(assuming that the + * outer relation need not be explicitly sorted). + * + * 1. The inner path is the cheapest available inner path. + * 2. Mergesort wherever possible. Mergesorts are considered if there + * are mergesortable join clauses between the outer and inner join + * relations such that the outer path is keyed on the variables + * appearing in the clauses. The corresponding inner merge path is + * either a path whose keys match those of the outer path(if such a + * path is available) or an explicit sort on the appropriate inner + * join keys, whichever is cheaper. + * + * 'joinrel' is the join relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'outerpath-list' is the list of possible outer paths + * 'cheapest-inner' is the cheapest inner path + * 'best-innerjoin' is the best inner index path(if any) + * 'mergeinfo-list' is a list of nodes containing info on mergesortable + * clauses + * + * Returns a list of possible join path nodes. + */ +static List * +match_unsorted_outer(Rel *joinrel, + Rel *outerrel, + Rel *innerrel, + List *outerpath_list, + Path *cheapest_inner, + Path *best_innerjoin, + List *mergeinfo_list) +{ + Path *outerpath = (Path*)NULL; + List *jp_list = NIL; + List *temp_node = NIL; + List *merge_pathkeys = NIL; + Path *nestinnerpath =(Path*)NULL; + List *paths = NIL; + List *i = NIL; + PathOrder *outerpath_ordering = NULL; + + foreach(i,outerpath_list) { + List *clauses = NIL; + List *matchedJoinKeys = NIL; + List *matchedJoinClauses = NIL; + MInfo *xmergeinfo = (MInfo*)NULL; + + outerpath = (Path*)lfirst(i); + + outerpath_ordering = &outerpath->p_ordering; + + if (outerpath_ordering) { + xmergeinfo = + match_order_mergeinfo(outerpath_ordering, + mergeinfo_list); + } + + if (xmergeinfo) { + clauses = xmergeinfo->jmethod.clauses; + } + + if (clauses) { + List *keys = xmergeinfo->jmethod.jmkeys; + List *clauses = xmergeinfo->jmethod.clauses; + + matchedJoinKeys = + match_pathkeys_joinkeys(outerpath->keys, + keys, + clauses, + OUTER, + &matchedJoinClauses); + merge_pathkeys = + new_join_pathkeys(outerpath->keys, + joinrel->targetlist, clauses); + } else { + merge_pathkeys = outerpath->keys; + } + + if(best_innerjoin && + path_is_cheaper(best_innerjoin, cheapest_inner)) { + nestinnerpath = best_innerjoin; + } else { + nestinnerpath = cheapest_inner; + } + + paths = lcons(create_nestloop_path(joinrel, + outerrel, + outerpath, + nestinnerpath, + merge_pathkeys), + NIL); + + if (clauses && matchedJoinKeys) { + bool path_is_cheaper_than_sort; + List *varkeys = NIL; + Path *mergeinnerpath = + match_paths_joinkeys(matchedJoinKeys, + outerpath_ordering, + innerrel->pathlist, + INNER); + + path_is_cheaper_than_sort = + (bool) (mergeinnerpath && + (mergeinnerpath->path_cost < + (cheapest_inner->path_cost + + cost_sort(matchedJoinKeys, + innerrel->size, + innerrel->width, + false)))); + if(!path_is_cheaper_than_sort) { + varkeys = + extract_path_keys(matchedJoinKeys, + innerrel->targetlist, + INNER); + } + + + /* + * Keep track of the cost of the outer path used with + * this ordered inner path for later processing in + * (match-unsorted-inner), since it isn't a sort and + * thus wouldn't otherwise be considered. + */ + if (path_is_cheaper_than_sort) { + mergeinnerpath->outerjoincost = outerpath->path_cost; + } else { + mergeinnerpath = cheapest_inner; + } + + temp_node = + lcons(create_mergesort_path(joinrel, + outerrel->size, + innerrel->size, + outerrel->width, + innerrel->width, + outerpath, + mergeinnerpath, + merge_pathkeys, + xmergeinfo->m_ordering, + matchedJoinClauses, + NIL, + varkeys), + paths); + } else { + temp_node = paths; + } + jp_list = nconc(jp_list, temp_node); + } + return(jp_list); +} + +/* + * match-unsorted-inner -- + * Find the cheapest ordered join path for a given(ordered, unsorted) + * inner join path. + * + * Scans through each path available on an inner join relation and tries + * matching its ordering keys against those of mergejoin clauses. + * If 1. an appropriately-ordered inner path and matching mergeclause are + * found, and + * 2. sorting the cheapest outer path is cheaper than using an ordered + * but unsorted outer path(as was considered in + * (match-unsorted-outer)), + * then this merge path is considered. + * + * 'joinrel' is the join result relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'innerpath-list' is the list of possible inner join paths + * 'mergeinfo-list' is a list of nodes containing info on mergesortable + * clauses + * + * Returns a list of possible merge paths. + */ +static List * +match_unsorted_inner(Rel *joinrel, + Rel *outerrel, + Rel *innerrel, + List *innerpath_list, + List *mergeinfo_list) +{ + Path *innerpath = (Path*)NULL; + List *mp_list = NIL; + List *temp_node = NIL; + PathOrder *innerpath_ordering = NULL; + Cost temp1 = 0.0; + bool temp2 = false; + List *i = NIL; + + foreach (i, innerpath_list) { + MInfo *xmergeinfo = (MInfo*)NULL; + List *clauses = NIL; + List *matchedJoinKeys = NIL; + List *matchedJoinClauses = NIL; + + innerpath = (Path*)lfirst(i); + + innerpath_ordering = &innerpath->p_ordering; + + if (innerpath_ordering) { + xmergeinfo = + match_order_mergeinfo(innerpath_ordering, + mergeinfo_list); + } + + if (xmergeinfo) { + clauses = ((JoinMethod*)xmergeinfo)->clauses; + } + + if (clauses) { + List *keys = xmergeinfo->jmethod.jmkeys; + List *cls = xmergeinfo->jmethod.clauses; + + matchedJoinKeys = + match_pathkeys_joinkeys(innerpath->keys, + keys, + cls, + INNER, + &matchedJoinClauses); + } + + /* + * (match-unsorted-outer) if it is applicable. + * 'OuterJoinCost was set above in + */ + if (clauses && matchedJoinKeys) { + temp1 = outerrel->cheapestpath->path_cost + + cost_sort(matchedJoinKeys, outerrel->size, outerrel->width, + false); + + temp2 = (bool) (FLOAT_IS_ZERO(innerpath->outerjoincost) + || (innerpath->outerjoincost > temp1)); + + if(temp2) { + List *outerkeys = + extract_path_keys(matchedJoinKeys, + outerrel->targetlist, + OUTER); + List *merge_pathkeys = + new_join_pathkeys(outerkeys, + joinrel->targetlist, + clauses); + + temp_node = + lcons(create_mergesort_path(joinrel, + outerrel->size, + innerrel->size, + outerrel->width, + innerrel->width, + (Path*)outerrel->cheapestpath, + innerpath, + merge_pathkeys, + xmergeinfo->m_ordering, + matchedJoinClauses, + outerkeys, + NIL), + NIL); + + mp_list = nconc(mp_list,temp_node); + } + } + } + return(mp_list); + +} + +static bool +EnoughMemoryForHashjoin(Rel *hashrel) +{ + int ntuples; + int tupsize; + int pages; + + ntuples = hashrel->size; + if (ntuples == 0) ntuples = 1000; + tupsize = hashrel->width + sizeof(HeapTupleData); + pages = page_size(ntuples, tupsize); + /* + * if amount of buffer space below hashjoin threshold, + * return false + */ + if (ceil(sqrt((double)pages)) > NBuffers) + return false; + return true; +} + +/* + * hash-inner-and-outer-- XXX HASH + * Create hashjoin join paths by explicitly hashing both the outer and + * inner join relations on each available hash op. + * + * 'joinrel' is the join relation + * 'outerrel' is the outer join relation + * 'innerrel' is the inner join relation + * 'hashinfo-list' is a list of nodes containing info on(hashjoinable) + * clauses for joining the relations + * + * Returns a list of hashjoin paths. + */ +static List * +hash_inner_and_outer(Rel *joinrel, + Rel *outerrel, + Rel *innerrel, + List *hashinfo_list) +{ + HInfo *xhashinfo = (HInfo*)NULL; + List *hjoin_list = NIL; + HashPath *temp_node = (HashPath*)NULL; + List *i = NIL; + List *outerkeys = NIL; + List *innerkeys = NIL; + List *hash_pathkeys = NIL; + + foreach (i, hashinfo_list) { + xhashinfo = (HInfo*)lfirst(i); + outerkeys = + extract_path_keys(((JoinMethod*)xhashinfo)->jmkeys, + outerrel->targetlist, + OUTER); + innerkeys = + extract_path_keys(((JoinMethod*)xhashinfo)->jmkeys, + innerrel->targetlist, + INNER); + hash_pathkeys = + new_join_pathkeys(outerkeys, + joinrel->targetlist, + ((JoinMethod*)xhashinfo)->clauses); + + if (EnoughMemoryForHashjoin(innerrel)) { + temp_node = create_hashjoin_path(joinrel, + outerrel->size, + innerrel->size, + outerrel->width, + innerrel->width, + (Path*)outerrel->cheapestpath, + (Path*)innerrel->cheapestpath, + hash_pathkeys, + xhashinfo->hashop, + ((JoinMethod*)xhashinfo)->clauses, + outerkeys, + innerkeys); + hjoin_list = lappend(hjoin_list, temp_node); + } + } + return(hjoin_list); +} + diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c new file mode 100644 index 00000000000..b26e3364f93 --- /dev/null +++ b/src/backend/optimizer/path/joinrels.c @@ -0,0 +1,528 @@ +/*------------------------------------------------------------------------- + * + * joinrels.c-- + * Routines to determine which relations should be joined + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.1.1.1 1996/07/09 06:21:36 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" + +#include "optimizer/internal.h" +#include "optimizer/cost.h" +#include "optimizer/paths.h" +#include "optimizer/tlist.h" +#include "optimizer/joininfo.h" +#include "optimizer/pathnode.h" + + +static List *find_clause_joins(Query *root, Rel *outer_rel, List *joininfo_list); +static List *find_clauseless_joins(Rel *outer_rel, List *inner_rels); +static Rel *init_join_rel(Rel *outer_rel, Rel *inner_rel, JInfo *joininfo); +static List *new_join_tlist(List *tlist, List *other_relids, + int first_resdomno); +static List *new_joininfo_list(List *joininfo_list, List *join_relids); +static void add_superrels(Rel *rel, Rel *super_rel); +static bool nonoverlap_rels(Rel *rel1, Rel *rel2); +static bool nonoverlap_sets(List *s1, List *s2); +static void set_joinrel_size(Rel *joinrel, Rel *outer_rel, Rel *inner_rel, + JInfo *jinfo); + +/* + * find-join-rels-- + * Find all possible joins for each of the outer join relations in + * 'outer-rels'. A rel node is created for each possible join relation, + * and the resulting list of nodes is returned. If at all possible, only + * those relations for which join clauses exist are considered. If none + * of these exist for a given relation, all remaining possibilities are + * considered. + * + * 'outer-rels' is the list of rel nodes + * + * Returns a list of rel nodes corresponding to the new join relations. + */ +List * +find_join_rels(Query *root, List *outer_rels) +{ + List *joins = NIL; + List *join_list = NIL; + List *r = NIL; + + foreach(r, outer_rels) { + Rel *outer_rel = (Rel *)lfirst(r); + + if(!(joins = find_clause_joins(root, outer_rel,outer_rel->joininfo))) + if (BushyPlanFlag) + joins = find_clauseless_joins(outer_rel,outer_rels); + else + joins = find_clauseless_joins(outer_rel,root->base_relation_list_); + + join_list = nconc(join_list, joins); + } + + return(join_list); +} + +/* + * find-clause-joins-- + * Determines whether joins can be performed between an outer relation + * 'outer-rel' and those relations within 'outer-rel's joininfo nodes + * (i.e., relations that participate in join clauses that 'outer-rel' + * participates in). This is possible if all but one of the relations + * contained within the join clauses of the joininfo node are already + * contained within 'outer-rel'. + * + * 'outer-rel' is the relation entry for the outer relation + * 'joininfo-list' is a list of join clauses which 'outer-rel' + * participates in + * + * Returns a list of new join relations. + */ +static List * +find_clause_joins(Query *root, Rel *outer_rel, List *joininfo_list) +{ + List *join_list = NIL; + List *i = NIL; + + foreach (i, joininfo_list) { + JInfo *joininfo = (JInfo*)lfirst(i); + Rel *rel; + + if(!joininfo->inactive) { + List *other_rels = joininfo->otherrels; + + if(other_rels != NIL) { + if(length(other_rels) == 1) { + rel = init_join_rel(outer_rel, + get_base_rel(root, lfirsti(other_rels)), + joininfo); + } else if (BushyPlanFlag) { + rel = init_join_rel(outer_rel, + get_join_rel(root, other_rels), + joininfo); + } else { + rel = NULL; + } + + if (rel != NULL) + join_list = lappend(join_list, rel); + } + } + } + + return(join_list); +} + +/* + * find-clauseless-joins-- + * Given an outer relation 'outer-rel' and a list of inner relations + * 'inner-rels', create a join relation between 'outer-rel' and each + * member of 'inner-rels' that isn't already included in 'outer-rel'. + * + * Returns a list of new join relations. + */ +static List * +find_clauseless_joins(Rel *outer_rel, List *inner_rels) +{ + Rel *inner_rel; + List *t_list = NIL; + List *temp_node = NIL; + List *i = NIL; + + foreach (i, inner_rels) { + inner_rel = (Rel *)lfirst(i); + if(nonoverlap_rels(inner_rel, outer_rel)) { + temp_node = lcons(init_join_rel(outer_rel, + inner_rel, + (JInfo*)NULL), + NIL); + t_list = nconc(t_list,temp_node); + } + } + + return(t_list); +} + +/* + * init-join-rel-- + * Creates and initializes a new join relation. + * + * 'outer-rel' and 'inner-rel' are relation nodes for the relations to be + * joined + * 'joininfo' is the joininfo node(join clause) containing both + * 'outer-rel' and 'inner-rel', if any exists + * + * Returns the new join relation node. + */ +static Rel * +init_join_rel(Rel *outer_rel, Rel *inner_rel, JInfo *joininfo) +{ + Rel *joinrel = makeNode(Rel); + List *joinrel_joininfo_list = NIL; + List *new_outer_tlist; + List *new_inner_tlist; + + /* + * Create a new tlist by removing irrelevant elements from both + * tlists of the outer and inner join relations and then merging + * the results together. + */ + new_outer_tlist = + new_join_tlist(outer_rel->targetlist, /* XXX 1-based attnos */ + inner_rel->relids, 1); + new_inner_tlist = + new_join_tlist(inner_rel->targetlist, /* XXX 1-based attnos */ + outer_rel->relids, + length(new_outer_tlist) + 1); + + joinrel->relids = NIL; + joinrel->indexed = false; + joinrel->pages = 0; + joinrel->tuples = 0; + joinrel->width = 0; +/* joinrel->targetlist = NIL;*/ + joinrel->pathlist = NIL; + joinrel->unorderedpath = (Path *)NULL; + joinrel->cheapestpath = (Path *)NULL; + joinrel->pruneable = true; + joinrel->classlist = NULL; + joinrel->relam = InvalidOid; + joinrel->ordering = NULL; + joinrel->clauseinfo = NIL; + joinrel->joininfo = NULL; + joinrel->innerjoin = NIL; + joinrel->superrels = NIL; + + joinrel->relids = lcons(outer_rel->relids, /* ??? aren't they lists? -ay */ + lcons(inner_rel->relids, NIL)); + + new_outer_tlist = nconc(new_outer_tlist,new_inner_tlist); + joinrel->targetlist = new_outer_tlist; + + if (joininfo) { + joinrel->clauseinfo = joininfo->jinfoclauseinfo; + if (BushyPlanFlag) + joininfo->inactive = true; + } + + joinrel_joininfo_list = + new_joininfo_list(append(outer_rel->joininfo, inner_rel->joininfo), + intAppend(outer_rel->relids, inner_rel->relids)); + + joinrel->joininfo = joinrel_joininfo_list; + + set_joinrel_size(joinrel, outer_rel, inner_rel, joininfo); + + return(joinrel); +} + +/* + * new-join-tlist-- + * Builds a join relations's target list by keeping those elements that + * will be in the final target list and any other elements that are still + * needed for future joins. For a target list entry to still be needed + * for future joins, its 'joinlist' field must not be empty after removal + * of all relids in 'other-relids'. + * + * 'tlist' is the target list of one of the join relations + * 'other-relids' is a list of relids contained within the other + * join relation + * 'first-resdomno' is the resdom number to use for the first created + * target list entry + * + * Returns the new target list. + */ +static List * +new_join_tlist(List *tlist, + List *other_relids, + int first_resdomno) +{ + int resdomno = first_resdomno - 1; + TargetEntry *xtl = NULL; + List *temp_node = NIL; + List *t_list = NIL; + List *i = NIL; + List *join_list = NIL; + bool in_final_tlist =false; + + + foreach(i,tlist) { + xtl= lfirst(i); + in_final_tlist = (join_list==NIL); + if( in_final_tlist) { + resdomno += 1; + temp_node = + lcons(create_tl_element(get_expr(xtl), + resdomno), + NIL); + t_list = nconc(t_list,temp_node); + } + } + + return(t_list); +} + +/* + * new-joininfo-list-- + * Builds a join relation's joininfo list by checking for join clauses + * which still need to used in future joins involving this relation. A + * join clause is still needed if there are still relations in the clause + * not contained in the list of relations comprising this join relation. + * New joininfo nodes are only created and added to + * 'current-joininfo-list' if a node for a particular join hasn't already + * been created. + * + * 'current-joininfo-list' contains a list of those joininfo nodes that + * have already been built + * 'joininfo-list' is the list of join clauses involving this relation + * 'join-relids' is a list of relids corresponding to the relations + * currently being joined + * + * Returns a list of joininfo nodes, new and old. + */ +static List * +new_joininfo_list(List *joininfo_list, List *join_relids) +{ + List *current_joininfo_list = NIL; + List *new_otherrels = NIL; + JInfo *other_joininfo = (JInfo*)NULL; + List *xjoininfo = NIL; + + foreach (xjoininfo, joininfo_list) { + JInfo *joininfo = (JInfo*)lfirst(xjoininfo); + + new_otherrels = joininfo->otherrels; + if (nonoverlap_sets(new_otherrels,join_relids)) { + other_joininfo = joininfo_member(new_otherrels, + current_joininfo_list); + if(other_joininfo) { + other_joininfo->jinfoclauseinfo = + (List*)LispUnion(joininfo->jinfoclauseinfo, + other_joininfo->jinfoclauseinfo); + }else { + other_joininfo = makeNode(JInfo); + + other_joininfo->otherrels = + joininfo->otherrels; + other_joininfo->jinfoclauseinfo = + joininfo->jinfoclauseinfo; + other_joininfo->mergesortable = + joininfo->mergesortable; + other_joininfo->hashjoinable = + joininfo->hashjoinable; + other_joininfo->inactive = false; + + current_joininfo_list = lcons(other_joininfo, + current_joininfo_list); + } + } + } + + return(current_joininfo_list); +} + +/* + * add-new-joininfos-- + * For each new join relation, create new joininfos that + * use the join relation as inner relation, and add + * the new joininfos to those rel nodes that still + * have joins with the join relation. + * + * 'joinrels' is a list of join relations. + * + * Modifies the joininfo field of appropriate rel nodes. + */ +void +add_new_joininfos(Query *root, List *joinrels, List *outerrels) +{ + List *xjoinrel = NIL; + List *xrelid = NIL; + List *xrel = NIL; + List *xjoininfo = NIL; + + foreach(xjoinrel, joinrels) { + Rel *joinrel = (Rel *)lfirst(xjoinrel); + foreach(xrelid, joinrel->relids) { + Relid relid = (Relid)lfirst(xrelid); + Rel *rel = get_join_rel(root, relid); + add_superrels(rel,joinrel); + } + } + foreach(xjoinrel, joinrels) { + Rel *joinrel = (Rel *)lfirst(xjoinrel); + + foreach(xjoininfo, joinrel->joininfo) { + JInfo *joininfo = (JInfo*)lfirst(xjoininfo); + List *other_rels = joininfo->otherrels; + List *clause_info = joininfo->jinfoclauseinfo; + bool mergesortable = joininfo->mergesortable; + bool hashjoinable = joininfo->hashjoinable; + + foreach(xrelid, other_rels) { + Relid relid = (Relid)lfirst(xrelid); + Rel *rel = get_join_rel(root, relid); + List *super_rels = rel->superrels; + List *xsuper_rel = NIL; + JInfo *new_joininfo = makeNode(JInfo); + + new_joininfo->otherrels = joinrel->relids; + new_joininfo->jinfoclauseinfo = clause_info; + new_joininfo->mergesortable = mergesortable; + new_joininfo->hashjoinable = hashjoinable; + new_joininfo->inactive = false; + rel->joininfo = + lappend(rel->joininfo, new_joininfo); + + foreach(xsuper_rel, super_rels) { + Rel *super_rel = (Rel *)lfirst(xsuper_rel); + + if( nonoverlap_rels(super_rel,joinrel) ) { + List *new_relids = super_rel->relids; + JInfo *other_joininfo = + joininfo_member(new_relids, + joinrel->joininfo); + + if (other_joininfo) { + other_joininfo->jinfoclauseinfo = + (List*)LispUnion(clause_info, + other_joininfo->jinfoclauseinfo); + } else { + JInfo *new_joininfo = makeNode(JInfo); + + new_joininfo->otherrels = new_relids; + new_joininfo->jinfoclauseinfo = clause_info; + new_joininfo->mergesortable = mergesortable; + new_joininfo->hashjoinable = hashjoinable; + new_joininfo->inactive = false; + joinrel->joininfo = + lappend(joinrel->joininfo, + new_joininfo); + } + } + } + } + } + } + foreach(xrel, outerrels) { + Rel *rel = (Rel *)lfirst(xrel); + rel->superrels = NIL; + } +} + +/* + * final-join-rels-- + * Find the join relation that includes all the original + * relations, i.e. the final join result. + * + * 'join-rel-list' is a list of join relations. + * + * Returns the list of final join relations. + */ +List * +final_join_rels(List *join_rel_list) +{ + List *xrel = NIL; + List *temp = NIL; + List *t_list = NIL; + + /* + * find the relations that has no further joins, + * i.e., its joininfos all have otherrels nil. + */ + foreach(xrel,join_rel_list) { + Rel *rel = (Rel *)lfirst(xrel); + List *xjoininfo = NIL; + bool final = true; + + foreach (xjoininfo, rel->joininfo) { + JInfo *joininfo = (JInfo*)lfirst(xjoininfo); + + if (joininfo->otherrels != NIL) { + final = false; + break; + } + } + if (final) { + temp = lcons(rel, NIL); + t_list = nconc(t_list, temp); + } + } + + return(t_list); +} + +/* + * add_superrels-- + * add rel to the temporary property list superrels. + * + * 'rel' a rel node + * 'super-rel' rel node of a join relation that includes rel + * + * Modifies the superrels field of rel + */ +static void +add_superrels(Rel *rel, Rel *super_rel) +{ + rel->superrels = lappend(rel->superrels, super_rel); +} + +/* + * nonoverlap-rels-- + * test if two join relations overlap, i.e., includes the same + * relation. + * + * 'rel1' and 'rel2' are two join relations + * + * Returns non-nil if rel1 and rel2 do not overlap. + */ +static bool +nonoverlap_rels(Rel *rel1, Rel *rel2) +{ + return(nonoverlap_sets(rel1->relids, rel2->relids)); +} + +static bool +nonoverlap_sets(List *s1, List *s2) +{ + List *x = NIL; + + foreach(x,s1) { + int e = lfirsti(x); + if(intMember(e,s2)) + return(false); + } + return(true); +} + +static void +set_joinrel_size(Rel *joinrel, Rel *outer_rel, Rel *inner_rel, JInfo *jinfo) +{ + int ntuples; + float selec; + + /* voodoo magic. but better than a size of 0. I have no idea why + we didn't set the size before. -ay 2/95 */ + if (jinfo==NULL) { + /* worst case: the cartesian product */ + ntuples = outer_rel->tuples * inner_rel->tuples; + } else { + selec = product_selec(jinfo->jinfoclauseinfo); +/* ntuples = Min(outer_rel->tuples,inner_rel->tuples) * selec; */ + ntuples = outer_rel->tuples * inner_rel->tuples * selec; + } + + /* I bet sizes less than 1 will screw up optimization so + make the best case 1 instead of 0 - jolly*/ + if (ntuples < 1) + ntuples = 1; + + joinrel->tuples = ntuples; +} diff --git a/src/backend/optimizer/path/joinutils.c b/src/backend/optimizer/path/joinutils.c new file mode 100644 index 00000000000..1be5a57f2ec --- /dev/null +++ b/src/backend/optimizer/path/joinutils.c @@ -0,0 +1,432 @@ +/*------------------------------------------------------------------------- + * + * joinutils.c-- + * Utilities for matching and building join and path keys + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/joinutils.c,v 1.1.1.1 1996/07/09 06:21:36 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/plannodes.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/var.h" +#include "optimizer/keys.h" +#include "optimizer/tlist.h" +#include "optimizer/joininfo.h" +#include "optimizer/ordering.h" + + +static int match_pathkey_joinkeys(List *pathkey, List *joinkeys, + int which_subkey); +static bool every_func(List *joinkeys, List *pathkey, + int which_subkey); +static List *new_join_pathkey(List *subkeys, + List *considered_subkeys, List *join_rel_tlist, + List *joinclauses); +static List *new_matching_subkeys(Var *subkey, List *considered_subkeys, + List *join_rel_tlist, List *joinclauses); + +/**************************************************************************** + * KEY COMPARISONS + ****************************************************************************/ + +/* + * match-pathkeys-joinkeys-- + * Attempts to match the keys of a path against the keys of join clauses. + * This is done by looking for a matching join key in 'joinkeys' for + * every path key in the list 'pathkeys'. If there is a matching join key + * (not necessarily unique) for every path key, then the list of + * corresponding join keys and join clauses are returned in the order in + * which the keys matched the path keys. + * + * 'pathkeys' is a list of path keys: + * ( ( (var) (var) ... ) ( (var) ... ) ) + * 'joinkeys' is a list of join keys: + * ( (outer inner) (outer inner) ... ) + * 'joinclauses' is a list of clauses corresponding to the join keys in + * 'joinkeys' + * 'which-subkey' is a flag that selects the desired subkey of a join key + * in 'joinkeys' + * + * Returns the join keys and corresponding join clauses in a list if all + * of the path keys were matched: + * ( + * ( (outerkey0 innerkey0) ... (outerkeyN innerkeyN) ) + * ( clause0 ... clauseN ) + * ) + * and nil otherwise. + * + * Returns a list of matched join keys and a list of matched join clauses + * in matchedJoinClausesPtr. - ay 11/94 + */ +List * +match_pathkeys_joinkeys(List *pathkeys, + List *joinkeys, + List *joinclauses, + int which_subkey, + List **matchedJoinClausesPtr) +{ + List *matched_joinkeys = NIL; + List *matched_joinclauses = NIL; + List *pathkey = NIL; + List *i = NIL; + int matched_joinkey_index = -1; + + foreach(i, pathkeys) { + pathkey = lfirst(i); + matched_joinkey_index = + match_pathkey_joinkeys(pathkey, joinkeys, which_subkey); + + if (matched_joinkey_index != -1 ) { + List *xjoinkey = nth(matched_joinkey_index,joinkeys); + List *joinclause = nth(matched_joinkey_index,joinclauses); + + /* XXX was "push" function */ + matched_joinkeys = lappend(matched_joinkeys,xjoinkey); + matched_joinkeys = nreverse(matched_joinkeys); + + matched_joinclauses = lappend(matched_joinclauses,joinclause); + matched_joinclauses = nreverse(matched_joinclauses); + joinkeys = LispRemove(xjoinkey,joinkeys); + } else { + return(NIL); + } + + } + if(matched_joinkeys==NULL || + length(matched_joinkeys) != length(pathkeys)) { + return NIL; + } + + *matchedJoinClausesPtr = nreverse(matched_joinclauses); + return (nreverse(matched_joinkeys)); +} + +/* + * match-pathkey-joinkeys-- + * Returns the 0-based index into 'joinkeys' of the first joinkey whose + * outer or inner subkey matches any subkey of 'pathkey'. + */ +static int +match_pathkey_joinkeys(List *pathkey, + List *joinkeys, + int which_subkey) +{ + Var *path_subkey; + int pos; + List *i = NIL; + List *x = NIL; + JoinKey *jk; + + foreach(i, pathkey) { + path_subkey = (Var *)lfirst(i); + pos = 0; + foreach(x, joinkeys) { + jk = (JoinKey*)lfirst(x); + if(var_equal(path_subkey, + extract_subkey(jk, which_subkey))) + return(pos); + pos++; + } + } + return(-1); /* no index found */ +} + +/* + * match-paths-joinkeys-- + * Attempts to find a path in 'paths' whose keys match a set of join + * keys 'joinkeys'. To match, + * 1. the path node ordering must equal 'ordering'. + * 2. each subkey of a given path must match(i.e., be(var_equal) to) the + * appropriate subkey of the corresponding join key in 'joinkeys', + * i.e., the Nth path key must match its subkeys against the subkey of + * the Nth join key in 'joinkeys'. + * + * 'joinkeys' is the list of key pairs to which the path keys must be + * matched + * 'ordering' is the ordering of the(outer) path to which 'joinkeys' + * must correspond + * 'paths' is a list of(inner) paths which are to be matched against + * each join key in 'joinkeys' + * 'which-subkey' is a flag that selects the desired subkey of a join key + * in 'joinkeys' + * + * Returns the matching path node if one exists, nil otherwise. + */ +static bool +every_func(List *joinkeys, List *pathkey, int which_subkey) +{ + JoinKey *xjoinkey; + Var *temp; + Var *tempkey = NULL; + bool found = false; + List *i = NIL; + List *j = NIL; + + foreach(i,joinkeys) { + xjoinkey = (JoinKey*)lfirst(i); + found = false; + foreach(j,pathkey) { + temp = (Var*)lfirst((List*)lfirst(j)); + if(temp == NULL) continue; + tempkey = extract_subkey(xjoinkey,which_subkey); + if(var_equal(tempkey, temp)) { + found = true; + break; + } + } + if(found == false) + return(false); + } + return(found); +} + + +/* + * match_paths_joinkeys - + * find the cheapest path that matches the join keys + */ +Path * +match_paths_joinkeys(List *joinkeys, + PathOrder *ordering, + List *paths, + int which_subkey) +{ + Path *matched_path = NULL ; + bool key_match = false; + List *i = NIL; + + foreach(i,paths) { + Path *path = (Path*)lfirst(i); + + key_match = every_func(joinkeys, path->keys, which_subkey); + + if (equal_path_path_ordering(ordering, + &path->p_ordering) && + length(joinkeys) == length(path->keys) && + key_match) { + + if (matched_path) { + if (path->path_cost < matched_path->path_cost) + matched_path = path; + } else { + matched_path = path; + } + } + } + return matched_path; +} + + + +/* + * extract-path-keys-- + * Builds a subkey list for a path by pulling one of the subkeys from + * a list of join keys 'joinkeys' and then finding the var node in the + * target list 'tlist' that corresponds to that subkey. + * + * 'joinkeys' is a list of join key pairs + * 'tlist' is a relation target list + * 'which-subkey' is a flag that selects the desired subkey of a join key + * in 'joinkeys' + * + * Returns a list of pathkeys: ((tlvar1)(tlvar2)...(tlvarN)). + * [I've no idea why they have to be list of lists. Should be fixed. -ay 12/94] + */ +List * +extract_path_keys(List *joinkeys, + List *tlist, + int which_subkey) +{ + List *pathkeys = NIL; + List *jk; + + foreach(jk, joinkeys) { + JoinKey *jkey = (JoinKey*)lfirst(jk); + Var *var, *key; + List *p; + + /* + * find the right Var in the target list for this key + */ + var = (Var*)extract_subkey(jkey, which_subkey); + key = (Var*)matching_tlvar(var, tlist); + + /* + * include it in the pathkeys list if we haven't already done so + */ + foreach(p, pathkeys) { + Var *pkey = lfirst((List*)lfirst(p)); /* XXX fix me */ + if (key == pkey) + break; + } + if (p!=NIL) + continue; /* key already in pathkeys */ + + pathkeys = + lappend(pathkeys, lcons(key,NIL)); + } + return(pathkeys); +} + + +/**************************************************************************** + * NEW PATHKEY FORMATION + ****************************************************************************/ + +/* + * new-join-pathkeys-- + * Find the path keys for a join relation by finding all vars in the list + * of join clauses 'joinclauses' such that: + * (1) the var corresponding to the outer join relation is a + * key on the outer path + * (2) the var appears in the target list of the join relation + * In other words, add to each outer path key the inner path keys that + * are required for qualification. + * + * 'outer-pathkeys' is the list of the outer path's path keys + * 'join-rel-tlist' is the target list of the join relation + * 'joinclauses' is the list of restricting join clauses + * + * Returns the list of new path keys. + * + */ +List * +new_join_pathkeys(List *outer_pathkeys, + List *join_rel_tlist, + List *joinclauses) +{ + List *outer_pathkey = NIL; + List *t_list = NIL; + List *x; + List *i = NIL; + + foreach(i, outer_pathkeys) { + outer_pathkey = lfirst(i); + x = new_join_pathkey(outer_pathkey, NIL, + join_rel_tlist,joinclauses); + if (x!=NIL) { + t_list = lappend(t_list, x); + } + } + return(t_list); +} + +/* + * new-join-pathkey-- + * Finds new vars that become subkeys due to qualification clauses that + * contain any previously considered subkeys. These new subkeys plus the + * subkeys from 'subkeys' form a new pathkey for the join relation. + * + * Note that each returned subkey is the var node found in + * 'join-rel-tlist' rather than the joinclause var node. + * + * 'subkeys' is a list of subkeys for which matching subkeys are to be + * found + * 'considered-subkeys' is the current list of all subkeys corresponding + * to a given pathkey + * + * Returns a new pathkey(list of subkeys). + * + */ +static List * +new_join_pathkey(List *subkeys, + List *considered_subkeys, + List *join_rel_tlist, + List *joinclauses) +{ + List *t_list = NIL; + Var *subkey; + List *i = NIL; + List *matched_subkeys = NIL; + Expr *tlist_key = (Expr*)NULL; + List *newly_considered_subkeys = NIL; + + foreach (i, subkeys) { + subkey = (Var *)lfirst(i); + if(subkey == NULL) + break; /* XXX something is wrong */ + matched_subkeys = + new_matching_subkeys(subkey,considered_subkeys, + join_rel_tlist,joinclauses); + tlist_key = matching_tlvar(subkey,join_rel_tlist); + newly_considered_subkeys = NIL; + + if (tlist_key) { + if(!member(tlist_key, matched_subkeys)) + newly_considered_subkeys = lcons(tlist_key, + matched_subkeys); + } + else { + newly_considered_subkeys = matched_subkeys; + } + + considered_subkeys = + append(considered_subkeys, newly_considered_subkeys); + + t_list = nconc(t_list,newly_considered_subkeys); + } + return(t_list); +} + +/* + * new-matching-subkeys-- + * Returns a list of new subkeys: + * (1) which are not listed in 'considered-subkeys' + * (2) for which the "other" variable in some clause in 'joinclauses' is + * 'subkey' + * (3) which are mentioned in 'join-rel-tlist' + * + * Note that each returned subkey is the var node found in + * 'join-rel-tlist' rather than the joinclause var node. + * + * 'subkey' is the var node for which we are trying to find matching + * clauses + * + * Returns a list of new subkeys. + * + */ +static List * +new_matching_subkeys(Var *subkey, + List *considered_subkeys, + List *join_rel_tlist, + List *joinclauses) +{ + Expr *joinclause = NULL; + List *t_list = NIL; + List *temp = NIL; + List *i = NIL; + Expr *tlist_other_var = (Expr *)NULL; + + foreach(i,joinclauses) { + joinclause = lfirst(i); + tlist_other_var = + matching_tlvar(other_join_clause_var(subkey,joinclause), + join_rel_tlist); + + if(tlist_other_var && + !(member(tlist_other_var,considered_subkeys))) { + + /* XXX was "push" function */ + considered_subkeys = lappend(considered_subkeys, + tlist_other_var); + + /* considered_subkeys = nreverse(considered_subkeys); + XXX -- I am not sure of this. */ + + temp = lcons(tlist_other_var, NIL); + t_list = nconc(t_list,temp); + } + } + return(t_list); +} diff --git a/src/backend/optimizer/path/mergeutils.c b/src/backend/optimizer/path/mergeutils.c new file mode 100644 index 00000000000..d5f0fdcb65b --- /dev/null +++ b/src/backend/optimizer/path/mergeutils.c @@ -0,0 +1,122 @@ +/*------------------------------------------------------------------------- + * + * mergeutils.c-- + * Utilities for finding applicable merge clauses and pathkeys + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/mergeutils.c,v 1.1.1.1 1996/07/09 06:21:36 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" + +#include "optimizer/internal.h" +#include "optimizer/paths.h" +#include "optimizer/clauses.h" +#include "optimizer/ordering.h" + +/* + * group-clauses-by-order-- + * If a join clause node in 'clauseinfo-list' is mergesortable, store + * it within a mergeinfo node containing other clause nodes with the same + * mergesort ordering. + * + * 'clauseinfo-list' is the list of clauseinfo nodes + * 'inner-relid' is the relid of the inner join relation + * + * Returns the new list of mergeinfo nodes. + * + */ +List * +group_clauses_by_order(List *clauseinfo_list, + int inner_relid) +{ + List *mergeinfo_list = NIL; + List *xclauseinfo = NIL; + + foreach (xclauseinfo, clauseinfo_list) { + CInfo *clauseinfo = (CInfo *)lfirst(xclauseinfo); + MergeOrder *merge_ordering = clauseinfo->mergesortorder; + + if (merge_ordering) { + /* + * Create a new mergeinfo node and add it to + * 'mergeinfo-list' if one does not yet exist for this + * merge ordering. + */ + PathOrder p_ordering; + MInfo *xmergeinfo; + Expr *clause = clauseinfo->clause; + Var *leftop = get_leftop (clause); + Var *rightop = get_rightop (clause); + JoinKey *keys; + + p_ordering.ordtype = MERGE_ORDER; + p_ordering.ord.merge = merge_ordering; + xmergeinfo = + match_order_mergeinfo(&p_ordering, mergeinfo_list); + if (inner_relid == leftop->varno) { + keys = makeNode(JoinKey); + keys->outer = rightop; + keys->inner = leftop; + } else { + keys = makeNode(JoinKey); + keys->outer = leftop; + keys->inner = rightop; + } + + if (xmergeinfo==NULL) { + xmergeinfo = makeNode(MInfo); + + xmergeinfo->m_ordering = merge_ordering; + mergeinfo_list = lcons(xmergeinfo, + mergeinfo_list); + } + + ((JoinMethod *)xmergeinfo)->clauses = + lcons(clause, + ((JoinMethod *)xmergeinfo)->clauses); + ((JoinMethod *)xmergeinfo)->jmkeys = + lcons(keys, + ((JoinMethod *)xmergeinfo)->jmkeys); + } + } + return(mergeinfo_list); +} + + +/* + * match-order-mergeinfo-- + * Searches the list 'mergeinfo-list' for a mergeinfo node whose order + * field equals 'ordering'. + * + * Returns the node if it exists. + * + */ +MInfo * +match_order_mergeinfo(PathOrder *ordering, List *mergeinfo_list) +{ + MergeOrder *xmergeorder; + List *xmergeinfo = NIL; + + foreach(xmergeinfo, mergeinfo_list) { + MInfo *mergeinfo = (MInfo*)lfirst(xmergeinfo); + + xmergeorder = mergeinfo->m_ordering; + + if ((ordering->ordtype==MERGE_ORDER && + equal_merge_merge_ordering(ordering->ord.merge, xmergeorder)) || + (ordering->ordtype==SORTOP_ORDER && + equal_path_merge_ordering(ordering->ord.sortop, xmergeorder))) { + + return (mergeinfo); + } + } + return((MInfo*) NIL); +} diff --git a/src/backend/optimizer/path/orindxpath.c b/src/backend/optimizer/path/orindxpath.c new file mode 100644 index 00000000000..e040675e6ec --- /dev/null +++ b/src/backend/optimizer/path/orindxpath.c @@ -0,0 +1,271 @@ +/*------------------------------------------------------------------------- + * + * orindxpath.c-- + * Routines to find index paths that match a set of 'or' clauses + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.1.1.1 1996/07/09 06:21:36 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" +#include "nodes/primnodes.h" + +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" + +#include "optimizer/internal.h" +#include "optimizer/clauses.h" +#include "optimizer/clauseinfo.h" +#include "optimizer/paths.h" +#include "optimizer/cost.h" +#include "optimizer/plancat.h" +#include "optimizer/xfunc.h" + +#include "parser/parsetree.h" + + +static void best_or_subclause_indices(Query *root, Rel *rel, List *subclauses, + List *indices, List *examined_indexids, Cost subcost, List *selectivities, + List **indexids, Cost *cost, List **selecs); +static void best_or_subclause_index(Query *root, Rel *rel, Expr *subclause, + List *indices, int *indexid, Cost *cost, Cost *selec); + + +/* + * create-or-index-paths-- + * Creates index paths for indices that match 'or' clauses. + * + * 'rel' is the relation entry for which the paths are to be defined on + * 'clauses' is the list of available restriction clause nodes + * + * Returns a list of these index path nodes. + * + */ +List * +create_or_index_paths(Query *root, + Rel *rel, List *clauses) +{ + List *t_list = NIL; + + if (clauses != NIL) { + CInfo *clausenode = (CInfo *) (lfirst (clauses)); + + /* Check to see if this clause is an 'or' clause, and, if so, + * whether or not each of the subclauses within the 'or' clause has + * been matched by an index (the 'Index field was set in + * (match_or) if no index matches a given subclause, one of the + * lists of index nodes returned by (get_index) will be 'nil'). + */ + if (valid_or_clause(clausenode) && + clausenode->indexids) { + List *temp = NIL; + List *index_list = NIL; + bool index_flag = true; + + index_list = clausenode->indexids; + foreach(temp,index_list) { + if (!temp) + index_flag = false; + } + if (index_flag) { /* used to be a lisp every function */ + IndexPath *pathnode = makeNode(IndexPath); + List *indexids; + Cost cost; + List *selecs; + + best_or_subclause_indices(root, + rel, + clausenode->clause->args, + clausenode->indexids, + NIL, + (Cost)0, + NIL, + &indexids, + &cost, + &selecs); + + pathnode->path.pathtype = T_IndexScan; + pathnode->path.parent = rel; + pathnode->indexqual = + lcons(clausenode,NIL); + pathnode->indexid = indexids; + pathnode->path.path_cost = cost; + + /* copy clauseinfo list into path for expensive + function processing -- JMH, 7/7/92 */ + pathnode->path.locclauseinfo = + set_difference(clauses, + copyObject((Node*) + rel->clauseinfo)); + +#if 0 /* fix xfunc */ + /* add in cost for expensive functions! -- JMH, 7/7/92 */ + if (XfuncMode != XFUNC_OFF) { + ((Path*)pathnode)->path_cost += + xfunc_get_path_cost((Path)pathnode); + } +#endif + clausenode->selectivity = (Cost)floatVal(selecs); + t_list = + lcons(pathnode, + create_or_index_paths(root, rel,lnext(clauses))); + } else { + t_list = create_or_index_paths(root, rel,lnext(clauses)); + } + } + } + + return(t_list); +} + +/* + * best-or-subclause-indices-- + * Determines the best index to be used in conjunction with each subclause + * of an 'or' clause and the cost of scanning a relation using these + * indices. The cost is the sum of the individual index costs. + * + * 'rel' is the node of the relation on which the index is defined + * 'subclauses' are the subclauses of the 'or' clause + * 'indices' are those index nodes that matched subclauses of the 'or' + * clause + * 'examined-indexids' is a list of those index ids to be used with + * subclauses that have already been examined + * 'subcost' is the cost of using the indices in 'examined-indexids' + * 'selectivities' is a list of the selectivities of subclauses that + * have already been examined + * + * Returns a list of the indexids, cost, and selectivities of each + * subclause, e.g., ((i1 i2 i3) cost (s1 s2 s3)), where 'i' is an OID, + * 'cost' is a flonum, and 's' is a flonum. + */ +static void +best_or_subclause_indices(Query *root, + Rel *rel, + List *subclauses, + List *indices, + List *examined_indexids, + Cost subcost, + List *selectivities, + List **indexids, /* return value */ + Cost *cost, /* return value */ + List **selecs) /* return value */ +{ + if (subclauses==NIL) { + *indexids = nreverse(examined_indexids); + *cost = subcost; + *selecs = nreverse(selectivities); + } else { + int best_indexid; + Cost best_cost; + Cost best_selec; + + best_or_subclause_index(root, rel, lfirst(subclauses), lfirst(indices), + &best_indexid, &best_cost, &best_selec); + + best_or_subclause_indices(root, + rel, + lnext(subclauses), + lnext(indices), + lconsi(best_indexid, examined_indexids), + subcost + best_cost, + lcons(makeFloat(best_selec), selectivities), + indexids, + cost, + selecs); + } + return; +} + +/* + * best-or-subclause-index-- + * Determines which is the best index to be used with a subclause of + * an 'or' clause by estimating the cost of using each index and selecting + * the least expensive. + * + * 'rel' is the node of the relation on which the index is defined + * 'subclause' is the subclause + * 'indices' is a list of index nodes that match the subclause + * + * Returns a list (index-id index-subcost index-selectivity) + * (a fixnum, a fixnum, and a flonum respectively). + * + */ +static void +best_or_subclause_index(Query *root, + Rel *rel, + Expr *subclause, + List *indices, + int *retIndexid, /* return value */ + Cost *retCost, /* return value */ + Cost *retSelec) /* return value */ +{ + if (indices != NIL) { + Datum value; + int flag = 0; + Cost subcost; + Rel *index = (Rel *)lfirst (indices); + AttrNumber attno = (get_leftop (subclause))->varattno ; + Oid opno = ((Oper*)subclause->oper)->opno; + bool constant_on_right = non_null((Expr*)get_rightop(subclause)); + float npages, selec; + int subclause_indexid; + Cost subclause_cost; + Cost subclause_selec; + + if(constant_on_right) { + value = ((Const*)get_rightop (subclause))->constvalue; + } else { + value = NameGetDatum(""); + } + if(constant_on_right) { + flag = (_SELEC_IS_CONSTANT_ ||_SELEC_CONSTANT_RIGHT_); + } else { + flag = _SELEC_CONSTANT_RIGHT_; + } + index_selectivity(lfirsti(index->relids), + index->classlist, + lconsi(opno,NIL), + getrelid(lfirsti(rel->relids), + root->rtable), + lconsi(attno,NIL), + lconsi(value,NIL), + lconsi(flag,NIL), + 1, + &npages, + &selec); + + subcost = cost_index((Oid) lfirsti(index->relids), + (int)npages, + (Cost)selec, + rel->pages, + rel->tuples, + index->pages, + index->tuples, + false); + best_or_subclause_index(root, + rel, + subclause, + lnext(indices), + &subclause_indexid, + &subclause_cost, + &subclause_selec); + + if (subclause_indexid==0 || subcost < subclause_cost) { + *retIndexid = lfirsti(index->relids); + *retCost = subcost; + *retSelec = selec; + } else { + *retIndexid = 0; + *retCost = 0.0; + *retSelec = 0.0; + } + } + return; +} diff --git a/src/backend/optimizer/path/predmig.c b/src/backend/optimizer/path/predmig.c new file mode 100644 index 00000000000..2d3b5c5767f --- /dev/null +++ b/src/backend/optimizer/path/predmig.c @@ -0,0 +1,773 @@ +/*------------------------------------------------------------------------- + * + * predmig.c-- + * + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/predmig.c,v 1.1.1.1 1996/07/09 06:21:36 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +/* +** DESCRIPTION +** Main Routines to handle Predicate Migration (i.e. correct optimization +** of queries with expensive functions.) +** +** The reasoning behind some of these algorithms is rather detailed. +** Have a look at Sequoia Tech Report 92/13 for more info. Also +** see Monma and Sidney's paper "Sequencing with Series-Parallel +** Precedence Constraints", in "Mathematics of Operations Research", +** volume 4 (1979), pp. 215-224. +** +** The main thing that this code does that wasn't handled in xfunc.c is +** it considers the possibility that two joins in a stream may not +** be ordered by ascending rank -- in such a scenario, it may be optimal +** to pullup more restrictions than we did via xfunc_try_pullup. +** +** This code in some sense generalizes xfunc_try_pullup; if you +** run postgres -x noprune, you'll turn off xfunc_try_pullup, and this +** code will do everything that xfunc_try_pullup would have, and maybe +** more. However, this results in no pruning, which may slow down the +** optimizer and/or cause the system to run out of memory. +** -- JMH, 11/13/92 +*/ + +#include "nodes/pg_list.h" +#include "nodes/nodes.h" +#include "nodes/primnodes.h" +#include "nodes/relation.h" +#include "utils/palloc.h" +#include "utils/elog.h" +#include "planner/xfunc.h" +#include "planner/pathnode.h" +#include "planner/internal.h" +#include "planner/cost.h" +#include "planner/keys.h" +#include "planner/tlist.h" +#include "lib/qsort.h" + +#define is_clause(node) (get_cinfo(node)) /* a stream node represents a + clause (not a join) iff it + has a non-NULL cinfo field */ + +static void xfunc_predmig(JoinPath pathnode, Stream streamroot, + Stream laststream, bool *progressp); +static bool xfunc_series_llel(Stream stream); +static bool xfunc_llel_chains(Stream root, Stream bottom); +static Stream xfunc_complete_stream(Stream stream); +static bool xfunc_prdmig_pullup(Stream origstream, Stream pullme, + JoinPath joinpath); +static void xfunc_form_groups(Stream root, Stream bottom); +static void xfunc_free_stream(Stream root); +static Stream xfunc_add_clauses(Stream current); +static void xfunc_setup_group(Stream node, Stream bottom); +static Stream xfunc_streaminsert(CInfo clauseinfo, Stream current, + int clausetype); +static int xfunc_num_relids(Stream node); +static StreamPtr xfunc_get_downjoin(Stream node); +static StreamPtr xfunc_get_upjoin(Stream node); +static Stream xfunc_stream_qsort(Stream root, Stream bottom); +static int xfunc_stream_compare(void *arg1, void *arg2); +static bool xfunc_check_stream(Stream node); +static bool xfunc_in_stream(Stream node, Stream stream); + +/* ----------------- MAIN FUNCTIONS ------------------------ */ +/* +** xfunc_do_predmig +** wrapper for Predicate Migration. It calls xfunc_predmig until no +** more progress is made. +** return value says if any changes were ever made. +*/ +bool xfunc_do_predmig(Path root) +{ + bool progress, changed = false; + + if (is_join(root)) + do + { + progress = false; + Assert(IsA(root,JoinPath)); + xfunc_predmig((JoinPath)root, (Stream)NULL, (Stream)NULL, + &progress); + if (changed && progress) + elog(DEBUG, "Needed to do a second round of predmig!\n"); + if (progress) changed = true; + } while (progress); + return(changed); +} + + +/* + ** xfunc_predmig + ** The main routine for Predicate Migration. It traverses a join tree, + ** and for each root-to-leaf path in the plan tree it constructs a + ** "Stream", which it passes to xfunc_series_llel for optimization. + ** Destructively modifies the join tree (via predicate pullup). + */ +static void +xfunc_predmig(JoinPath pathnode, /* root of the join tree */ + Stream streamroot, + Stream laststream, /* for recursive calls -- these are + the root of the stream under + construction, and the lowest node + created so far */ + bool *progressp) +{ + Stream newstream; + + /* + ** traverse the join tree dfs-style, constructing a stream as you go. + ** When you hit a scan node, pass the stream off to xfunc_series_llel. + */ + + /* sanity check */ + if ((!streamroot && laststream) || + (streamroot && !laststream)) + elog(WARN, "called xfunc_predmig with bad inputs"); + if (streamroot) Assert(xfunc_check_stream(streamroot)); + + /* add path node to stream */ + newstream = RMakeStream(); + if (!streamroot) + streamroot = newstream; + set_upstream(newstream, (StreamPtr)laststream); + if (laststream) + set_downstream(laststream, (StreamPtr)newstream); + set_downstream(newstream, (StreamPtr)NULL); + set_pathptr(newstream, (pathPtr)pathnode); + set_cinfo(newstream, (CInfo)NULL); + set_clausetype(newstream, XFUNC_UNKNOWN); + + /* base case: we're at a leaf, call xfunc_series_llel */ + if (!is_join(pathnode)) + { + /* form a fleshed-out copy of the stream */ + Stream fullstream = xfunc_complete_stream(streamroot); + + /* sort it via series-llel */ + if (xfunc_series_llel(fullstream)) + *progressp = true; + + /* free up the copy */ + xfunc_free_stream(fullstream); + } + else + { + /* visit left child */ + xfunc_predmig((JoinPath)get_outerjoinpath(pathnode), + streamroot, newstream, progressp); + + /* visit right child */ + xfunc_predmig((JoinPath)get_innerjoinpath(pathnode), + streamroot, newstream, progressp); + } + + /* remove this node */ + if (get_upstream(newstream)) + set_downstream((Stream)get_upstream(newstream), (StreamPtr)NULL); + pfree(newstream); +} + +/* + ** xfunc_series_llel + ** A flavor of Monma and Sidney's Series-Parallel algorithm. + ** Traverse stream downwards. When you find a node with restrictions on it, + ** call xfunc_llel_chains on the substream from root to that node. + */ +static bool xfunc_series_llel(Stream stream) +{ + Stream temp, next; + bool progress = false; + + for (temp = stream; temp != (Stream)NULL; temp = next) + { + next = (Stream)xfunc_get_downjoin(temp); + /* + ** if there are restrictions/secondary join clauses above this + ** node, call xfunc_llel_chains + */ + if (get_upstream(temp) && is_clause((Stream)get_upstream(temp))) + if (xfunc_llel_chains(stream, temp)) + progress = true; + } + return(progress); +} + +/* + ** xfunc_llel_chains + ** A flavor of Monma and Sidney's Parallel Chains algorithm. + ** Given a stream which has been well-ordered except for its lowermost + ** restrictions/2-ary joins, pull up the restrictions/2-arys as appropriate. + ** What that means here is to form groups in the chain above the lowest + ** join node above bottom inclusive, and then take all the restrictions + ** following bottom, and try to pull them up as far as possible. + */ +static bool xfunc_llel_chains(Stream root, Stream bottom) +{ + bool progress = false; + Stream origstream; + Stream tmpstream, pathstream; + Stream rootcopy = root; + + Assert(xfunc_check_stream(root)); + + /* xfunc_prdmig_pullup will need an unmodified copy of the stream */ + origstream = (Stream)copyObject((Node)root); + + /* form groups among ill-ordered nodes */ + xfunc_form_groups(root, bottom); + + /* sort chain by rank */ + Assert(xfunc_in_stream(bottom, root)); + rootcopy = xfunc_stream_qsort(root, bottom); + + /* + ** traverse sorted stream -- if any restriction has moved above a join, + ** we must pull it up in the plan. That is, make plan tree + ** reflect order of sorted stream. + */ + for (tmpstream = rootcopy, + pathstream = (Stream)xfunc_get_downjoin(rootcopy); + tmpstream != (Stream)NULL && pathstream != (Stream)NULL; + tmpstream = (Stream)get_downstream(tmpstream)) + { + if (is_clause(tmpstream) + && get_pathptr(pathstream) != get_pathptr(tmpstream)) + { + /* + ** If restriction moved above a Join after sort, we pull it + ** up in the join plan. + ** If restriction moved down, we ignore it. + ** This is because Joey's Sequoia paper proves that + ** restrictions should never move down. If this + ** one were moved down, it would violate "semantic correctness", + ** i.e. it would be lower than the attributes it references. + */ + Assert(xfunc_num_relids(pathstream)>xfunc_num_relids(tmpstream)); + progress = + xfunc_prdmig_pullup(origstream, tmpstream, + (JoinPath)get_pathptr(pathstream)); + } + if (get_downstream(tmpstream)) + pathstream = + (Stream)xfunc_get_downjoin((Stream)get_downstream(tmpstream)); + } + + /* free up origstream */ + xfunc_free_stream(origstream); + return(progress); +} + +/* + ** xfunc_complete_stream -- + ** Given a stream composed of join nodes only, make a copy containing the + ** join nodes along with the associated restriction nodes. + */ +static Stream xfunc_complete_stream(Stream stream) +{ + Stream tmpstream, copystream, curstream = (Stream)NULL; + + copystream = (Stream)copyObject((Node)stream); + Assert(xfunc_check_stream(copystream)); + + curstream = copystream; + Assert(!is_clause(curstream)); + + /* curstream = (Stream)xfunc_get_downjoin(curstream); */ + + while(curstream != (Stream)NULL) + { + xfunc_add_clauses(curstream); + curstream = (Stream)xfunc_get_downjoin(curstream); + } + + /* find top of stream and return it */ + for (tmpstream = copystream; get_upstream(tmpstream) != (StreamPtr)NULL; + tmpstream = (Stream)get_upstream(tmpstream)) + /* no body in for loop */; + + return(tmpstream); +} + +/* + ** xfunc_prdmig_pullup + ** pullup a clause in a path above joinpath. Since the JoinPath tree + ** doesn't have upward pointers, it's difficult to deal with. Thus we + ** require the original stream, which maintains pointers to all the path + ** nodes. We use the original stream to find out what joins are + ** above the clause. + */ +static bool +xfunc_prdmig_pullup(Stream origstream, Stream pullme, JoinPath joinpath) +{ + CInfo clauseinfo = get_cinfo(pullme); + bool progress = false; + Stream upjoin, orignode, temp; + int whichchild; + + /* find node in origstream that contains clause */ + for (orignode = origstream; + orignode != (Stream) NULL + && get_cinfo(orignode) != clauseinfo; + orignode = (Stream)get_downstream(orignode)) + /* empty body in for loop */ ; + if (!orignode) + elog(WARN, "Didn't find matching node in original stream"); + + + /* pull up this node as far as it should go */ + for (upjoin = (Stream)xfunc_get_upjoin(orignode); + upjoin != (Stream)NULL + && (JoinPath)get_pathptr((Stream)xfunc_get_downjoin(upjoin)) + != joinpath; + upjoin = (Stream)xfunc_get_upjoin(upjoin)) + { +#ifdef DEBUG + elog(DEBUG, "pulling up in xfunc_predmig_pullup!"); +#endif + /* move clause up in path */ + if (get_pathptr((Stream)get_downstream(upjoin)) + == (pathPtr)get_outerjoinpath((JoinPath)get_pathptr(upjoin))) + whichchild = OUTER; + else whichchild = INNER; + clauseinfo = xfunc_pullup((Path)get_pathptr((Stream)get_downstream(upjoin)), + (JoinPath)get_pathptr(upjoin), + clauseinfo, + whichchild, + get_clausetype(orignode)); + set_pathptr(pullme, get_pathptr(upjoin)); + /* pullme has been moved into locclauseinfo */ + set_clausetype(pullme, XFUNC_LOCPRD); + + /* + ** xfunc_pullup makes new path nodes for children of + ** get_pathptr(current). We must modify the stream nodes to point + ** to these path nodes + */ + if (whichchild == OUTER) + { + for(temp = (Stream)get_downstream(upjoin); is_clause(temp); + temp = (Stream)get_downstream(temp)) + set_pathptr + (temp, (pathPtr) + get_outerjoinpath((JoinPath)get_pathptr(upjoin))); + set_pathptr + (temp, + (pathPtr)get_outerjoinpath((JoinPath)get_pathptr(upjoin))); + } + else + { + for(temp = (Stream)get_downstream(upjoin); is_clause(temp); + temp = (Stream)get_downstream(temp)) + set_pathptr + (temp, (pathPtr) + get_innerjoinpath((JoinPath)get_pathptr(upjoin))); + set_pathptr + (temp, (pathPtr) + get_innerjoinpath((JoinPath)get_pathptr(upjoin))); + } + progress = true; + } + if (!progress) + elog(DEBUG, "didn't succeed in pulling up in xfunc_prdmig_pullup"); + return(progress); +} + +/* + ** xfunc_form_groups -- + ** A group is a pair of stream nodes a,b such that a is constrained to + ** precede b (for instance if a and b are both joins), but rank(a) > rank(b). + ** In such a situation, Monma and Sidney prove that no clauses should end + ** up between a and b, and therefore we may treat them as a group, with + ** selectivity equal to the product of their selectivities, and cost + ** equal to the cost of the first plus the selectivity of the first times the + ** cost of the second. We define each node to be in a group by itself, + ** and then repeatedly find adjacent groups which are ordered by descending + ** rank, and make larger groups. You know that two adjacent nodes are in a + ** group together if the lower has groupup set to true. They will both have + ** the same groupcost and groupsel (since they're in the same group!) + */ +static void xfunc_form_groups(Query* queryInfo, Stream root, Stream bottom) +{ + Stream temp, parent; + int lowest = xfunc_num_relids((Stream)xfunc_get_upjoin(bottom)); + bool progress; + LispValue primjoin; + int whichchild; + + if (!lowest) return; /* no joins in stream, so no groups */ + + /* initialize groups to be single nodes */ + for (temp = root; + temp != (Stream)NULL && temp != bottom; + temp = (Stream)get_downstream(temp)) + { + /* if a Join node */ + if (!is_clause(temp)) + { + if (get_pathptr((Stream)get_downstream(temp)) + == (pathPtr)get_outerjoinpath((JoinPath)get_pathptr(temp))) + whichchild = OUTER; + else whichchild = INNER; + set_groupcost(temp, + xfunc_join_expense((JoinPath)get_pathptr(temp), + whichchild)); + if (primjoin = xfunc_primary_join((JoinPath)get_pathptr(temp))) + { + set_groupsel(temp, + compute_clause_selec(queryInfo, + primjoin, NIL)); + } + else + { + set_groupsel(temp,1.0); + } + } + else /* a restriction, or 2-ary join pred */ + { + set_groupcost(temp, + xfunc_expense(queryInfo, + get_clause(get_cinfo(temp)))); + set_groupsel(temp, + compute_clause_selec(queryInfo, + get_clause(get_cinfo(temp)), + NIL)); + } + set_groupup(temp,false); + } + + /* make passes upwards, forming groups */ + do + { + progress = false; + for (temp = (Stream)get_upstream(bottom); + temp != (Stream)NULL; + temp = (Stream)get_upstream(temp)) + { + /* check for grouping with node upstream */ + if (!get_groupup(temp) && /* not already grouped */ + (parent = (Stream)get_upstream(temp)) != (Stream)NULL && + /* temp is a join or temp is the top of a group */ + (is_join((Path)get_pathptr(temp)) || + get_downstream(temp) && + get_groupup((Stream)get_downstream(temp))) && + get_grouprank(parent) < get_grouprank(temp)) + { + progress = true; /* we formed a new group */ + set_groupup(temp,true); + set_groupcost(temp, + get_groupcost(temp) + + get_groupsel(temp) * get_groupcost(parent)); + set_groupsel(temp,get_groupsel(temp) * get_groupsel(parent)); + + /* fix costs and sels of all members of group */ + xfunc_setup_group(temp, bottom); + } + } + } while(progress); +} + + +/* ------------------- UTILITY FUNCTIONS ------------------------- */ + +/* + ** xfunc_free_stream -- + ** walk down a stream and pfree it + */ +static void xfunc_free_stream(Stream root) +{ + Stream cur, next; + + Assert(xfunc_check_stream(root)); + + if (root != (Stream)NULL) + for (cur = root; cur != (Stream)NULL; cur = next) + { + next = (Stream)get_downstream(cur); + pfree(cur); + } +} + +/* + ** xfunc_add<_clauses + ** find any clauses above current, and insert them into stream as + ** appropriate. Return uppermost clause inserted, or current if none. + */ +static Stream xfunc_add_clauses(Stream current) +{ + Stream topnode = current; + LispValue temp; + LispValue primjoin; + + /* first add in the local clauses */ + foreach(temp, get_locclauseinfo((Path)get_pathptr(current))) + { + topnode = + xfunc_streaminsert((CInfo)lfirst(temp), topnode, + XFUNC_LOCPRD); + } + + /* and add in the join clauses */ + if (IsA(get_pathptr(current),JoinPath)) + { + primjoin = xfunc_primary_join((JoinPath)get_pathptr(current)); + foreach(temp, get_pathclauseinfo((JoinPath)get_pathptr(current))) + { + if (!equal(get_clause((CInfo)lfirst(temp)), primjoin)) + topnode = + xfunc_streaminsert((CInfo)lfirst(temp), topnode, + XFUNC_JOINPRD); + } + } + return(topnode); +} + + +/* + ** xfunc_setup_group + ** find all elements of stream that are grouped with node and are above + ** bottom, and set their groupcost and groupsel to be the same as node's. + */ +static void xfunc_setup_group(Stream node, Stream bottom) +{ + Stream temp; + + if (node != bottom) + /* traverse downwards */ + for (temp = (Stream)get_downstream(node); + temp != (Stream)NULL && temp != bottom; + temp = (Stream)get_downstream(temp)) + { + if (!get_groupup(temp)) break; + else + { + set_groupcost(temp, get_groupcost(node)); + set_groupsel(temp, get_groupsel(node)); + } + } + + /* traverse upwards */ + for (temp = (Stream)get_upstream(node); temp != (Stream)NULL; + temp = (Stream)get_upstream(temp)) + { + if (!get_groupup((Stream)get_downstream(temp))) break; + else + { + set_groupcost(temp, get_groupcost(node)); + set_groupsel(temp, get_groupsel(node)); + } + } +} + + +/* + ** xfunc_streaminsert + ** Make a new Stream node to hold clause, and insert it above current. + ** Return new node. + */ +static Stream +xfunc_streaminsert(CInfo clauseinfo, + Stream current, + int clausetype) /* XFUNC_LOCPRD or XFUNC_JOINPRD */ +{ + Stream newstream = RMakeStream(); + set_upstream(newstream, get_upstream(current)); + if (get_upstream(current)) + set_downstream((Stream)(get_upstream(current)), (StreamPtr)newstream); + set_upstream(current, (StreamPtr)newstream); + set_downstream(newstream, (StreamPtr)current); + set_pathptr(newstream, get_pathptr(current)); + set_cinfo(newstream, clauseinfo); + set_clausetype(newstream, clausetype); + return(newstream); +} + +/* + ** Given a Stream node, find the number of relids referenced in the pathnode + ** associated with the stream node. The number of relids gives a unique + ** ordering on the joins in a stream, which we use to compare the height of + ** join nodes. + */ +static int xfunc_num_relids(Stream node) +{ + if (!node || !IsA(get_pathptr(node),JoinPath)) + return(0); + else return(length + (get_relids(get_parent((JoinPath)get_pathptr(node))))); +} + +/* + ** xfunc_get_downjoin -- + ** Given a stream node, find the next lowest node which points to a + ** join predicate or a scan node. + */ +static StreamPtr xfunc_get_downjoin(Stream node) +{ + Stream temp; + + if (!is_clause(node)) /* if this is a join */ + node = (Stream)get_downstream(node); + for (temp = node; temp && is_clause(temp); + temp = (Stream)get_downstream(temp)) + /* empty body in for loop */ ; + + return((StreamPtr)temp); +} + +/* + ** xfunc_get_upjoin -- + ** same as above, but upwards. + */ +static StreamPtr xfunc_get_upjoin(Stream node) +{ + Stream temp; + + if (!is_clause(node)) /* if this is a join */ + node = (Stream)get_upstream(node); + for (temp = node; temp && is_clause(temp); + temp = (Stream)get_upstream(temp)) + /* empty body in for loop */ ; + + return((StreamPtr)temp); +} + +/* + ** xfunc_stream_qsort -- + ** Given a stream, sort by group rank the elements in the stream from the + ** node "bottom" up. DESTRUCTIVELY MODIFIES STREAM! Returns new root. + */ +static Stream xfunc_stream_qsort(Stream root, Stream bottom) +{ + int i; + size_t num; + Stream *nodearray, output; + Stream tmp; + + /* find size of list */ + for (num = 0, tmp = root; tmp != bottom; + tmp = (Stream)get_downstream(tmp)) + num ++; + if (num <= 1) return (root); + + /* copy elements of the list into an array */ + nodearray = (Stream *) palloc(num * sizeof(Stream)); + + for (tmp = root, i = 0; tmp != bottom; + tmp = (Stream)get_downstream(tmp), i++) + nodearray[i] = tmp; + + /* sort the array */ + pg_qsort(nodearray, num, sizeof(LispValue), xfunc_stream_compare); + + /* paste together the array elements */ + output = nodearray[num - 1]; + set_upstream(output, (StreamPtr)NULL); + for (i = num - 2; i >= 0; i--) + { + set_downstream(nodearray[i+1], (StreamPtr)nodearray[i]); + set_upstream(nodearray[i], (StreamPtr)nodearray[i+1]); + } + set_downstream(nodearray[0], (StreamPtr)bottom); + if (bottom) + set_upstream(bottom, (StreamPtr)nodearray[0]); + + Assert(xfunc_check_stream(output)); + return(output); +} + +/* + ** xfunc_stream_compare + ** comparison function for xfunc_stream_qsort. + ** Compare nodes by group rank. If group ranks are equal, ensure that + ** join nodes appear in same order as in plan tree. + */ +static int xfunc_stream_compare(void *arg1, void *arg2) +{ + Stream stream1 = *(Stream *) arg1; + Stream stream2 = *(Stream *) arg2; + Cost rank1, rank2; + + rank1 = get_grouprank(stream1); + rank2 = get_grouprank(stream2); + + if (rank1 > rank2) return(1); + else if (rank1 < rank2) return(-1); + else + { + if (is_clause(stream1) && is_clause(stream2)) + return(0); /* doesn't matter what order if both are restrictions */ + else if (!is_clause(stream1) && !is_clause(stream2)) + { + if (xfunc_num_relids(stream1) < xfunc_num_relids(stream2)) + return(-1); + else return(1); + } + else if (is_clause(stream1) && !is_clause(stream2)) + { + if (xfunc_num_relids(stream1) == xfunc_num_relids(stream2)) + /* stream1 is a restriction over stream2 */ + return(1); + else return(-1); + } + else if (!is_clause(stream1) && is_clause(stream2)) + { + /* stream2 is a restriction over stream1: never push down */ + return(-1); + } + } +} + +/* ------------------ DEBUGGING ROUTINES ---------------------------- */ + +/* + ** Make sure all pointers in stream make sense. Make sure no joins are + ** out of order. + */ +static bool xfunc_check_stream(Stream node) +{ + Stream temp; + int numrelids, tmp; + + /* set numrelids higher than max */ + if (!is_clause(node)) + numrelids = xfunc_num_relids(node) + 1; + else if (xfunc_get_downjoin(node)) + numrelids = xfunc_num_relids((Stream)xfunc_get_downjoin(node)) + 1; + else numrelids = 1; + + for (temp = node; get_downstream(temp); temp = (Stream)get_downstream(temp)) + { + if ((Stream)get_upstream((Stream)get_downstream(temp)) != temp) + { + elog(WARN, "bad pointers in stream"); + return(false); + } + if (!is_clause(temp)) + { + if ((tmp = xfunc_num_relids(temp)) >= numrelids) + { + elog(WARN, "Joins got reordered!"); + return(false); + } + numrelids = tmp; + } + } + + return(true); +} + +/* + ** xfunc_in_stream + ** check if node is in stream + */ +static bool xfunc_in_stream(Stream node, Stream stream) +{ + Stream temp; + + for (temp = stream; temp; temp = (Stream)get_downstream(temp)) + if (temp == node) return(1); + return(0); +} diff --git a/src/backend/optimizer/path/prune.c b/src/backend/optimizer/path/prune.c new file mode 100644 index 00000000000..70f9b209e0c --- /dev/null +++ b/src/backend/optimizer/path/prune.c @@ -0,0 +1,203 @@ +/*------------------------------------------------------------------------- + * + * prune.c-- + * Routines to prune redundant paths and relations + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/prune.c,v 1.1.1.1 1996/07/09 06:21:36 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "nodes/pg_list.h" +#include "nodes/relation.h" + +#include "optimizer/internal.h" +#include "optimizer/cost.h" +#include "optimizer/paths.h" +#include "optimizer/pathnode.h" + +#include "utils/elog.h" + + +static List *prune_joinrel(Rel *rel, List *other_rels); + +/* + * prune-joinrels-- + * Removes any redundant relation entries from a list of rel nodes + * 'rel-list'. + * + * Returns the resulting list. + * + */ +List *prune_joinrels(List *rel_list) +{ + List *temp_list = NIL; + + if (rel_list != NIL) { + temp_list = lcons(lfirst(rel_list), + prune_joinrels(prune_joinrel((Rel*)lfirst(rel_list), + lnext(rel_list)))); + } + return(temp_list); +} + +/* + * prune-joinrel-- + * Prunes those relations from 'other-rels' that are redundant with + * 'rel'. A relation is redundant if it is built up of the same + * relations as 'rel'. Paths for the redundant relation are merged into + * the pathlist of 'rel'. + * + * Returns a list of non-redundant relations, and sets the pathlist field + * of 'rel' appropriately. + * + */ +static List * +prune_joinrel(Rel *rel, List *other_rels) +{ + List *i = NIL; + List *t_list = NIL; + List *temp_node = NIL; + Rel *other_rel = (Rel *)NULL; + + foreach(i, other_rels) { + other_rel = (Rel*)lfirst(i); + if(same(rel->relids, other_rel->relids)) { + rel->pathlist = add_pathlist(rel, + rel->pathlist, + other_rel->pathlist); + t_list = nconc(t_list, NIL); /* XXX is this right ? */ + } else { + temp_node = lcons(other_rel, NIL); + t_list = nconc(t_list,temp_node); + } + } + return(t_list); +} + +/* + * prune-rel-paths-- + * For each relation entry in 'rel-list' (which corresponds to a join + * relation), set pointers to the unordered path and cheapest paths + * (if the unordered path isn't the cheapest, it is pruned), and + * reset the relation's size field to reflect the join. + * + * Returns nothing of interest. + * + */ +void +prune_rel_paths(List *rel_list) +{ + List *x = NIL; + List *y = NIL; + Path *path; + Rel *rel = (Rel*)NULL; + JoinPath *cheapest = (JoinPath*)NULL; + + foreach(x, rel_list) { + rel = (Rel*)lfirst(x); + foreach(y, rel->pathlist) { + path = (Path*)lfirst(y); + + if(!path->p_ordering.ord.sortop) { + break; + } + } + cheapest = (JoinPath*)prune_rel_path(rel, path); + if (IsA_JoinPath(cheapest)) + { + rel->size = compute_joinrel_size(cheapest); + } + else + elog(WARN, "non JoinPath called"); + } +} + + +/* + * prune-rel-path-- + * Compares the unordered path for a relation with the cheapest path. If + * the unordered path is not cheapest, it is pruned. + * + * Resets the pointers in 'rel' for unordered and cheapest paths. + * + * Returns the cheapest path. + * + */ +Path * +prune_rel_path(Rel *rel, Path *unorderedpath) +{ + Path *cheapest = set_cheapest(rel, rel->pathlist); + + /* don't prune if not pruneable -- JMH, 11/23/92 */ + if(unorderedpath != cheapest + && rel->pruneable) { + + rel->unorderedpath = (Path *)NULL; + rel->pathlist = lremove(unorderedpath, rel->pathlist); + } else { + rel->unorderedpath = (Path *)unorderedpath; + } + + return(cheapest); +} + +/* + * merge-joinrels-- + * Given two lists of rel nodes that are already + * pruned, merge them into one pruned rel node list + * + * 'rel-list1' and + * 'rel-list2' are the rel node lists + * + * Returns one pruned rel node list + */ +List * +merge_joinrels(List *rel_list1, List *rel_list2) +{ + List *xrel = NIL; + + foreach(xrel,rel_list1) { + Rel *rel = (Rel*)lfirst(xrel); + rel_list2 = prune_joinrel(rel,rel_list2); + } + return(append(rel_list1, rel_list2)); +} + +/* + * prune_oldrels-- + * If all the joininfo's in a rel node are inactive, + * that means that this node has been joined into + * other nodes in all possible ways, therefore + * this node can be discarded. If not, it will cause + * extra complexity of the optimizer. + * + * old_rels is a list of rel nodes + * + * Returns a new list of rel nodes + */ +List *prune_oldrels(List *old_rels) +{ + Rel *rel; + List *joininfo_list, *xjoininfo; + + if(old_rels == NIL) + return(NIL); + + rel = (Rel*)lfirst(old_rels); + joininfo_list = rel->joininfo; + if(joininfo_list == NIL) + return (lcons(rel, prune_oldrels(lnext(old_rels)))); + + foreach(xjoininfo, joininfo_list) { + JInfo *joininfo = (JInfo*)lfirst(xjoininfo); + if(!joininfo->inactive) + return (lcons(rel, prune_oldrels(lnext(old_rels)))); + } + return(prune_oldrels(lnext(old_rels))); +} diff --git a/src/backend/optimizer/path/xfunc.c b/src/backend/optimizer/path/xfunc.c new file mode 100644 index 00000000000..405b3b77f00 --- /dev/null +++ b/src/backend/optimizer/path/xfunc.c @@ -0,0 +1,1360 @@ +/*------------------------------------------------------------------------- + * + * xfunc.c-- + * Utility routines to handle expensive function optimization. + * Includes xfunc_trypullup(), which attempts early pullup of predicates + * to allow for maximal pruning. + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/xfunc.c,v 1.1.1.1 1996/07/09 06:21:36 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#ifndef WIN32 +#include <math.h> /* for MAXFLOAT on most systems */ +#else +#include <float.h> +#define MAXFLOAT DBL_MAX +#endif /* WIN32 */ + +#include <values.h> /* for MAXFLOAT on SunOS */ +#include <string.h> + +#include "postgres.h" +#include "nodes/pg_list.h" +#include "nodes/nodes.h" +#include "nodes/primnodes.h" +#include "nodes/relation.h" +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/syscache.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "utils/syscache.h" +#include "catalog/pg_language.h" +#include "planner/xfunc.h" +#include "planner/clauses.h" +#include "planner/pathnode.h" +#include "planner/internal.h" +#include "planner/cost.h" +#include "planner/keys.h" +#include "planner/tlist.h" +#include "lib/lispsort.h" +#include "access/heapam.h" +#include "tcop/dest.h" +#include "storage/buf_internals.h" /* for NBuffers */ +#include "optimizer/tlist.h" /* for get_expr */ + +#define ever ; 1 ; + +/* local funcs */ +static int xfunc_card_unreferenced(Query *queryInfo, + Expr *clause, Relid referenced); */ + +/* +** xfunc_trypullup -- +** Preliminary pullup of predicates, to allow for maximal pruning. +** Given a relation, check each of its paths and see if you can +** pullup clauses from its inner and outer. +*/ + +void xfunc_trypullup(Rel rel) +{ + LispValue y; /* list ptr */ + CInfo maxcinfo; /* The CInfo to pull up, as calculated by + xfunc_shouldpull() */ + JoinPath curpath; /* current path in list */ + int progress; /* has progress been made this time through? */ + int clausetype; + + do { + progress = false; /* no progress yet in this iteration */ + foreach(y, get_pathlist(rel)) { + curpath = (JoinPath)lfirst(y); + + /* + ** for each operand, attempt to pullup predicates until first + ** failure. + */ + for(ever) { + /* No, the following should NOT be '==' !! */ + if (clausetype = + xfunc_shouldpull((Path)get_innerjoinpath(curpath), + curpath, INNER, &maxcinfo)) { + + xfunc_pullup((Path)get_innerjoinpath(curpath), + curpath, maxcinfo, INNER, clausetype); + progress = true; + }else + break; + } + for(ever) { + + /* No, the following should NOT be '==' !! */ + if (clausetype = + xfunc_shouldpull((Path)get_outerjoinpath(curpath), + curpath, OUTER, &maxcinfo)) { + + xfunc_pullup((Path)get_outerjoinpath(curpath), + curpath, maxcinfo, OUTER, clausetype); + progress = true; + }else + break; + } + + /* + ** make sure the unpruneable flag bubbles up, i.e. + ** if anywhere below us in the path pruneable is false, + ** then pruneable should be false here + */ + if (get_pruneable(get_parent(curpath)) && + (!get_pruneable(get_parent + ((Path)get_innerjoinpath(curpath))) || + !get_pruneable(get_parent((Path) + get_outerjoinpath(curpath))))) { + + set_pruneable(get_parent(curpath),false); + progress = true; + } + } + } while(progress); +} + +/* + ** xfunc_shouldpull -- + ** find clause with highest rank, and decide whether to pull it up + ** from child to parent. Currently we only pullup secondary join clauses + ** that are in the pathclauseinfo. Secondary hash and sort clauses are + ** left where they are. + ** If we find an expensive function but decide *not* to pull it up, + ** we'd better set the unpruneable flag. -- JMH, 11/11/92 + ** + ** Returns: 0 if nothing left to pullup + ** XFUNC_LOCPRD if a local predicate is to be pulled up + ** XFUNC_JOINPRD if a secondary join predicate is to be pulled up + */ +int xfunc_shouldpull(Query* queryInfo, + Path childpath, + JoinPath parentpath, + int whichchild, + CInfo *maxcinfopt) /* Out: pointer to clause to pullup */ +{ + LispValue clauselist, tmplist; /* lists of clauses */ + CInfo maxcinfo; /* clause to pullup */ + LispValue primjoinclause /* primary join clause */ + = xfunc_primary_join(parentpath); + Cost tmprank, maxrank = (-1 * MAXFLOAT); /* ranks of clauses */ + Cost joinselec = 0; /* selectivity of the join predicate */ + Cost joincost = 0; /* join cost + primjoinclause cost */ + int retval = XFUNC_LOCPRD; + + clauselist = get_locclauseinfo(childpath); + + if (clauselist != LispNil) { + /* find local predicate with maximum rank */ + for (tmplist = clauselist, + maxcinfo = (CInfo) lfirst(tmplist), + maxrank = xfunc_rank(get_clause(maxcinfo)); + tmplist != LispNil; + tmplist = lnext(tmplist)) { + + if ((tmprank = xfunc_rank(get_clause((CInfo)lfirst(tmplist)))) + > maxrank) { + maxcinfo = (CInfo) lfirst(tmplist); + maxrank = tmprank; + } + } + } + + /* + ** If child is a join path, and there are multiple join clauses, + ** see if any join clause has even higher rank than the highest + ** local predicate + */ + if (is_join(childpath) && xfunc_num_join_clauses((JoinPath)childpath) > 1) + for (tmplist = get_pathclauseinfo((JoinPath)childpath); + tmplist != LispNil; + tmplist = lnext(tmplist)) { + + if (tmplist != LispNil && + (tmprank = xfunc_rank(get_clause((CInfo) lfirst(tmplist)))) + > maxrank) { + maxcinfo = (CInfo) lfirst(tmplist); + maxrank = tmprank; + retval = XFUNC_JOINPRD; + } + } + if (maxrank == (-1 * MAXFLOAT)) /* no expensive clauses */ + return(0); + + /* + ** Pullup over join if clause is higher rank than join, or if + ** join is nested loop and current path is inner child (note that + ** restrictions on the inner of a nested loop don't buy you anything -- + ** you still have to scan the entire inner relation each time). + ** Note that the cost of a secondary join clause is only what's + ** calculated by xfunc_expense(), since the actual joining + ** (i.e. the usual path_cost) is paid for by the primary join clause. + */ + if (primjoinclause != LispNil) { + joinselec = compute_clause_selec(queryInfo, primjoinclause, LispNil); + joincost = xfunc_join_expense(parentpath, whichchild); + + if (XfuncMode == XFUNC_PULLALL || + (XfuncMode != XFUNC_WAIT && + ((joincost != 0 && + (maxrank = xfunc_rank(get_clause(maxcinfo))) > + ((joinselec - 1.0) / joincost)) + || (joincost == 0 && joinselec < 1) + || (!is_join(childpath) + && (whichchild == INNER) + && IsA(parentpath,JoinPath) + && !IsA(parentpath,HashPath) + && !IsA(parentpath,MergePath))))) { + + *maxcinfopt = maxcinfo; + return(retval); + + }else if (maxrank != -(MAXFLOAT)) { + /* + ** we've left an expensive restriction below a join. Since + ** we may pullup this restriction in predmig.c, we'd best + ** set the Rel of this join to be unpruneable + */ + set_pruneable(get_parent(parentpath), false); + /* and fall through */ + } + } + return(0); +} + + +/* + ** xfunc_pullup -- + ** move clause from child pathnode to parent pathnode. This operation + ** makes the child pathnode produce a larger relation than it used to. + ** This means that we must construct a new Rel just for the childpath, + ** although this Rel will not be added to the list of Rels to be joined up + ** in the query; it's merely a parent for the new childpath. + ** We also have to fix up the path costs of the child and parent. + ** + ** Now returns a pointer to the new pulled-up CInfo. -- JMH, 11/18/92 + */ +CInfo xfunc_pullup(Query* queryInfo, + Path childpath, + JoinPath parentpath, + CInfo cinfo, /* clause to pull up */ + int whichchild,/* whether child is INNER or OUTER of join */ + int clausetype)/* whether clause to pull is join or local */ +{ + Path newkid; + Rel newrel; + Cost pulled_selec; + Cost cost; + CInfo newinfo; + + /* remove clause from childpath */ + newkid = (Path)copyObject((Node)childpath); + if (clausetype == XFUNC_LOCPRD) { + set_locclauseinfo(newkid, + xfunc_LispRemove((LispValue)cinfo, + (List)get_locclauseinfo(newkid))); + }else { + set_pathclauseinfo + ((JoinPath)newkid, + xfunc_LispRemove((LispValue)cinfo, + (List)get_pathclauseinfo((JoinPath)newkid))); + } + + /* + ** give the new child path its own Rel node that reflects the + ** lack of the pulled-up predicate + */ + pulled_selec = compute_clause_selec(queryInfo, + get_clause(cinfo), LispNil); + xfunc_copyrel(get_parent(newkid), &newrel); + set_parent(newkid, newrel); + set_pathlist(newrel, lcons(newkid, NIL)); + set_unorderedpath(newrel, (PathPtr)newkid); + set_cheapestpath(newrel, (PathPtr)newkid); + set_size(newrel, + (Count)((Cost)get_size(get_parent(childpath)) / pulled_selec)); + + /* + ** fix up path cost of newkid. To do this we subtract away all the + ** xfunc_costs of childpath, then recompute the xfunc_costs of newkid + */ + cost = get_path_cost(newkid) - xfunc_get_path_cost(childpath); + Assert(cost >= 0); + set_path_cost(newkid, cost); + cost = get_path_cost(newkid) + xfunc_get_path_cost(newkid); + set_path_cost(newkid, cost); + + /* + ** We copy the cinfo, since it may appear in other plans, and we're going + ** to munge it. -- JMH, 7/22/92 + */ + newinfo = (CInfo)copyObject((Node)cinfo); + + /* + ** Fix all vars in the clause + ** to point to the right varno and varattno in parentpath + */ + xfunc_fixvars(get_clause(newinfo), newrel, whichchild); + + /* add clause to parentpath, and fix up its cost. */ + set_locclauseinfo(parentpath, + lispCons((LispValue)newinfo, + (LispValue)get_locclauseinfo(parentpath))); + /* put new childpath into the path tree */ + if (whichchild == INNER) { + set_innerjoinpath(parentpath, (pathPtr)newkid); + }else { + set_outerjoinpath(parentpath, (pathPtr)newkid); + } + + /* + ** recompute parentpath cost from scratch -- the cost + ** of the join method has changed + */ + cost = xfunc_total_path_cost(parentpath); + set_path_cost(parentpath, cost); + + return(newinfo); +} + +/* + ** calculate (selectivity-1)/cost. + */ +Cost xfunc_rank(Query *queryInfo,LispValue clause) +{ + Cost selec = compute_clause_selec(queryInfo, clause, LispNil); + Cost cost = xfunc_expense(queryInfo,clause); + + if (cost == 0) + if (selec > 1) return(MAXFLOAT); + else return(-(MAXFLOAT)); + return((selec - 1)/cost); +} + +/* + ** Find the "global" expense of a clause; i.e. the local expense divided + ** by the cardinalities of all the base relations of the query that are *not* + ** referenced in the clause. + */ +Cost xfunc_expense(Query* queryInfo, clause) + LispValue clause; +{ + Cost cost = xfunc_local_expense(clause); + + if (cost) + { + Count card = xfunc_card_unreferenced(queryInfo, clause, LispNil); + if (card) + cost /= card; + } + + return(cost); +} + +/* + ** xfunc_join_expense -- + ** Find global expense of a join clause + */ +Cost xfunc_join_expense(Query *queryInfo, JoinPath path, int whichchild) +{ + LispValue primjoinclause = xfunc_primary_join(path); + + /* + ** the second argument to xfunc_card_unreferenced reflects all the + ** relations involved in the join clause, i.e. all the relids in the Rel + ** of the join clause + */ + Count card = 0; + Cost cost = xfunc_expense_per_tuple(path, whichchild); + + card = xfunc_card_unreferenced(queryInfo, + primjoinclause, + get_relids(get_parent(path))); + if (primjoinclause) + cost += xfunc_local_expense(primjoinclause); + + if (card) cost /= card; + + return(cost); +} + +/* + ** Recursively find the per-tuple expense of a clause. See + ** xfunc_func_expense for more discussion. + */ +Cost xfunc_local_expense(LispValue clause) +{ + Cost cost = 0; /* running expense */ + LispValue tmpclause; + + /* First handle the base case */ + if (IsA(clause,Const) || IsA(clause,Var) || IsA(clause,Param)) + return(0); + /* now other stuff */ + else if (IsA(clause,Iter)) + /* Too low. Should multiply by the expected number of iterations. */ + return(xfunc_local_expense(get_iterexpr((Iter)clause))); + else if (IsA(clause,ArrayRef)) + return(xfunc_local_expense(get_refexpr((ArrayRef)clause))); + else if (fast_is_clause(clause)) + return(xfunc_func_expense((LispValue)get_op(clause), + (LispValue)get_opargs(clause))); + else if (fast_is_funcclause(clause)) + return(xfunc_func_expense((LispValue)get_function(clause), + (LispValue)get_funcargs(clause))); + else if (fast_not_clause(clause)) + return(xfunc_local_expense(lsecond(clause))); + else if (fast_or_clause(clause)) { + /* find cost of evaluating each disjunct */ + for (tmpclause = lnext(clause); tmpclause != LispNil; + tmpclause = lnext(tmpclause)) + cost += xfunc_local_expense(lfirst(tmpclause)); + return(cost); + }else { + elog(WARN, "Clause node of undetermined type"); + return(-1); + } +} + +/* + ** xfunc_func_expense -- + ** given a Func or Oper and its args, find its expense. + ** Note: in Stonebraker's SIGMOD '91 paper, he uses a more complicated metric + ** than the one here. We can ignore the expected number of tuples for + ** our calculations; we just need the per-tuple expense. But he also + ** proposes components to take into account the costs of accessing disk and + ** archive. We didn't adopt that scheme here; eventually the vacuum + ** cleaner should be able to tell us what percentage of bytes to find on + ** which storage level, and that should be multiplied in appropriately + ** in the cost function below. Right now we don't model the cost of + ** accessing secondary or tertiary storage, since we don't have sufficient + ** stats to do it right. + */ +Cost xfunc_func_expense(LispValue node, LispValue args) +{ + HeapTuple tupl; /* the pg_proc tuple for each function */ + Form_pg_proc proc; /* a data structure to hold the pg_proc tuple */ + int width = 0; /* byte width of the field referenced by each clause */ + RegProcedure funcid; /* ID of function associate with node */ + Cost cost = 0; /* running expense */ + LispValue tmpclause; + LispValue operand; /* one operand of an operator */ + + if (IsA(node,Oper)) { + /* don't trust the opid in the Oper node. Use the opno. */ + if (!(funcid = get_opcode(get_opno((Oper)node)))) + elog(WARN, "Oper's function is undefined"); + }else { + funcid = get_funcid((Func)node); + } + + /* look up tuple in cache */ + tupl = SearchSysCacheTuple(PROOID, ObjectIdGetDatum(funcid),0,0,0); + if (!HeapTupleIsValid(tupl)) + elog(WARN, "Cache lookup failed for procedure %d", funcid); + proc = (Form_pg_proc) GETSTRUCT(tupl); + + /* + ** if it's a Postquel function, its cost is stored in the + ** associated plan. + */ + if (proc->prolang == SQLlanguageId) { + LispValue tmpplan; + List planlist; + + if (IsA(node,Oper) || get_func_planlist((Func)node) == LispNil) { + Oid *argOidVect; /* vector of argtypes */ + char *pq_src; /* text of PQ function */ + int nargs; /* num args to PQ function */ + QueryTreeList *queryTree_list; /* dummy variable */ + + /* + ** plan the function, storing it in the Func node for later + ** use by the executor. + */ + pq_src = (char *) textout(&(proc->prosrc)); + nargs = proc->pronargs; + if (nargs > 0) + argOidVect = proc->proargtypes; + planlist = (List)pg_plan(pq_src, argOidVect, nargs, + &parseTree_list, None); + if (IsA(node,Func)) + set_func_planlist((Func)node, planlist); + + }else {/* plan has been cached inside the Func node already */ + planlist = get_func_planlist((Func)node); + } + + /* + ** Return the sum of the costs of the plans (the PQ function + ** may have many queries in its body). + */ + foreach(tmpplan, planlist) + cost += get_cost((Plan)lfirst(tmpplan)); + return(cost); + }else { /* it's a C function */ + /* + ** find the cost of evaluating the function's arguments + ** and the width of the operands + */ + for (tmpclause = args; tmpclause != LispNil; + tmpclause = lnext(tmpclause)) { + + if ((operand = lfirst(tmpclause)) != LispNil) { + cost += xfunc_local_expense(operand); + width += xfunc_width(operand); + } + } + + /* + ** when stats become available, add in cost of accessing secondary + ** and tertiary storage here. + */ + return(cost + + (Cost)proc->propercall_cpu + + (Cost)proc->properbyte_cpu * (Cost)proc->probyte_pct/100.00 * + (Cost)width + /* + * Pct_of_obj_in_mem + DISK_COST * proc->probyte_pct/100.00 * width + * Pct_of_obj_on_disk + + ARCH_COST * proc->probyte_pct/100.00 * width + * Pct_of_obj_on_arch + */ + ); + } +} + +/* + ** xfunc_width -- + ** recursively find the width of a expression + */ + +int xfunc_width(LispValue clause) +{ + Relation rd; /* Relation Descriptor */ + HeapTuple tupl; /* structure to hold a cached tuple */ + TypeTupleForm type; /* structure to hold a type tuple */ + int retval = 0; + + if (IsA(clause,Const)) { + /* base case: width is the width of this constant */ + retval = get_constlen((Const) clause); + goto exit; + }else if (IsA(clause,ArrayRef)) { + /* base case: width is width of the refelem within the array */ + retval = get_refelemlength((ArrayRef)clause); + goto exit; + }else if (IsA(clause,Var)) { + /* base case: width is width of this attribute */ + tupl = SearchSysCacheTuple(TYPOID, + PointerGetDatum(get_vartype((Var)clause)), + 0,0,0); + if (!HeapTupleIsValid(tupl)) + elog(WARN, "Cache lookup failed for type %d", + get_vartype((Var)clause)); + type = (TypeTupleForm) GETSTRUCT(tupl); + if (get_varattno((Var)clause) == 0) { + /* clause is a tuple. Get its width */ + rd = heap_open(type->typrelid); + retval = xfunc_tuple_width(rd); + heap_close(rd); + }else { + /* attribute is a base type */ + retval = type->typlen; + } + goto exit; + }else if (IsA(clause,Param)) { + if (typeid_get_relid(get_paramtype((Param)clause))) { + /* Param node returns a tuple. Find its width */ + rd = heap_open(typeid_get_relid(get_paramtype((Param)clause))); + retval = xfunc_tuple_width(rd); + heap_close(rd); + }else if (get_param_tlist((Param)clause) != LispNil) { + /* Param node projects a complex type */ + Assert(length(get_param_tlist((Param)clause)) == 1); /* sanity */ + retval = + xfunc_width((LispValue) + get_expr(lfirst(get_param_tlist((Param)clause)))); + }else { + /* Param node returns a base type */ + retval = tlen(get_id_type(get_paramtype((Param)clause))); + } + goto exit; + }else if (IsA(clause,Iter)) { + /* + ** An Iter returns a setof things, so return the width of a single + ** thing. + ** Note: THIS MAY NOT WORK RIGHT WHEN AGGS GET FIXED, + ** SINCE AGG FUNCTIONS CHEW ON THE WHOLE SETOF THINGS!!!! + ** This whole Iter business is bogus, anyway. + */ + retval = xfunc_width(get_iterexpr((Iter)clause)); + goto exit; + }else if (fast_is_clause(clause)) { + /* + ** get function associated with this Oper, and treat this as + ** a Func + */ + tupl = SearchSysCacheTuple(OPROID, + ObjectIdGetDatum(get_opno((Oper)get_op(clause))), + 0,0,0); + if (!HeapTupleIsValid(tupl)) + elog(WARN, "Cache lookup failed for procedure %d", + get_opno((Oper)get_op(clause))); + return(xfunc_func_width + ((RegProcedure)(((OperatorTupleForm)(GETSTRUCT(tupl)))->oprcode), + (LispValue)get_opargs(clause))); + }else if (fast_is_funcclause(clause)) { + Func func = (Func)get_function(clause); + if (get_func_tlist(func) != LispNil) { + /* this function has a projection on it. Get the length + of the projected attribute */ + Assert(length(get_func_tlist(func)) == 1); /* sanity */ + retval = + xfunc_width((LispValue) + get_expr(lfirst(get_func_tlist(func)))); + goto exit; + }else { + return(xfunc_func_width((RegProcedure)get_funcid(func), + (LispValue)get_funcargs(clause))); + } + }else { + elog(WARN, "Clause node of undetermined type"); + return(-1); + } + + exit: + if (retval == -1) + retval = VARLEN_DEFAULT; + return(retval); +} + +/* + ** xfunc_card_unreferenced: + ** find all relations not referenced in clause, and multiply their + ** cardinalities. Ignore relation of cardinality 0. + ** User may pass in referenced list, if they know it (useful + ** for joins). + */ +static Count +xfunc_card_unreferenced(Query *queryInfo, + LispValue clause, Relid referenced) +{ + Relid unreferenced, allrelids = LispNil; + LispValue temp; + + /* find all relids of base relations referenced in query */ + foreach (temp,queryInfo->base_relation_list_) + { + Assert(lnext(get_relids((Rel)lfirst(temp))) == LispNil); + allrelids = lappend(allrelids, + lfirst(get_relids((Rel)lfirst(temp)))); + } + + /* find all relids referenced in query but not in clause */ + if (!referenced) + referenced = xfunc_find_references(clause); + unreferenced = set_difference(allrelids, referenced); + + return(xfunc_card_product(unreferenced)); +} + +/* + ** xfunc_card_product + ** multiple together cardinalities of a list relations. + */ +Count xfunc_card_product(Query *queryInfo, Relid relids) +{ + LispValue cinfonode; + LispValue temp; + Rel currel; + Cost tuples; + Count retval = 0; + + foreach(temp,relids) { + currel = get_rel(lfirst(temp)); + tuples = get_tuples(currel); + + if (tuples) { /* not of cardinality 0 */ + /* factor in the selectivity of all zero-cost clauses */ + foreach (cinfonode, get_clauseinfo(currel)) { + if (!xfunc_expense(queryInfo,get_clause((CInfo)lfirst(cinfonode)))) + tuples *= + compute_clause_selec(queryInfo, + get_clause((CInfo)lfirst(cinfonode)), + LispNil); + } + + if (retval == 0) retval = tuples; + else retval *= tuples; + } + } + if (retval == 0) retval = 1; /* saves caller from dividing by zero */ + return(retval); +} + + +/* + ** xfunc_find_references: + ** Traverse a clause and find all relids referenced in the clause. + */ +List xfunc_find_references(LispValue clause) +{ + List retval = (List)LispNil; + LispValue tmpclause; + + /* Base cases */ + if (IsA(clause,Var)) + return(lispCons(lfirst(get_varid((Var)clause)), LispNil)); + else if (IsA(clause,Const) || IsA(clause,Param)) + return((List)LispNil); + + /* recursion */ + else if (IsA(clause,Iter)) + /* Too low. Should multiply by the expected number of iterations. maybe */ + return(xfunc_find_references(get_iterexpr((Iter)clause))); + else if (IsA(clause,ArrayRef)) + return(xfunc_find_references(get_refexpr((ArrayRef)clause))); + else if (fast_is_clause(clause)) { + /* string together result of all operands of Oper */ + for (tmpclause = (LispValue)get_opargs(clause); tmpclause != LispNil; + tmpclause = lnext(tmpclause)) + retval = nconc(retval, xfunc_find_references(lfirst(tmpclause))); + return(retval); + }else if (fast_is_funcclause(clause)) { + /* string together result of all args of Func */ + for (tmpclause = (LispValue)get_funcargs(clause); + tmpclause != LispNil; + tmpclause = lnext(tmpclause)) + retval = nconc(retval, xfunc_find_references(lfirst(tmpclause))); + return(retval); + }else if (fast_not_clause(clause)) + return(xfunc_find_references(lsecond(clause))); + else if (fast_or_clause(clause)) { + /* string together result of all operands of OR */ + for (tmpclause = lnext(clause); tmpclause != LispNil; + tmpclause = lnext(tmpclause)) + retval = nconc(retval, xfunc_find_references(lfirst(tmpclause))); + return(retval); + }else { + elog(WARN, "Clause node of undetermined type"); + return((List)LispNil); + } +} + +/* + ** xfunc_primary_join: + ** Find the primary join clause: for Hash and Merge Joins, this is the + ** min rank Hash or Merge clause, while for Nested Loop it's the + ** min rank pathclause + */ +LispValue xfunc_primary_join(JoinPath pathnode) +{ + LispValue joinclauselist = get_pathclauseinfo(pathnode); + CInfo mincinfo; + LispValue tmplist; + LispValue minclause = LispNil; + Cost minrank, tmprank; + + if (IsA(pathnode,MergePath)) + { + for(tmplist = get_path_mergeclauses((MergePath)pathnode), + minclause = lfirst(tmplist), + minrank = xfunc_rank(minclause); + tmplist != LispNil; + tmplist = lnext(tmplist)) + if ((tmprank = xfunc_rank(lfirst(tmplist))) + < minrank) + { + minrank = tmprank; + minclause = lfirst(tmplist); + } + return(minclause); + } + else if (IsA(pathnode,HashPath)) + { + for(tmplist = get_path_hashclauses((HashPath)pathnode), + minclause = lfirst(tmplist), + minrank = xfunc_rank(minclause); + tmplist != LispNil; + tmplist = lnext(tmplist)) + if ((tmprank = xfunc_rank(lfirst(tmplist))) + < minrank) + { + minrank = tmprank; + minclause = lfirst(tmplist); + } + return(minclause); + } + + /* if we drop through, it's nested loop join */ + if (joinclauselist == LispNil) + return(LispNil); + + for(tmplist = joinclauselist, mincinfo = (CInfo) lfirst(joinclauselist), + minrank = xfunc_rank(get_clause((CInfo) lfirst(tmplist))); + tmplist != LispNil; + tmplist = lnext(tmplist)) + if ((tmprank = xfunc_rank(get_clause((CInfo) lfirst(tmplist)))) + < minrank) + { + minrank = tmprank; + mincinfo = (CInfo) lfirst(tmplist); + } + return((LispValue)get_clause(mincinfo)); +} + +/* + ** xfunc_get_path_cost + ** get the expensive function costs of the path + */ +Cost xfunc_get_path_cost(Query *queryInfo, Path pathnode) +{ + Cost cost = 0; + LispValue tmplist; + Cost selec = 1.0; + + /* + ** first add in the expensive local function costs. + ** We ensure that the clauses are sorted by rank, so that we + ** know (via selectivities) the number of tuples that will be checked + ** by each function. If we're not doing any optimization of expensive + ** functions, we don't sort. + */ + if (XfuncMode != XFUNC_OFF) + set_locclauseinfo(pathnode, lisp_qsort(get_locclauseinfo(pathnode), + xfunc_cinfo_compare)); + for(tmplist = get_locclauseinfo(pathnode), selec = 1.0; + tmplist != LispNil; + tmplist = lnext(tmplist)) + { + cost += (Cost)(xfunc_local_expense(get_clause((CInfo)lfirst(tmplist))) + * (Cost)get_tuples(get_parent(pathnode)) * selec); + selec *= compute_clause_selec(queryInfo, + get_clause((CInfo)lfirst(tmplist)), + LispNil); + } + + /* + ** Now add in any node-specific expensive function costs. + ** Again, we must ensure that the clauses are sorted by rank. + */ + if (IsA(pathnode,JoinPath)) + { + if (XfuncMode != XFUNC_OFF) + set_pathclauseinfo((JoinPath)pathnode, lisp_qsort + (get_pathclauseinfo((JoinPath)pathnode), + xfunc_cinfo_compare)); + for(tmplist = get_pathclauseinfo((JoinPath)pathnode), selec = 1.0; + tmplist != LispNil; + tmplist = lnext(tmplist)) + { + cost += (Cost)(xfunc_local_expense(get_clause((CInfo)lfirst(tmplist))) + * (Cost)get_tuples(get_parent(pathnode)) * selec); + selec *= compute_clause_selec(queryInfo, + get_clause((CInfo)lfirst(tmplist)), + LispNil); + } + } + if (IsA(pathnode,HashPath)) + { + if (XfuncMode != XFUNC_OFF) + set_path_hashclauses + ((HashPath)pathnode, + lisp_qsort(get_path_hashclauses((HashPath)pathnode), + xfunc_clause_compare)); + for(tmplist = get_path_hashclauses((HashPath)pathnode), selec = 1.0; + tmplist != LispNil; + tmplist = lnext(tmplist)) + { + cost += (Cost)(xfunc_local_expense(lfirst(tmplist)) + * (Cost)get_tuples(get_parent(pathnode)) * selec); + selec *= compute_clause_selec(queryInfo, + lfirst(tmplist), LispNil); + } + } + if (IsA(pathnode,MergePath)) + { + if (XfuncMode != XFUNC_OFF) + set_path_mergeclauses + ((MergePath)pathnode, + lisp_qsort(get_path_mergeclauses((MergePath)pathnode), + xfunc_clause_compare)); + for(tmplist = get_path_mergeclauses((MergePath)pathnode), selec = 1.0; + tmplist != LispNil; + tmplist = lnext(tmplist)) + { + cost += (Cost)(xfunc_local_expense(lfirst(tmplist)) + * (Cost)get_tuples(get_parent(pathnode)) * selec); + selec *= compute_clause_selec(queryInfo, + lfirst(tmplist), LispNil); + } + } + Assert(cost >= 0); + return(cost); +} + +/* + ** Recalculate the cost of a path node. This includes the basic cost of the + ** node, as well as the cost of its expensive functions. + ** We need to do this to the parent after pulling a clause from a child into a + ** parent. Thus we should only be calling this function on JoinPaths. + */ +Cost xfunc_total_path_cost(JoinPath pathnode) +{ + Cost cost = xfunc_get_path_cost((Path)pathnode); + + Assert(IsA(pathnode,JoinPath)); + if (IsA(pathnode,MergePath)) + { + MergePath mrgnode = (MergePath)pathnode; + cost += cost_mergesort(get_path_cost((Path)get_outerjoinpath(mrgnode)), + get_path_cost((Path)get_innerjoinpath(mrgnode)), + get_outersortkeys(mrgnode), + get_innersortkeys(mrgnode), + get_tuples(get_parent((Path)get_outerjoinpath + (mrgnode))), + get_tuples(get_parent((Path)get_innerjoinpath + (mrgnode))), + get_width(get_parent((Path)get_outerjoinpath + (mrgnode))), + get_width(get_parent((Path)get_innerjoinpath + (mrgnode)))); + Assert(cost >= 0); + return(cost); + } + else if (IsA(pathnode,HashPath)) + { + HashPath hashnode = (HashPath)pathnode; + cost += cost_hashjoin(get_path_cost((Path)get_outerjoinpath(hashnode)), + get_path_cost((Path)get_innerjoinpath(hashnode)), + get_outerhashkeys(hashnode), + get_innerhashkeys(hashnode), + get_tuples(get_parent((Path)get_outerjoinpath + (hashnode))), + get_tuples(get_parent((Path)get_innerjoinpath + (hashnode))), + get_width(get_parent((Path)get_outerjoinpath + (hashnode))), + get_width(get_parent((Path)get_innerjoinpath + (hashnode)))); + Assert (cost >= 0); + return(cost); + } + else /* Nested Loop Join */ + { + cost += cost_nestloop(get_path_cost((Path)get_outerjoinpath(pathnode)), + get_path_cost((Path)get_innerjoinpath(pathnode)), + get_tuples(get_parent((Path)get_outerjoinpath + (pathnode))), + get_tuples(get_parent((Path)get_innerjoinpath + (pathnode))), + get_pages(get_parent((Path)get_outerjoinpath + (pathnode))), + IsA(get_innerjoinpath(pathnode),IndexPath)); + Assert(cost >= 0); + return(cost); + } +} + + +/* + ** xfunc_expense_per_tuple -- + ** return the expense of the join *per-tuple* of the input relation. + ** The cost model here is that a join costs + ** k*card(outer)*card(inner) + l*card(outer) + m*card(inner) + n + ** + ** We treat the l and m terms by considering them to be like restrictions + ** constrained to be right under the join. Thus the cost per inner and + ** cost per outer of the join is different, reflecting these virtual nodes. + ** + ** The cost per tuple of outer is k + l/referenced(inner). Cost per tuple + ** of inner is k + m/referenced(outer). + ** The constants k, l, m and n depend on the join method. Measures here are + ** based on the costs in costsize.c, with fudging for HashJoin and Sorts to + ** make it fit our model (the 'q' in HashJoin results in a + ** card(outer)/card(inner) term, and sorting results in a log term. + + */ +Cost xfunc_expense_per_tuple(JoinPath joinnode, int whichchild) +{ + Rel outerrel = get_parent((Path)get_outerjoinpath(joinnode)); + Rel innerrel = get_parent((Path)get_innerjoinpath(joinnode)); + Count outerwidth = get_width(outerrel); + Count outers_per_page = ceil(BLCKSZ/(outerwidth + sizeof(HeapTupleData))); + + if (IsA(joinnode,HashPath)) + { + if (whichchild == INNER) + return((1 + _CPU_PAGE_WEIGHT_)*outers_per_page/NBuffers); + else + return(((1 + _CPU_PAGE_WEIGHT_)*outers_per_page/NBuffers) + + _CPU_PAGE_WEIGHT_ + / xfunc_card_product(get_relids(innerrel))); + } + else if (IsA(joinnode,MergePath)) + { + /* assumes sort exists, and costs one (I/O + CPU) per tuple */ + if (whichchild == INNER) + return((2*_CPU_PAGE_WEIGHT_ + 1) + / xfunc_card_product(get_relids(outerrel))); + else + return((2*_CPU_PAGE_WEIGHT_ + 1) + / xfunc_card_product(get_relids(innerrel))); + } + else /* nestloop */ + { + Assert(IsA(joinnode,JoinPath)); + return(_CPU_PAGE_WEIGHT_); + } +} + +/* + ** xfunc_fixvars -- + ** After pulling up a clause, we must walk its expression tree, fixing Var + ** nodes to point to the correct varno (either INNER or OUTER, depending + ** on which child the clause was pulled from), and the right varattno in the + ** target list of the child's former relation. If the target list of the + ** child Rel does not contain the attribute we need, we add it. + */ +void xfunc_fixvars(LispValue clause, /* clause being pulled up */ + Rel rel, /* rel it's being pulled from */ + int varno) /* whether rel is INNER or OUTER of join */ +{ + LispValue tmpclause; /* temporary variable */ + TargetEntry *tle; /* tlist member corresponding to var */ + + + if (IsA(clause,Const) || IsA(clause,Param)) return; + else if (IsA(clause,Var)) + { + /* here's the meat */ + tle = tlistentry_member((Var)clause, get_targetlist(rel)); + if (tle == LispNil) + { + /* + ** The attribute we need is not in the target list, + ** so we have to add it. + ** + */ + add_tl_element(rel, (Var)clause); + tle = tlistentry_member((Var)clause, get_targetlist(rel)); + } + set_varno(((Var)clause), varno); + set_varattno(((Var)clause), get_resno(get_resdom(get_entry(tle)))); + } + else if (IsA(clause,Iter)) + xfunc_fixvars(get_iterexpr((Iter)clause), rel, varno); + else if (fast_is_clause(clause)) + { + xfunc_fixvars(lfirst(lnext(clause)), rel, varno); + xfunc_fixvars(lfirst(lnext(lnext(clause))), rel, varno); + } + else if (fast_is_funcclause(clause)) + for (tmpclause = lnext(clause); tmpclause != LispNil; + tmpclause = lnext(tmpclause)) + xfunc_fixvars(lfirst(tmpclause), rel, varno); + else if (fast_not_clause(clause)) + xfunc_fixvars(lsecond(clause), rel, varno); + else if (fast_or_clause(clause)) + for (tmpclause = lnext(clause); tmpclause != LispNil; + tmpclause = lnext(tmpclause)) + xfunc_fixvars(lfirst(tmpclause), rel, varno); + else + { + elog(WARN, "Clause node of undetermined type"); + } +} + + +/* + ** Comparison function for lisp_qsort() on a list of CInfo's. + ** arg1 and arg2 should really be of type (CInfo *). + */ +int xfunc_cinfo_compare(void *arg1, void *arg2) +{ + CInfo info1 = *(CInfo *) arg1; + CInfo info2 = *(CInfo *) arg2; + + LispValue clause1 = (LispValue) get_clause(info1), + clause2 = (LispValue) get_clause(info2); + + return(xfunc_clause_compare((void *) &clause1, (void *) &clause2)); +} + +/* + ** xfunc_clause_compare: comparison function for lisp_qsort() that compares two + ** clauses based on expense/(1 - selectivity) + ** arg1 and arg2 are really pointers to clauses. + */ +int xfunc_clause_compare(void *arg1, void *arg2) +{ + LispValue clause1 = *(LispValue *) arg1; + LispValue clause2 = *(LispValue *) arg2; + Cost rank1, /* total xfunc rank of clause1 */ + rank2; /* total xfunc rank of clause2 */ + + rank1 = xfunc_rank(clause1); + rank2 = xfunc_rank(clause2); + + if ( rank1 < rank2) + return(-1); + else if (rank1 == rank2) + return(0); + else return(1); +} + +/* + ** xfunc_disjunct_sort -- + ** given a list of clauses, for each clause sort the disjuncts by cost + ** (this assumes the predicates have been converted to Conjunctive NF) + ** Modifies the clause list! + */ +void xfunc_disjunct_sort(LispValue clause_list) +{ + LispValue temp; + + foreach(temp, clause_list) + if(or_clause(lfirst(temp))) + lnext(lfirst(temp)) = + lisp_qsort(lnext(lfirst(temp)), xfunc_disjunct_compare); +} + + +/* + ** xfunc_disjunct_compare: comparison function for qsort() that compares two + ** disjuncts based on cost/selec. + ** arg1 and arg2 are really pointers to disjuncts + */ +int xfunc_disjunct_compare(Query* queryInfo, void *arg1, void *arg2) +{ + LispValue disjunct1 = *(LispValue *) arg1; + LispValue disjunct2 = *(LispValue *) arg2; + Cost cost1, /* total cost of disjunct1 */ + cost2, /* total cost of disjunct2 */ + selec1, + selec2; + Cost rank1, rank2; + + cost1 = xfunc_expense(queryInfo, disjunct1); + cost2 = xfunc_expense(queryInfo, disjunct2); + selec1 = compute_clause_selec(queryInfo, + disjunct1, LispNil); + selec2 = compute_clause_selec(queryInfo, + disjunct2, LispNil); + + if (selec1 == 0) + rank1 = MAXFLOAT; + else if (cost1 == 0) + rank1 = 0; + else + rank1 = cost1/selec1; + + if (selec2 == 0) + rank2 = MAXFLOAT; + else if (cost2 == 0) + rank2 = 0; + else + rank2 = cost2/selec2; + + if ( rank1 < rank2) + return(-1); + else if (rank1 == rank2) + return(0); + else return(1); +} + +/* ------------------------ UTILITY FUNCTIONS ------------------------------- */ +/* + ** xfunc_func_width -- + ** Given a function OID and operands, find the width of the return value. + */ +int xfunc_func_width(RegProcedure funcid, LispValue args) +{ + Relation rd; /* Relation Descriptor */ + HeapTuple tupl; /* structure to hold a cached tuple */ + Form_pg_proc proc; /* structure to hold the pg_proc tuple */ + TypeTupleForm type; /* structure to hold the pg_type tuple */ + LispValue tmpclause; + int retval; + + /* lookup function and find its return type */ + Assert(RegProcedureIsValid(funcid)); + tupl = SearchSysCacheTuple(PROOID, ObjectIdGetDatum(funcid), 0,0,0); + if (!HeapTupleIsValid(tupl)) + elog(WARN, "Cache lookup failed for procedure %d", funcid); + proc = (Form_pg_proc) GETSTRUCT(tupl); + + /* if function returns a tuple, get the width of that */ + if (typeid_get_relid(proc->prorettype)) + { + rd = heap_open(typeid_get_relid(proc->prorettype)); + retval = xfunc_tuple_width(rd); + heap_close(rd); + goto exit; + } + else /* function returns a base type */ + { + tupl = SearchSysCacheTuple(TYPOID, + ObjectIdGetDatum(proc->prorettype), + 0,0,0); + if (!HeapTupleIsValid(tupl)) + elog(WARN, "Cache lookup failed for type %d", proc->prorettype); + type = (TypeTupleForm) GETSTRUCT(tupl); + /* if the type length is known, return that */ + if (type->typlen != -1) + { + retval = type->typlen; + goto exit; + } + else /* estimate the return size */ + { + /* find width of the function's arguments */ + for (tmpclause = args; tmpclause != LispNil; + tmpclause = lnext(tmpclause)) + retval += xfunc_width(lfirst(tmpclause)); + /* multiply by outin_ratio */ + retval = (int)(proc->prooutin_ratio/100.0 * retval); + goto exit; + } + } + exit: + return(retval); +} + +/* + ** xfunc_tuple_width -- + ** Return the sum of the lengths of all the attributes of a given relation + */ +int xfunc_tuple_width(Relation rd) +{ + int i; + int retval = 0; + TupleDesc tdesc = RelationGetTupleDescriptor(rd); + + for (i = 0; i < tdesc->natts; i++) + { + if (tdesc->attrs[i]->attlen != -1) + retval += tdesc->attrs[i]->attlen; + else retval += VARLEN_DEFAULT; + } + + return(retval); +} + +/* + ** xfunc_num_join_clauses -- + ** Find the number of join clauses associated with this join path + */ +int xfunc_num_join_clauses(JoinPath path) +{ + int num = length(get_pathclauseinfo(path)); + if (IsA(path,MergePath)) + return(num + length(get_path_mergeclauses((MergePath)path))); + else if (IsA(path,HashPath)) + return(num + length(get_path_hashclauses((HashPath)path))); + else return(num); +} + +/* + ** xfunc_LispRemove -- + ** Just like LispRemove, but it whines if the item to be removed ain't there + */ +LispValue xfunc_LispRemove(LispValue foo, List bar) +{ + LispValue temp = LispNil; + LispValue result = LispNil; + int sanity = false; + + for (temp = bar; !null(temp); temp = lnext(temp)) + if (! equal((Node)(foo),(Node)(lfirst(temp))) ) + { + result = lappend(result,lfirst(temp)); + } + else sanity = true; /* found a matching item to remove! */ + + if (!sanity) + elog(WARN, "xfunc_LispRemove: didn't find a match!"); + + return(result); +} + +#define Node_Copy(a, b, c, d) \ + if (NodeCopy((Node)((a)->d), (Node*)&((b)->d), c) != true) { \ + return false; \ + } + +/* + ** xfunc_copyrel -- + ** Just like _copyRel, but doesn't copy the paths + */ +bool xfunc_copyrel(Rel from, Rel *to) +{ + Rel newnode; + Pointer (*alloc)() = palloc; + + /* COPY_CHECKARGS() */ + if (to == NULL) + { + return false; + } + + /* COPY_CHECKNULL() */ + if (from == NULL) + { + (*to) = NULL; + return true; + } + + /* COPY_NEW(c) */ + newnode = (Rel)(*alloc)(classSize(Rel)); + if (newnode == NULL) + { + return false; + } + + /* ---------------- + * copy node superclass fields + * ---------------- + */ + CopyNodeFields((Node)from, (Node)newnode, alloc); + + /* ---------------- + * copy remainder of node + * ---------------- + */ + Node_Copy(from, newnode, alloc, relids); + + newnode->indexed = from->indexed; + newnode->pages = from->pages; + newnode->tuples = from->tuples; + newnode->size = from->size; + newnode->width = from->width; + + Node_Copy(from, newnode, alloc, targetlist); + /* No!!!! Node_Copy(from, newnode, alloc, pathlist); + Node_Copy(from, newnode, alloc, unorderedpath); + Node_Copy(from, newnode, alloc, cheapestpath); */ +#if 0 /* can't use Node_copy now. 2/95 -ay */ + Node_Copy(from, newnode, alloc, classlist); + Node_Copy(from, newnode, alloc, indexkeys); + Node_Copy(from, newnode, alloc, ordering); +#endif + Node_Copy(from, newnode, alloc, clauseinfo); + Node_Copy(from, newnode, alloc, joininfo); + Node_Copy(from, newnode, alloc, innerjoin); + Node_Copy(from, newnode, alloc, superrels); + + (*to) = newnode; + return true; +} |