/*------------------------------------------------------------------------- * * xfunc.c-- * Utility routines to handle expensive function optimization. * Includes xfunc_trypullup(), which attempts early pullup of predicates * to allow for maximal pruning. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * $Header: /cvsroot/pgsql/src/backend/optimizer/path/Attic/xfunc.c,v 1.1.1.1 1996/07/09 06:21:36 scrappy Exp $ * *------------------------------------------------------------------------- */ #ifndef WIN32 #include /* for MAXFLOAT on most systems */ #else #include #define MAXFLOAT DBL_MAX #endif /* WIN32 */ #include /* for MAXFLOAT on SunOS */ #include #include "postgres.h" #include "nodes/pg_list.h" #include "nodes/nodes.h" #include "nodes/primnodes.h" #include "nodes/relation.h" #include "utils/elog.h" #include "utils/palloc.h" #include "utils/syscache.h" #include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "utils/syscache.h" #include "catalog/pg_language.h" #include "planner/xfunc.h" #include "planner/clauses.h" #include "planner/pathnode.h" #include "planner/internal.h" #include "planner/cost.h" #include "planner/keys.h" #include "planner/tlist.h" #include "lib/lispsort.h" #include "access/heapam.h" #include "tcop/dest.h" #include "storage/buf_internals.h" /* for NBuffers */ #include "optimizer/tlist.h" /* for get_expr */ #define ever ; 1 ; /* local funcs */ static int xfunc_card_unreferenced(Query *queryInfo, Expr *clause, Relid referenced); */ /* ** xfunc_trypullup -- ** Preliminary pullup of predicates, to allow for maximal pruning. ** Given a relation, check each of its paths and see if you can ** pullup clauses from its inner and outer. */ void xfunc_trypullup(Rel rel) { LispValue y; /* list ptr */ CInfo maxcinfo; /* The CInfo to pull up, as calculated by xfunc_shouldpull() */ JoinPath curpath; /* current path in list */ int progress; /* has progress been made this time through? */ int clausetype; do { progress = false; /* no progress yet in this iteration */ foreach(y, get_pathlist(rel)) { curpath = (JoinPath)lfirst(y); /* ** for each operand, attempt to pullup predicates until first ** failure. */ for(ever) { /* No, the following should NOT be '==' !! */ if (clausetype = xfunc_shouldpull((Path)get_innerjoinpath(curpath), curpath, INNER, &maxcinfo)) { xfunc_pullup((Path)get_innerjoinpath(curpath), curpath, maxcinfo, INNER, clausetype); progress = true; }else break; } for(ever) { /* No, the following should NOT be '==' !! */ if (clausetype = xfunc_shouldpull((Path)get_outerjoinpath(curpath), curpath, OUTER, &maxcinfo)) { xfunc_pullup((Path)get_outerjoinpath(curpath), curpath, maxcinfo, OUTER, clausetype); progress = true; }else break; } /* ** make sure the unpruneable flag bubbles up, i.e. ** if anywhere below us in the path pruneable is false, ** then pruneable should be false here */ if (get_pruneable(get_parent(curpath)) && (!get_pruneable(get_parent ((Path)get_innerjoinpath(curpath))) || !get_pruneable(get_parent((Path) get_outerjoinpath(curpath))))) { set_pruneable(get_parent(curpath),false); progress = true; } } } while(progress); } /* ** xfunc_shouldpull -- ** find clause with highest rank, and decide whether to pull it up ** from child to parent. Currently we only pullup secondary join clauses ** that are in the pathclauseinfo. Secondary hash and sort clauses are ** left where they are. ** If we find an expensive function but decide *not* to pull it up, ** we'd better set the unpruneable flag. -- JMH, 11/11/92 ** ** Returns: 0 if nothing left to pullup ** XFUNC_LOCPRD if a local predicate is to be pulled up ** XFUNC_JOINPRD if a secondary join predicate is to be pulled up */ int xfunc_shouldpull(Query* queryInfo, Path childpath, JoinPath parentpath, int whichchild, CInfo *maxcinfopt) /* Out: pointer to clause to pullup */ { LispValue clauselist, tmplist; /* lists of clauses */ CInfo maxcinfo; /* clause to pullup */ LispValue primjoinclause /* primary join clause */ = xfunc_primary_join(parentpath); Cost tmprank, maxrank = (-1 * MAXFLOAT); /* ranks of clauses */ Cost joinselec = 0; /* selectivity of the join predicate */ Cost joincost = 0; /* join cost + primjoinclause cost */ int retval = XFUNC_LOCPRD; clauselist = get_locclauseinfo(childpath); if (clauselist != LispNil) { /* find local predicate with maximum rank */ for (tmplist = clauselist, maxcinfo = (CInfo) lfirst(tmplist), maxrank = xfunc_rank(get_clause(maxcinfo)); tmplist != LispNil; tmplist = lnext(tmplist)) { if ((tmprank = xfunc_rank(get_clause((CInfo)lfirst(tmplist)))) > maxrank) { maxcinfo = (CInfo) lfirst(tmplist); maxrank = tmprank; } } } /* ** If child is a join path, and there are multiple join clauses, ** see if any join clause has even higher rank than the highest ** local predicate */ if (is_join(childpath) && xfunc_num_join_clauses((JoinPath)childpath) > 1) for (tmplist = get_pathclauseinfo((JoinPath)childpath); tmplist != LispNil; tmplist = lnext(tmplist)) { if (tmplist != LispNil && (tmprank = xfunc_rank(get_clause((CInfo) lfirst(tmplist)))) > maxrank) { maxcinfo = (CInfo) lfirst(tmplist); maxrank = tmprank; retval = XFUNC_JOINPRD; } } if (maxrank == (-1 * MAXFLOAT)) /* no expensive clauses */ return(0); /* ** Pullup over join if clause is higher rank than join, or if ** join is nested loop and current path is inner child (note that ** restrictions on the inner of a nested loop don't buy you anything -- ** you still have to scan the entire inner relation each time). ** Note that the cost of a secondary join clause is only what's ** calculated by xfunc_expense(), since the actual joining ** (i.e. the usual path_cost) is paid for by the primary join clause. */ if (primjoinclause != LispNil) { joinselec = compute_clause_selec(queryInfo, primjoinclause, LispNil); joincost = xfunc_join_expense(parentpath, whichchild); if (XfuncMode == XFUNC_PULLALL || (XfuncMode != XFUNC_WAIT && ((joincost != 0 && (maxrank = xfunc_rank(get_clause(maxcinfo))) > ((joinselec - 1.0) / joincost)) || (joincost == 0 && joinselec < 1) || (!is_join(childpath) && (whichchild == INNER) && IsA(parentpath,JoinPath) && !IsA(parentpath,HashPath) && !IsA(parentpath,MergePath))))) { *maxcinfopt = maxcinfo; return(retval); }else if (maxrank != -(MAXFLOAT)) { /* ** we've left an expensive restriction below a join. Since ** we may pullup this restriction in predmig.c, we'd best ** set the Rel of this join to be unpruneable */ set_pruneable(get_parent(parentpath), false); /* and fall through */ } } return(0); } /* ** xfunc_pullup -- ** move clause from child pathnode to parent pathnode. This operation ** makes the child pathnode produce a larger relation than it used to. ** This means that we must construct a new Rel just for the childpath, ** although this Rel will not be added to the list of Rels to be joined up ** in the query; it's merely a parent for the new childpath. ** We also have to fix up the path costs of the child and parent. ** ** Now returns a pointer to the new pulled-up CInfo. -- JMH, 11/18/92 */ CInfo xfunc_pullup(Query* queryInfo, Path childpath, JoinPath parentpath, CInfo cinfo, /* clause to pull up */ int whichchild,/* whether child is INNER or OUTER of join */ int clausetype)/* whether clause to pull is join or local */ { Path newkid; Rel newrel; Cost pulled_selec; Cost cost; CInfo newinfo; /* remove clause from childpath */ newkid = (Path)copyObject((Node)childpath); if (clausetype == XFUNC_LOCPRD) { set_locclauseinfo(newkid, xfunc_LispRemove((LispValue)cinfo, (List)get_locclauseinfo(newkid))); }else { set_pathclauseinfo ((JoinPath)newkid, xfunc_LispRemove((LispValue)cinfo, (List)get_pathclauseinfo((JoinPath)newkid))); } /* ** give the new child path its own Rel node that reflects the ** lack of the pulled-up predicate */ pulled_selec = compute_clause_selec(queryInfo, get_clause(cinfo), LispNil); xfunc_copyrel(get_parent(newkid), &newrel); set_parent(newkid, newrel); set_pathlist(newrel, lcons(newkid, NIL)); set_unorderedpath(newrel, (PathPtr)newkid); set_cheapestpath(newrel, (PathPtr)newkid); set_size(newrel, (Count)((Cost)get_size(get_parent(childpath)) / pulled_selec)); /* ** fix up path cost of newkid. To do this we subtract away all the ** xfunc_costs of childpath, then recompute the xfunc_costs of newkid */ cost = get_path_cost(newkid) - xfunc_get_path_cost(childpath); Assert(cost >= 0); set_path_cost(newkid, cost); cost = get_path_cost(newkid) + xfunc_get_path_cost(newkid); set_path_cost(newkid, cost); /* ** We copy the cinfo, since it may appear in other plans, and we're going ** to munge it. -- JMH, 7/22/92 */ newinfo = (CInfo)copyObject((Node)cinfo); /* ** Fix all vars in the clause ** to point to the right varno and varattno in parentpath */ xfunc_fixvars(get_clause(newinfo), newrel, whichchild); /* add clause to parentpath, and fix up its cost. */ set_locclauseinfo(parentpath, lispCons((LispValue)newinfo, (LispValue)get_locclauseinfo(parentpath))); /* put new childpath into the path tree */ if (whichchild == INNER) { set_innerjoinpath(parentpath, (pathPtr)newkid); }else { set_outerjoinpath(parentpath, (pathPtr)newkid); } /* ** recompute parentpath cost from scratch -- the cost ** of the join method has changed */ cost = xfunc_total_path_cost(parentpath); set_path_cost(parentpath, cost); return(newinfo); } /* ** calculate (selectivity-1)/cost. */ Cost xfunc_rank(Query *queryInfo,LispValue clause) { Cost selec = compute_clause_selec(queryInfo, clause, LispNil); Cost cost = xfunc_expense(queryInfo,clause); if (cost == 0) if (selec > 1) return(MAXFLOAT); else return(-(MAXFLOAT)); return((selec - 1)/cost); } /* ** Find the "global" expense of a clause; i.e. the local expense divided ** by the cardinalities of all the base relations of the query that are *not* ** referenced in the clause. */ Cost xfunc_expense(Query* queryInfo, clause) LispValue clause; { Cost cost = xfunc_local_expense(clause); if (cost) { Count card = xfunc_card_unreferenced(queryInfo, clause, LispNil); if (card) cost /= card; } return(cost); } /* ** xfunc_join_expense -- ** Find global expense of a join clause */ Cost xfunc_join_expense(Query *queryInfo, JoinPath path, int whichchild) { LispValue primjoinclause = xfunc_primary_join(path); /* ** the second argument to xfunc_card_unreferenced reflects all the ** relations involved in the join clause, i.e. all the relids in the Rel ** of the join clause */ Count card = 0; Cost cost = xfunc_expense_per_tuple(path, whichchild); card = xfunc_card_unreferenced(queryInfo, primjoinclause, get_relids(get_parent(path))); if (primjoinclause) cost += xfunc_local_expense(primjoinclause); if (card) cost /= card; return(cost); } /* ** Recursively find the per-tuple expense of a clause. See ** xfunc_func_expense for more discussion. */ Cost xfunc_local_expense(LispValue clause) { Cost cost = 0; /* running expense */ LispValue tmpclause; /* First handle the base case */ if (IsA(clause,Const) || IsA(clause,Var) || IsA(clause,Param)) return(0); /* now other stuff */ else if (IsA(clause,Iter)) /* Too low. Should multiply by the expected number of iterations. */ return(xfunc_local_expense(get_iterexpr((Iter)clause))); else if (IsA(clause,ArrayRef)) return(xfunc_local_expense(get_refexpr((ArrayRef)clause))); else if (fast_is_clause(clause)) return(xfunc_func_expense((LispValue)get_op(clause), (LispValue)get_opargs(clause))); else if (fast_is_funcclause(clause)) return(xfunc_func_expense((LispValue)get_function(clause), (LispValue)get_funcargs(clause))); else if (fast_not_clause(clause)) return(xfunc_local_expense(lsecond(clause))); else if (fast_or_clause(clause)) { /* find cost of evaluating each disjunct */ for (tmpclause = lnext(clause); tmpclause != LispNil; tmpclause = lnext(tmpclause)) cost += xfunc_local_expense(lfirst(tmpclause)); return(cost); }else { elog(WARN, "Clause node of undetermined type"); return(-1); } } /* ** xfunc_func_expense -- ** given a Func or Oper and its args, find its expense. ** Note: in Stonebraker's SIGMOD '91 paper, he uses a more complicated metric ** than the one here. We can ignore the expected number of tuples for ** our calculations; we just need the per-tuple expense. But he also ** proposes components to take into account the costs of accessing disk and ** archive. We didn't adopt that scheme here; eventually the vacuum ** cleaner should be able to tell us what percentage of bytes to find on ** which storage level, and that should be multiplied in appropriately ** in the cost function below. Right now we don't model the cost of ** accessing secondary or tertiary storage, since we don't have sufficient ** stats to do it right. */ Cost xfunc_func_expense(LispValue node, LispValue args) { HeapTuple tupl; /* the pg_proc tuple for each function */ Form_pg_proc proc; /* a data structure to hold the pg_proc tuple */ int width = 0; /* byte width of the field referenced by each clause */ RegProcedure funcid; /* ID of function associate with node */ Cost cost = 0; /* running expense */ LispValue tmpclause; LispValue operand; /* one operand of an operator */ if (IsA(node,Oper)) { /* don't trust the opid in the Oper node. Use the opno. */ if (!(funcid = get_opcode(get_opno((Oper)node)))) elog(WARN, "Oper's function is undefined"); }else { funcid = get_funcid((Func)node); } /* look up tuple in cache */ tupl = SearchSysCacheTuple(PROOID, ObjectIdGetDatum(funcid),0,0,0); if (!HeapTupleIsValid(tupl)) elog(WARN, "Cache lookup failed for procedure %d", funcid); proc = (Form_pg_proc) GETSTRUCT(tupl); /* ** if it's a Postquel function, its cost is stored in the ** associated plan. */ if (proc->prolang == SQLlanguageId) { LispValue tmpplan; List planlist; if (IsA(node,Oper) || get_func_planlist((Func)node) == LispNil) { Oid *argOidVect; /* vector of argtypes */ char *pq_src; /* text of PQ function */ int nargs; /* num args to PQ function */ QueryTreeList *queryTree_list; /* dummy variable */ /* ** plan the function, storing it in the Func node for later ** use by the executor. */ pq_src = (char *) textout(&(proc->prosrc)); nargs = proc->pronargs; if (nargs > 0) argOidVect = proc->proargtypes; planlist = (List)pg_plan(pq_src, argOidVect, nargs, &parseTree_list, None); if (IsA(node,Func)) set_func_planlist((Func)node, planlist); }else {/* plan has been cached inside the Func node already */ planlist = get_func_planlist((Func)node); } /* ** Return the sum of the costs of the plans (the PQ function ** may have many queries in its body). */ foreach(tmpplan, planlist) cost += get_cost((Plan)lfirst(tmpplan)); return(cost); }else { /* it's a C function */ /* ** find the cost of evaluating the function's arguments ** and the width of the operands */ for (tmpclause = args; tmpclause != LispNil; tmpclause = lnext(tmpclause)) { if ((operand = lfirst(tmpclause)) != LispNil) { cost += xfunc_local_expense(operand); width += xfunc_width(operand); } } /* ** when stats become available, add in cost of accessing secondary ** and tertiary storage here. */ return(cost + (Cost)proc->propercall_cpu + (Cost)proc->properbyte_cpu * (Cost)proc->probyte_pct/100.00 * (Cost)width /* * Pct_of_obj_in_mem DISK_COST * proc->probyte_pct/100.00 * width * Pct_of_obj_on_disk + ARCH_COST * proc->probyte_pct/100.00 * width * Pct_of_obj_on_arch */ ); } } /* ** xfunc_width -- ** recursively find the width of a expression */ int xfunc_width(LispValue clause) { Relation rd; /* Relation Descriptor */ HeapTuple tupl; /* structure to hold a cached tuple */ TypeTupleForm type; /* structure to hold a type tuple */ int retval = 0; if (IsA(clause,Const)) { /* base case: width is the width of this constant */ retval = get_constlen((Const) clause); goto exit; }else if (IsA(clause,ArrayRef)) { /* base case: width is width of the refelem within the array */ retval = get_refelemlength((ArrayRef)clause); goto exit; }else if (IsA(clause,Var)) { /* base case: width is width of this attribute */ tupl = SearchSysCacheTuple(TYPOID, PointerGetDatum(get_vartype((Var)clause)), 0,0,0); if (!HeapTupleIsValid(tupl)) elog(WARN, "Cache lookup failed for type %d", get_vartype((Var)clause)); type = (TypeTupleForm) GETSTRUCT(tupl); if (get_varattno((Var)clause) == 0) { /* clause is a tuple. Get its width */ rd = heap_open(type->typrelid); retval = xfunc_tuple_width(rd); heap_close(rd); }else { /* attribute is a base type */ retval = type->typlen; } goto exit; }else if (IsA(clause,Param)) { if (typeid_get_relid(get_paramtype((Param)clause))) { /* Param node returns a tuple. Find its width */ rd = heap_open(typeid_get_relid(get_paramtype((Param)clause))); retval = xfunc_tuple_width(rd); heap_close(rd); }else if (get_param_tlist((Param)clause) != LispNil) { /* Param node projects a complex type */ Assert(length(get_param_tlist((Param)clause)) == 1); /* sanity */ retval = xfunc_width((LispValue) get_expr(lfirst(get_param_tlist((Param)clause)))); }else { /* Param node returns a base type */ retval = tlen(get_id_type(get_paramtype((Param)clause))); } goto exit; }else if (IsA(clause,Iter)) { /* ** An Iter returns a setof things, so return the width of a single ** thing. ** Note: THIS MAY NOT WORK RIGHT WHEN AGGS GET FIXED, ** SINCE AGG FUNCTIONS CHEW ON THE WHOLE SETOF THINGS!!!! ** This whole Iter business is bogus, anyway. */ retval = xfunc_width(get_iterexpr((Iter)clause)); goto exit; }else if (fast_is_clause(clause)) { /* ** get function associated with this Oper, and treat this as ** a Func */ tupl = SearchSysCacheTuple(OPROID, ObjectIdGetDatum(get_opno((Oper)get_op(clause))), 0,0,0); if (!HeapTupleIsValid(tupl)) elog(WARN, "Cache lookup failed for procedure %d", get_opno((Oper)get_op(clause))); return(xfunc_func_width ((RegProcedure)(((OperatorTupleForm)(GETSTRUCT(tupl)))->oprcode), (LispValue)get_opargs(clause))); }else if (fast_is_funcclause(clause)) { Func func = (Func)get_function(clause); if (get_func_tlist(func) != LispNil) { /* this function has a projection on it. Get the length of the projected attribute */ Assert(length(get_func_tlist(func)) == 1); /* sanity */ retval = xfunc_width((LispValue) get_expr(lfirst(get_func_tlist(func)))); goto exit; }else { return(xfunc_func_width((RegProcedure)get_funcid(func), (LispValue)get_funcargs(clause))); } }else { elog(WARN, "Clause node of undetermined type"); return(-1); } exit: if (retval == -1) retval = VARLEN_DEFAULT; return(retval); } /* ** xfunc_card_unreferenced: ** find all relations not referenced in clause, and multiply their ** cardinalities. Ignore relation of cardinality 0. ** User may pass in referenced list, if they know it (useful ** for joins). */ static Count xfunc_card_unreferenced(Query *queryInfo, LispValue clause, Relid referenced) { Relid unreferenced, allrelids = LispNil; LispValue temp; /* find all relids of base relations referenced in query */ foreach (temp,queryInfo->base_relation_list_) { Assert(lnext(get_relids((Rel)lfirst(temp))) == LispNil); allrelids = lappend(allrelids, lfirst(get_relids((Rel)lfirst(temp)))); } /* find all relids referenced in query but not in clause */ if (!referenced) referenced = xfunc_find_references(clause); unreferenced = set_difference(allrelids, referenced); return(xfunc_card_product(unreferenced)); } /* ** xfunc_card_product ** multiple together cardinalities of a list relations. */ Count xfunc_card_product(Query *queryInfo, Relid relids) { LispValue cinfonode; LispValue temp; Rel currel; Cost tuples; Count retval = 0; foreach(temp,relids) { currel = get_rel(lfirst(temp)); tuples = get_tuples(currel); if (tuples) { /* not of cardinality 0 */ /* factor in the selectivity of all zero-cost clauses */ foreach (cinfonode, get_clauseinfo(currel)) { if (!xfunc_expense(queryInfo,get_clause((CInfo)lfirst(cinfonode)))) tuples *= compute_clause_selec(queryInfo, get_clause((CInfo)lfirst(cinfonode)), LispNil); } if (retval == 0) retval = tuples; else retval *= tuples; } } if (retval == 0) retval = 1; /* saves caller from dividing by zero */ return(retval); } /* ** xfunc_find_references: ** Traverse a clause and find all relids referenced in the clause. */ List xfunc_find_references(LispValue clause) { List retval = (List)LispNil; LispValue tmpclause; /* Base cases */ if (IsA(clause,Var)) return(lispCons(lfirst(get_varid((Var)clause)), LispNil)); else if (IsA(clause,Const) || IsA(clause,Param)) return((List)LispNil); /* recursion */ else if (IsA(clause,Iter)) /* Too low. Should multiply by the expected number of iterations. maybe */ return(xfunc_find_references(get_iterexpr((Iter)clause))); else if (IsA(clause,ArrayRef)) return(xfunc_find_references(get_refexpr((ArrayRef)clause))); else if (fast_is_clause(clause)) { /* string together result of all operands of Oper */ for (tmpclause = (LispValue)get_opargs(clause); tmpclause != LispNil; tmpclause = lnext(tmpclause)) retval = nconc(retval, xfunc_find_references(lfirst(tmpclause))); return(retval); }else if (fast_is_funcclause(clause)) { /* string together result of all args of Func */ for (tmpclause = (LispValue)get_funcargs(clause); tmpclause != LispNil; tmpclause = lnext(tmpclause)) retval = nconc(retval, xfunc_find_references(lfirst(tmpclause))); return(retval); }else if (fast_not_clause(clause)) return(xfunc_find_references(lsecond(clause))); else if (fast_or_clause(clause)) { /* string together result of all operands of OR */ for (tmpclause = lnext(clause); tmpclause != LispNil; tmpclause = lnext(tmpclause)) retval = nconc(retval, xfunc_find_references(lfirst(tmpclause))); return(retval); }else { elog(WARN, "Clause node of undetermined type"); return((List)LispNil); } } /* ** xfunc_primary_join: ** Find the primary join clause: for Hash and Merge Joins, this is the ** min rank Hash or Merge clause, while for Nested Loop it's the ** min rank pathclause */ LispValue xfunc_primary_join(JoinPath pathnode) { LispValue joinclauselist = get_pathclauseinfo(pathnode); CInfo mincinfo; LispValue tmplist; LispValue minclause = LispNil; Cost minrank, tmprank; if (IsA(pathnode,MergePath)) { for(tmplist = get_path_mergeclauses((MergePath)pathnode), minclause = lfirst(tmplist), minrank = xfunc_rank(minclause); tmplist != LispNil; tmplist = lnext(tmplist)) if ((tmprank = xfunc_rank(lfirst(tmplist))) < minrank) { minrank = tmprank; minclause = lfirst(tmplist); } return(minclause); } else if (IsA(pathnode,HashPath)) { for(tmplist = get_path_hashclauses((HashPath)pathnode), minclause = lfirst(tmplist), minrank = xfunc_rank(minclause); tmplist != LispNil; tmplist = lnext(tmplist)) if ((tmprank = xfunc_rank(lfirst(tmplist))) < minrank) { minrank = tmprank; minclause = lfirst(tmplist); } return(minclause); } /* if we drop through, it's nested loop join */ if (joinclauselist == LispNil) return(LispNil); for(tmplist = joinclauselist, mincinfo = (CInfo) lfirst(joinclauselist), minrank = xfunc_rank(get_clause((CInfo) lfirst(tmplist))); tmplist != LispNil; tmplist = lnext(tmplist)) if ((tmprank = xfunc_rank(get_clause((CInfo) lfirst(tmplist)))) < minrank) { minrank = tmprank; mincinfo = (CInfo) lfirst(tmplist); } return((LispValue)get_clause(mincinfo)); } /* ** xfunc_get_path_cost ** get the expensive function costs of the path */ Cost xfunc_get_path_cost(Query *queryInfo, Path pathnode) { Cost cost = 0; LispValue tmplist; Cost selec = 1.0; /* ** first add in the expensive local function costs. ** We ensure that the clauses are sorted by rank, so that we ** know (via selectivities) the number of tuples that will be checked ** by each function. If we're not doing any optimization of expensive ** functions, we don't sort. */ if (XfuncMode != XFUNC_OFF) set_locclauseinfo(pathnode, lisp_qsort(get_locclauseinfo(pathnode), xfunc_cinfo_compare)); for(tmplist = get_locclauseinfo(pathnode), selec = 1.0; tmplist != LispNil; tmplist = lnext(tmplist)) { cost += (Cost)(xfunc_local_expense(get_clause((CInfo)lfirst(tmplist))) * (Cost)get_tuples(get_parent(pathnode)) * selec); selec *= compute_clause_selec(queryInfo, get_clause((CInfo)lfirst(tmplist)), LispNil); } /* ** Now add in any node-specific expensive function costs. ** Again, we must ensure that the clauses are sorted by rank. */ if (IsA(pathnode,JoinPath)) { if (XfuncMode != XFUNC_OFF) set_pathclauseinfo((JoinPath)pathnode, lisp_qsort (get_pathclauseinfo((JoinPath)pathnode), xfunc_cinfo_compare)); for(tmplist = get_pathclauseinfo((JoinPath)pathnode), selec = 1.0; tmplist != LispNil; tmplist = lnext(tmplist)) { cost += (Cost)(xfunc_local_expense(get_clause((CInfo)lfirst(tmplist))) * (Cost)get_tuples(get_parent(pathnode)) * selec); selec *= compute_clause_selec(queryInfo, get_clause((CInfo)lfirst(tmplist)), LispNil); } } if (IsA(pathnode,HashPath)) { if (XfuncMode != XFUNC_OFF) set_path_hashclauses ((HashPath)pathnode, lisp_qsort(get_path_hashclauses((HashPath)pathnode), xfunc_clause_compare)); for(tmplist = get_path_hashclauses((HashPath)pathnode), selec = 1.0; tmplist != LispNil; tmplist = lnext(tmplist)) { cost += (Cost)(xfunc_local_expense(lfirst(tmplist)) * (Cost)get_tuples(get_parent(pathnode)) * selec); selec *= compute_clause_selec(queryInfo, lfirst(tmplist), LispNil); } } if (IsA(pathnode,MergePath)) { if (XfuncMode != XFUNC_OFF) set_path_mergeclauses ((MergePath)pathnode, lisp_qsort(get_path_mergeclauses((MergePath)pathnode), xfunc_clause_compare)); for(tmplist = get_path_mergeclauses((MergePath)pathnode), selec = 1.0; tmplist != LispNil; tmplist = lnext(tmplist)) { cost += (Cost)(xfunc_local_expense(lfirst(tmplist)) * (Cost)get_tuples(get_parent(pathnode)) * selec); selec *= compute_clause_selec(queryInfo, lfirst(tmplist), LispNil); } } Assert(cost >= 0); return(cost); } /* ** Recalculate the cost of a path node. This includes the basic cost of the ** node, as well as the cost of its expensive functions. ** We need to do this to the parent after pulling a clause from a child into a ** parent. Thus we should only be calling this function on JoinPaths. */ Cost xfunc_total_path_cost(JoinPath pathnode) { Cost cost = xfunc_get_path_cost((Path)pathnode); Assert(IsA(pathnode,JoinPath)); if (IsA(pathnode,MergePath)) { MergePath mrgnode = (MergePath)pathnode; cost += cost_mergesort(get_path_cost((Path)get_outerjoinpath(mrgnode)), get_path_cost((Path)get_innerjoinpath(mrgnode)), get_outersortkeys(mrgnode), get_innersortkeys(mrgnode), get_tuples(get_parent((Path)get_outerjoinpath (mrgnode))), get_tuples(get_parent((Path)get_innerjoinpath (mrgnode))), get_width(get_parent((Path)get_outerjoinpath (mrgnode))), get_width(get_parent((Path)get_innerjoinpath (mrgnode)))); Assert(cost >= 0); return(cost); } else if (IsA(pathnode,HashPath)) { HashPath hashnode = (HashPath)pathnode; cost += cost_hashjoin(get_path_cost((Path)get_outerjoinpath(hashnode)), get_path_cost((Path)get_innerjoinpath(hashnode)), get_outerhashkeys(hashnode), get_innerhashkeys(hashnode), get_tuples(get_parent((Path)get_outerjoinpath (hashnode))), get_tuples(get_parent((Path)get_innerjoinpath (hashnode))), get_width(get_parent((Path)get_outerjoinpath (hashnode))), get_width(get_parent((Path)get_innerjoinpath (hashnode)))); Assert (cost >= 0); return(cost); } else /* Nested Loop Join */ { cost += cost_nestloop(get_path_cost((Path)get_outerjoinpath(pathnode)), get_path_cost((Path)get_innerjoinpath(pathnode)), get_tuples(get_parent((Path)get_outerjoinpath (pathnode))), get_tuples(get_parent((Path)get_innerjoinpath (pathnode))), get_pages(get_parent((Path)get_outerjoinpath (pathnode))), IsA(get_innerjoinpath(pathnode),IndexPath)); Assert(cost >= 0); return(cost); } } /* ** xfunc_expense_per_tuple -- ** return the expense of the join *per-tuple* of the input relation. ** The cost model here is that a join costs ** k*card(outer)*card(inner) + l*card(outer) + m*card(inner) + n ** ** We treat the l and m terms by considering them to be like restrictions ** constrained to be right under the join. Thus the cost per inner and ** cost per outer of the join is different, reflecting these virtual nodes. ** ** The cost per tuple of outer is k + l/referenced(inner). Cost per tuple ** of inner is k + m/referenced(outer). ** The constants k, l, m and n depend on the join method. Measures here are ** based on the costs in costsize.c, with fudging for HashJoin and Sorts to ** make it fit our model (the 'q' in HashJoin results in a ** card(outer)/card(inner) term, and sorting results in a log term. */ Cost xfunc_expense_per_tuple(JoinPath joinnode, int whichchild) { Rel outerrel = get_parent((Path)get_outerjoinpath(joinnode)); Rel innerrel = get_parent((Path)get_innerjoinpath(joinnode)); Count outerwidth = get_width(outerrel); Count outers_per_page = ceil(BLCKSZ/(outerwidth + sizeof(HeapTupleData))); if (IsA(joinnode,HashPath)) { if (whichchild == INNER) return((1 + _CPU_PAGE_WEIGHT_)*outers_per_page/NBuffers); else return(((1 + _CPU_PAGE_WEIGHT_)*outers_per_page/NBuffers) + _CPU_PAGE_WEIGHT_ / xfunc_card_product(get_relids(innerrel))); } else if (IsA(joinnode,MergePath)) { /* assumes sort exists, and costs one (I/O + CPU) per tuple */ if (whichchild == INNER) return((2*_CPU_PAGE_WEIGHT_ + 1) / xfunc_card_product(get_relids(outerrel))); else return((2*_CPU_PAGE_WEIGHT_ + 1) / xfunc_card_product(get_relids(innerrel))); } else /* nestloop */ { Assert(IsA(joinnode,JoinPath)); return(_CPU_PAGE_WEIGHT_); } } /* ** xfunc_fixvars -- ** After pulling up a clause, we must walk its expression tree, fixing Var ** nodes to point to the correct varno (either INNER or OUTER, depending ** on which child the clause was pulled from), and the right varattno in the ** target list of the child's former relation. If the target list of the ** child Rel does not contain the attribute we need, we add it. */ void xfunc_fixvars(LispValue clause, /* clause being pulled up */ Rel rel, /* rel it's being pulled from */ int varno) /* whether rel is INNER or OUTER of join */ { LispValue tmpclause; /* temporary variable */ TargetEntry *tle; /* tlist member corresponding to var */ if (IsA(clause,Const) || IsA(clause,Param)) return; else if (IsA(clause,Var)) { /* here's the meat */ tle = tlistentry_member((Var)clause, get_targetlist(rel)); if (tle == LispNil) { /* ** The attribute we need is not in the target list, ** so we have to add it. ** */ add_tl_element(rel, (Var)clause); tle = tlistentry_member((Var)clause, get_targetlist(rel)); } set_varno(((Var)clause), varno); set_varattno(((Var)clause), get_resno(get_resdom(get_entry(tle)))); } else if (IsA(clause,Iter)) xfunc_fixvars(get_iterexpr((Iter)clause), rel, varno); else if (fast_is_clause(clause)) { xfunc_fixvars(lfirst(lnext(clause)), rel, varno); xfunc_fixvars(lfirst(lnext(lnext(clause))), rel, varno); } else if (fast_is_funcclause(clause)) for (tmpclause = lnext(clause); tmpclause != LispNil; tmpclause = lnext(tmpclause)) xfunc_fixvars(lfirst(tmpclause), rel, varno); else if (fast_not_clause(clause)) xfunc_fixvars(lsecond(clause), rel, varno); else if (fast_or_clause(clause)) for (tmpclause = lnext(clause); tmpclause != LispNil; tmpclause = lnext(tmpclause)) xfunc_fixvars(lfirst(tmpclause), rel, varno); else { elog(WARN, "Clause node of undetermined type"); } } /* ** Comparison function for lisp_qsort() on a list of CInfo's. ** arg1 and arg2 should really be of type (CInfo *). */ int xfunc_cinfo_compare(void *arg1, void *arg2) { CInfo info1 = *(CInfo *) arg1; CInfo info2 = *(CInfo *) arg2; LispValue clause1 = (LispValue) get_clause(info1), clause2 = (LispValue) get_clause(info2); return(xfunc_clause_compare((void *) &clause1, (void *) &clause2)); } /* ** xfunc_clause_compare: comparison function for lisp_qsort() that compares two ** clauses based on expense/(1 - selectivity) ** arg1 and arg2 are really pointers to clauses. */ int xfunc_clause_compare(void *arg1, void *arg2) { LispValue clause1 = *(LispValue *) arg1; LispValue clause2 = *(LispValue *) arg2; Cost rank1, /* total xfunc rank of clause1 */ rank2; /* total xfunc rank of clause2 */ rank1 = xfunc_rank(clause1); rank2 = xfunc_rank(clause2); if ( rank1 < rank2) return(-1); else if (rank1 == rank2) return(0); else return(1); } /* ** xfunc_disjunct_sort -- ** given a list of clauses, for each clause sort the disjuncts by cost ** (this assumes the predicates have been converted to Conjunctive NF) ** Modifies the clause list! */ void xfunc_disjunct_sort(LispValue clause_list) { LispValue temp; foreach(temp, clause_list) if(or_clause(lfirst(temp))) lnext(lfirst(temp)) = lisp_qsort(lnext(lfirst(temp)), xfunc_disjunct_compare); } /* ** xfunc_disjunct_compare: comparison function for qsort() that compares two ** disjuncts based on cost/selec. ** arg1 and arg2 are really pointers to disjuncts */ int xfunc_disjunct_compare(Query* queryInfo, void *arg1, void *arg2) { LispValue disjunct1 = *(LispValue *) arg1; LispValue disjunct2 = *(LispValue *) arg2; Cost cost1, /* total cost of disjunct1 */ cost2, /* total cost of disjunct2 */ selec1, selec2; Cost rank1, rank2; cost1 = xfunc_expense(queryInfo, disjunct1); cost2 = xfunc_expense(queryInfo, disjunct2); selec1 = compute_clause_selec(queryInfo, disjunct1, LispNil); selec2 = compute_clause_selec(queryInfo, disjunct2, LispNil); if (selec1 == 0) rank1 = MAXFLOAT; else if (cost1 == 0) rank1 = 0; else rank1 = cost1/selec1; if (selec2 == 0) rank2 = MAXFLOAT; else if (cost2 == 0) rank2 = 0; else rank2 = cost2/selec2; if ( rank1 < rank2) return(-1); else if (rank1 == rank2) return(0); else return(1); } /* ------------------------ UTILITY FUNCTIONS ------------------------------- */ /* ** xfunc_func_width -- ** Given a function OID and operands, find the width of the return value. */ int xfunc_func_width(RegProcedure funcid, LispValue args) { Relation rd; /* Relation Descriptor */ HeapTuple tupl; /* structure to hold a cached tuple */ Form_pg_proc proc; /* structure to hold the pg_proc tuple */ TypeTupleForm type; /* structure to hold the pg_type tuple */ LispValue tmpclause; int retval; /* lookup function and find its return type */ Assert(RegProcedureIsValid(funcid)); tupl = SearchSysCacheTuple(PROOID, ObjectIdGetDatum(funcid), 0,0,0); if (!HeapTupleIsValid(tupl)) elog(WARN, "Cache lookup failed for procedure %d", funcid); proc = (Form_pg_proc) GETSTRUCT(tupl); /* if function returns a tuple, get the width of that */ if (typeid_get_relid(proc->prorettype)) { rd = heap_open(typeid_get_relid(proc->prorettype)); retval = xfunc_tuple_width(rd); heap_close(rd); goto exit; } else /* function returns a base type */ { tupl = SearchSysCacheTuple(TYPOID, ObjectIdGetDatum(proc->prorettype), 0,0,0); if (!HeapTupleIsValid(tupl)) elog(WARN, "Cache lookup failed for type %d", proc->prorettype); type = (TypeTupleForm) GETSTRUCT(tupl); /* if the type length is known, return that */ if (type->typlen != -1) { retval = type->typlen; goto exit; } else /* estimate the return size */ { /* find width of the function's arguments */ for (tmpclause = args; tmpclause != LispNil; tmpclause = lnext(tmpclause)) retval += xfunc_width(lfirst(tmpclause)); /* multiply by outin_ratio */ retval = (int)(proc->prooutin_ratio/100.0 * retval); goto exit; } } exit: return(retval); } /* ** xfunc_tuple_width -- ** Return the sum of the lengths of all the attributes of a given relation */ int xfunc_tuple_width(Relation rd) { int i; int retval = 0; TupleDesc tdesc = RelationGetTupleDescriptor(rd); for (i = 0; i < tdesc->natts; i++) { if (tdesc->attrs[i]->attlen != -1) retval += tdesc->attrs[i]->attlen; else retval += VARLEN_DEFAULT; } return(retval); } /* ** xfunc_num_join_clauses -- ** Find the number of join clauses associated with this join path */ int xfunc_num_join_clauses(JoinPath path) { int num = length(get_pathclauseinfo(path)); if (IsA(path,MergePath)) return(num + length(get_path_mergeclauses((MergePath)path))); else if (IsA(path,HashPath)) return(num + length(get_path_hashclauses((HashPath)path))); else return(num); } /* ** xfunc_LispRemove -- ** Just like LispRemove, but it whines if the item to be removed ain't there */ LispValue xfunc_LispRemove(LispValue foo, List bar) { LispValue temp = LispNil; LispValue result = LispNil; int sanity = false; for (temp = bar; !null(temp); temp = lnext(temp)) if (! equal((Node)(foo),(Node)(lfirst(temp))) ) { result = lappend(result,lfirst(temp)); } else sanity = true; /* found a matching item to remove! */ if (!sanity) elog(WARN, "xfunc_LispRemove: didn't find a match!"); return(result); } #define Node_Copy(a, b, c, d) \ if (NodeCopy((Node)((a)->d), (Node*)&((b)->d), c) != true) { \ return false; \ } /* ** xfunc_copyrel -- ** Just like _copyRel, but doesn't copy the paths */ bool xfunc_copyrel(Rel from, Rel *to) { Rel newnode; Pointer (*alloc)() = palloc; /* COPY_CHECKARGS() */ if (to == NULL) { return false; } /* COPY_CHECKNULL() */ if (from == NULL) { (*to) = NULL; return true; } /* COPY_NEW(c) */ newnode = (Rel)(*alloc)(classSize(Rel)); if (newnode == NULL) { return false; } /* ---------------- * copy node superclass fields * ---------------- */ CopyNodeFields((Node)from, (Node)newnode, alloc); /* ---------------- * copy remainder of node * ---------------- */ Node_Copy(from, newnode, alloc, relids); newnode->indexed = from->indexed; newnode->pages = from->pages; newnode->tuples = from->tuples; newnode->size = from->size; newnode->width = from->width; Node_Copy(from, newnode, alloc, targetlist); /* No!!!! Node_Copy(from, newnode, alloc, pathlist); Node_Copy(from, newnode, alloc, unorderedpath); Node_Copy(from, newnode, alloc, cheapestpath); */ #if 0 /* can't use Node_copy now. 2/95 -ay */ Node_Copy(from, newnode, alloc, classlist); Node_Copy(from, newnode, alloc, indexkeys); Node_Copy(from, newnode, alloc, ordering); #endif Node_Copy(from, newnode, alloc, clauseinfo); Node_Copy(from, newnode, alloc, joininfo); Node_Copy(from, newnode, alloc, innerjoin); Node_Copy(from, newnode, alloc, superrels); (*to) = newnode; return true; }