aboutsummaryrefslogtreecommitdiff
path: root/src/backend/optimizer/geqo/geqo_eval.c
blob: 1642f34b4087a3a5e7ab356a37798aaacb33f8eb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
/*------------------------------------------------------------------------
 *
 * geqo_eval.c
 *	  Routines to evaluate query trees
 *
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * $PostgreSQL: pgsql/src/backend/optimizer/geqo/geqo_eval.c,v 1.68 2004/05/26 04:41:20 neilc Exp $
 *
 *-------------------------------------------------------------------------
 */

/* contributed by:
   =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
   *  Martin Utesch				 * Institute of Automatic Control	   *
   =							 = University of Mining and Technology =
   *  utesch@aut.tu-freiberg.de  * Freiberg, Germany				   *
   =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=
 */

#include "postgres.h"

#include <float.h>
#include <limits.h>
#include <math.h>

#include "optimizer/geqo.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "utils/memutils.h"


static bool desirable_join(Query *root,
						   RelOptInfo *outer_rel, RelOptInfo *inner_rel);


/*
 * geqo_eval
 *
 * Returns cost of a query tree as an individual of the population.
 */
Cost
geqo_eval(Gene *tour, int num_gene, GeqoEvalData *evaldata)
{
	MemoryContext mycontext;
	MemoryContext oldcxt;
	RelOptInfo *joinrel;
	Cost		fitness;
	List	   *savelist;

	/*
	 * Because gimme_tree considers both left- and right-sided trees,
	 * there is no difference between a tour (a,b,c,d,...) and a tour
	 * (b,a,c,d,...) --- the same join orders will be considered. To avoid
	 * redundant cost calculations, we simply reject tours where tour[0] >
	 * tour[1], assigning them an artificially bad fitness.
	 *
	 * init_tour() is aware of this rule and so we should never reject a
	 * tour during the initial filling of the pool.  It seems difficult to
	 * persuade the recombination logic never to break the rule, however.
	 */
	if (num_gene >= 2 && tour[0] > tour[1])
		return DBL_MAX;

	/*
	 * Create a private memory context that will hold all temp storage
	 * allocated inside gimme_tree().
	 *
	 * Since geqo_eval() will be called many times, we can't afford to let
	 * all that memory go unreclaimed until end of statement.  Note we
	 * make the temp context a child of the planner's normal context, so
	 * that it will be freed even if we abort via ereport(ERROR).
	 */
	mycontext = AllocSetContextCreate(CurrentMemoryContext,
									  "GEQO",
									  ALLOCSET_DEFAULT_MINSIZE,
									  ALLOCSET_DEFAULT_INITSIZE,
									  ALLOCSET_DEFAULT_MAXSIZE);
	oldcxt = MemoryContextSwitchTo(mycontext);

	/*
	 * preserve root->join_rel_list, which gimme_tree changes; without
	 * this, it'll be pointing at recycled storage after the
	 * MemoryContextDelete below.
	 */
	savelist = evaldata->root->join_rel_list;

	/* construct the best path for the given combination of relations */
	joinrel = gimme_tree(tour, num_gene, evaldata);

	/*
	 * compute fitness
	 *
	 * XXX geqo does not currently support optimization for partial result
	 * retrieval --- how to fix?
	 */
	if (joinrel)
		fitness = joinrel->cheapest_total_path->total_cost;
	else
		fitness = DBL_MAX;

	/* restore join_rel_list */
	evaldata->root->join_rel_list = savelist;

	/* release all the memory acquired within gimme_tree */
	MemoryContextSwitchTo(oldcxt);
	MemoryContextDelete(mycontext);

	return fitness;
}

/*
 * gimme_tree
 *	  Form planner estimates for a join tree constructed in the specified
 *	  order.
 *
 *	 'tour' is the proposed join order, of length 'num_gene'
 *	 'evaldata' contains the context we need
 *
 * Returns a new join relation whose cheapest path is the best plan for
 * this join order.  NB: will return NULL if join order is invalid.
 *
 * The original implementation of this routine always joined in the specified
 * order, and so could only build left-sided plans (and right-sided and
 * mixtures, as a byproduct of the fact that make_join_rel() is symmetric).
 * It could never produce a "bushy" plan.  This had a couple of big problems,
 * of which the worst was that as of 7.4, there are situations involving IN
 * subqueries where the only valid plans are bushy.
 *
 * The present implementation takes the given tour as a guideline, but
 * postpones joins that seem unsuitable according to some heuristic rules.
 * This allows correct bushy plans to be generated at need, and as a nice
 * side-effect it seems to materially improve the quality of the generated
 * plans.
 */
RelOptInfo *
gimme_tree(Gene *tour, int num_gene, GeqoEvalData *evaldata)
{
	RelOptInfo **stack;
	int			stack_depth;
	RelOptInfo *joinrel;
	int			rel_count;

	/*
	 * Create a stack to hold not-yet-joined relations.
	 */
	stack = (RelOptInfo **) palloc(num_gene * sizeof(RelOptInfo *));
	stack_depth = 0;

	/*
	 * Push each relation onto the stack in the specified order.  After
	 * pushing each relation, see whether the top two stack entries are
	 * joinable according to the desirable_join() heuristics.  If so,
	 * join them into one stack entry, and try again to combine with the
	 * next stack entry down (if any).  When the stack top is no longer
	 * joinable, continue to the next input relation.  After we have pushed
	 * the last input relation, the heuristics are disabled and we force
	 * joining all the remaining stack entries.
	 *
	 * If desirable_join() always returns true, this produces a straight
	 * left-to-right join just like the old code.  Otherwise we may produce
	 * a bushy plan or a left/right-sided plan that really corresponds to
	 * some tour other than the one given.  To the extent that the heuristics
	 * are helpful, however, this will be a better plan than the raw tour.
	 *
	 * Also, when a join attempt fails (because of IN-clause constraints),
	 * we may be able to recover and produce a workable plan, where the old
	 * code just had to give up.  This case acts the same as a false result
	 * from desirable_join().
	 */
	for (rel_count = 0; rel_count < num_gene; rel_count++)
	{
		int			cur_rel_index;

		/* Get the next input relation and push it */
		cur_rel_index = (int) tour[rel_count];
		stack[stack_depth] = (RelOptInfo *) nth(cur_rel_index - 1,
												evaldata->initial_rels);
		stack_depth++;

		/*
		 * While it's feasible, pop the top two stack entries and replace
		 * with their join.
		 */
		while (stack_depth >= 2)
		{
			RelOptInfo *outer_rel = stack[stack_depth - 2];
			RelOptInfo *inner_rel = stack[stack_depth - 1];

			/*
			 * Don't pop if heuristics say not to join now.  However,
			 * once we have exhausted the input, the heuristics can't
			 * prevent popping.
			 */
			if (rel_count < num_gene - 1 &&
				!desirable_join(evaldata->root, outer_rel, inner_rel))
				break;

			/*
			 * Construct a RelOptInfo representing the join of these
			 * two input relations.  These are always inner joins.
			 * Note that we expect the joinrel not to exist in
			 * root->join_rel_list yet, and so the paths constructed for it
			 * will only include the ones we want.
			 */
			joinrel = make_join_rel(evaldata->root, outer_rel, inner_rel,
									JOIN_INNER);

			/* Can't pop stack here if join order is not valid */
			if (!joinrel)
				break;

			/* Find and save the cheapest paths for this rel */
			set_cheapest(joinrel);

			/* Pop the stack and replace the inputs with their join */
			stack_depth--;
			stack[stack_depth - 1] = joinrel;
		}
	}

	/* Did we succeed in forming a single join relation? */
	if (stack_depth == 1)
		joinrel = stack[0];
	else
		joinrel = NULL;

	pfree(stack);

	return joinrel;
}

/*
 * Heuristics for gimme_tree: do we want to join these two relations?
 */
static bool
desirable_join(Query *root,
			   RelOptInfo *outer_rel, RelOptInfo *inner_rel)
{
	ListCell   *l;

	/*
	 * Join if there is an applicable join clause.
	 */
	foreach(l, outer_rel->joininfo)
	{
		JoinInfo   *joininfo = (JoinInfo *) lfirst(l);

		if (bms_is_subset(joininfo->unjoined_relids, inner_rel->relids))
			return true;
	}

	/*
	 * Join if the rels are members of the same IN sub-select.  This is
	 * needed to improve the odds that we will find a valid solution in
	 * a case where an IN sub-select has a clauseless join.
	 */
	foreach(l, root->in_info_list)
	{
		InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);

		if (bms_is_subset(outer_rel->relids, ininfo->righthand) &&
			bms_is_subset(inner_rel->relids, ininfo->righthand))
			return true;
	}

	/* Otherwise postpone the join till later. */
	return false;
}