diff options
Diffstat (limited to 'src/backend/utils/sort/tuplesort.c')
-rw-r--r-- | src/backend/utils/sort/tuplesort.c | 275 |
1 files changed, 273 insertions, 2 deletions
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index 76c59a42382..e4fc74b3e93 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -91,13 +91,15 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.74 2007/01/10 18:06:04 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.75 2007/05/04 01:13:44 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" +#include <limits.h> + #include "access/heapam.h" #include "access/nbtree.h" #include "catalog/pg_amop.h" @@ -112,10 +114,13 @@ #include "utils/tuplesort.h" -/* GUC variable */ +/* GUC variables */ #ifdef TRACE_SORT bool trace_sort = false; #endif +#ifdef DEBUG_BOUNDED_SORT +bool optimize_bounded_sort = true; +#endif /* @@ -159,6 +164,7 @@ typedef struct typedef enum { TSS_INITIAL, /* Loading tuples; still within memory limit */ + TSS_BOUNDED, /* Loading tuples into bounded-size heap */ TSS_BUILDRUNS, /* Loading tuples; writing to tape */ TSS_SORTEDINMEM, /* Sort completed entirely in memory */ TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */ @@ -188,6 +194,9 @@ struct Tuplesortstate TupSortStatus status; /* enumerated value as shown above */ int nKeys; /* number of columns in sort key */ bool randomAccess; /* did caller request random access? */ + bool bounded; /* did caller specify a maximum number of + * tuples to return? */ + int bound; /* if bounded, the maximum number of tuples */ long availMem; /* remaining memory available, in bytes */ long allowedMem; /* total memory allowed, in bytes */ int maxTapes; /* number of tapes (Knuth's T) */ @@ -235,6 +244,13 @@ struct Tuplesortstate int tapenum, unsigned int len); /* + * Function to reverse the sort direction from its current state. + * (We could dispense with this if we wanted to enforce that all variants + * represent the sort key information alike.) + */ + void (*reversedirection) (Tuplesortstate *state); + + /* * This array holds the tuples now in sort memory. If we are in state * INITIAL, the tuples are in no particular order; if we are in state * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS @@ -347,6 +363,7 @@ struct Tuplesortstate #define COPYTUP(state,stup,tup) ((*(state)->copytup) (state, stup, tup)) #define WRITETUP(state,tape,stup) ((*(state)->writetup) (state, tape, stup)) #define READTUP(state,stup,tape,len) ((*(state)->readtup) (state, stup, tape, len)) +#define REVERSEDIRECTION(state) ((*(state)->reversedirection) (state)) #define LACKMEM(state) ((state)->availMem < 0) #define USEMEM(state,amt) ((state)->availMem -= (amt)) #define FREEMEM(state,amt) ((state)->availMem += (amt)) @@ -403,6 +420,8 @@ static void beginmerge(Tuplesortstate *state); static void mergepreread(Tuplesortstate *state); static void mergeprereadone(Tuplesortstate *state, int srcTape); static void dumptuples(Tuplesortstate *state, bool alltuples); +static void make_bounded_heap(Tuplesortstate *state); +static void sort_bounded_heap(Tuplesortstate *state); static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple, int tupleindex, bool checkIndex); static void tuplesort_heap_siftup(Tuplesortstate *state, bool checkIndex); @@ -415,6 +434,7 @@ static void writetup_heap(Tuplesortstate *state, int tapenum, SortTuple *stup); static void readtup_heap(Tuplesortstate *state, SortTuple *stup, int tapenum, unsigned int len); +static void reversedirection_heap(Tuplesortstate *state); static int comparetup_index(const SortTuple *a, const SortTuple *b, Tuplesortstate *state); static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup); @@ -422,6 +442,7 @@ static void writetup_index(Tuplesortstate *state, int tapenum, SortTuple *stup); static void readtup_index(Tuplesortstate *state, SortTuple *stup, int tapenum, unsigned int len); +static void reversedirection_index(Tuplesortstate *state); static int comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state); static void copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup); @@ -429,6 +450,8 @@ static void writetup_datum(Tuplesortstate *state, int tapenum, SortTuple *stup); static void readtup_datum(Tuplesortstate *state, SortTuple *stup, int tapenum, unsigned int len); +static void reversedirection_datum(Tuplesortstate *state); +static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup); /* @@ -538,6 +561,7 @@ tuplesort_begin_heap(TupleDesc tupDesc, state->copytup = copytup_heap; state->writetup = writetup_heap; state->readtup = readtup_heap; + state->reversedirection = reversedirection_heap; state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ state->scanKeys = (ScanKey) palloc0(nkeys * sizeof(ScanKeyData)); @@ -601,6 +625,7 @@ tuplesort_begin_index(Relation indexRel, state->copytup = copytup_index; state->writetup = writetup_index; state->readtup = readtup_index; + state->reversedirection = reversedirection_index; state->indexRel = indexRel; /* see comments below about btree dependence of this code... */ @@ -639,6 +664,7 @@ tuplesort_begin_datum(Oid datumType, state->copytup = copytup_datum; state->writetup = writetup_datum; state->readtup = readtup_datum; + state->reversedirection = reversedirection_datum; state->datumType = datumType; @@ -665,6 +691,40 @@ tuplesort_begin_datum(Oid datumType, } /* + * tuplesort_set_bound + * + * Advise tuplesort that at most the first N result tuples are required. + * + * Must be called before inserting any tuples. (Actually, we could allow it + * as long as the sort hasn't spilled to disk, but there seems no need for + * delayed calls at the moment.) + * + * This is a hint only. The tuplesort may still return more tuples than + * requested. + */ +void +tuplesort_set_bound(Tuplesortstate *state, int64 bound) +{ + /* Assert we're called before loading any tuples */ + Assert(state->status == TSS_INITIAL); + Assert(state->memtupcount == 0); + Assert(!state->bounded); + +#ifdef DEBUG_BOUNDED_SORT + /* Honor GUC setting that disables the feature (for easy testing) */ + if (!optimize_bounded_sort) + return; +#endif + + /* We want to be able to compute bound * 2, so limit the setting */ + if (bound > (int64) (INT_MAX/2)) + return; + + state->bounded = true; + state->bound = (int) bound; +} + +/* * tuplesort_end * * Release resources and clean up. @@ -863,6 +923,32 @@ puttuple_common(Tuplesortstate *state, SortTuple *tuple) state->memtuples[state->memtupcount++] = *tuple; /* + * Check if it's time to switch over to a bounded heapsort. + * We do so if the input tuple count exceeds twice the desired + * tuple count (this is a heuristic for where heapsort becomes + * cheaper than a quicksort), or if we've just filled workMem + * and have enough tuples to meet the bound. + * + * Note that once we enter TSS_BOUNDED state we will always try + * to complete the sort that way. In the worst case, if later + * input tuples are larger than earlier ones, this might cause + * us to exceed workMem significantly. + */ + if (state->bounded && + (state->memtupcount > state->bound * 2 || + (state->memtupcount > state->bound && LACKMEM(state)))) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "switching to bounded heapsort at %d tuples: %s", + state->memtupcount, + pg_rusage_show(&state->ru_start)); +#endif + make_bounded_heap(state); + return; + } + + /* * Done if we still fit in available memory and have array slots. */ if (state->memtupcount < state->memtupsize && !LACKMEM(state)) @@ -878,6 +964,31 @@ puttuple_common(Tuplesortstate *state, SortTuple *tuple) */ dumptuples(state, false); break; + + case TSS_BOUNDED: + /* + * We don't want to grow the array here, so check whether the + * new tuple can be discarded before putting it in. This should + * be a good speed optimization, too, since when there are many + * more input tuples than the bound, most input tuples can be + * discarded with just this one comparison. Note that because + * we currently have the sort direction reversed, we must check + * for <= not >=. + */ + if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0) + { + /* new tuple <= top of the heap, so we can discard it */ + free_sort_tuple(state, tuple); + } + else + { + /* discard top of heap, sift up, insert new tuple */ + free_sort_tuple(state, &state->memtuples[0]); + tuplesort_heap_siftup(state, false); + tuplesort_heap_insert(state, tuple, 0, false); + } + break; + case TSS_BUILDRUNS: /* @@ -904,6 +1015,7 @@ puttuple_common(Tuplesortstate *state, SortTuple *tuple) */ dumptuples(state, false); break; + default: elog(ERROR, "invalid tuplesort state"); break; @@ -944,6 +1056,23 @@ tuplesort_performsort(Tuplesortstate *state) state->markpos_eof = false; state->status = TSS_SORTEDINMEM; break; + + case TSS_BOUNDED: + + /* + * We were able to accumulate all the tuples required for output + * in memory, using a heap to eliminate excess tuples. Now we have + * to transform the heap to a properly-sorted array. + */ + if (state->memtupcount > 1) + sort_bounded_heap(state); + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + state->status = TSS_SORTEDINMEM; + break; + case TSS_BUILDRUNS: /* @@ -959,6 +1088,7 @@ tuplesort_performsort(Tuplesortstate *state) state->markpos_offset = 0; state->markpos_eof = false; break; + default: elog(ERROR, "invalid tuplesort state"); break; @@ -1004,6 +1134,15 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward, return true; } state->eof_reached = true; + + /* + * Complain if caller tries to retrieve more tuples than + * originally asked for in a bounded sort. This is because + * returning EOF here might be the wrong thing. + */ + if (state->bounded && state->current >= state->bound) + elog(ERROR, "retrieved too many tuples in a bounded sort"); + return false; } else @@ -1988,6 +2127,98 @@ tuplesort_restorepos(Tuplesortstate *state) COMPARETUP(state, tup1, tup2)) /* + * Convert the existing unordered array of SortTuples to a bounded heap, + * discarding all but the smallest "state->bound" tuples. + * + * When working with a bounded heap, we want to keep the largest entry + * at the root (array entry zero), instead of the smallest as in the normal + * sort case. This allows us to discard the largest entry cheaply. + * Therefore, we temporarily reverse the sort direction. + * + * We assume that all entries in a bounded heap will always have tupindex + * zero; it therefore doesn't matter that HEAPCOMPARE() doesn't reverse + * the direction of comparison for tupindexes. + */ +static void +make_bounded_heap(Tuplesortstate *state) +{ + int tupcount = state->memtupcount; + int i; + + Assert(state->status == TSS_INITIAL); + Assert(state->bounded); + Assert(tupcount >= state->bound); + + /* Reverse sort direction so largest entry will be at root */ + REVERSEDIRECTION(state); + + state->memtupcount = 0; /* make the heap empty */ + for (i=0; i<tupcount; i++) + { + if (state->memtupcount >= state->bound && + COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0) + { + /* New tuple would just get thrown out, so skip it */ + free_sort_tuple(state, &state->memtuples[i]); + } + else + { + /* Insert next tuple into heap */ + /* Must copy source tuple to avoid possible overwrite */ + SortTuple stup = state->memtuples[i]; + + tuplesort_heap_insert(state, &stup, 0, false); + + /* If heap too full, discard largest entry */ + if (state->memtupcount > state->bound) + { + free_sort_tuple(state, &state->memtuples[0]); + tuplesort_heap_siftup(state, false); + } + } + } + + Assert(state->memtupcount == state->bound); + state->status = TSS_BOUNDED; +} + +/* + * Convert the bounded heap to a properly-sorted array + */ +static void +sort_bounded_heap(Tuplesortstate *state) +{ + int tupcount = state->memtupcount; + + Assert(state->status == TSS_BOUNDED); + Assert(state->bounded); + Assert(tupcount == state->bound); + + /* + * We can unheapify in place because each sift-up will remove the largest + * entry, which we can promptly store in the newly freed slot at the end. + * Once we're down to a single-entry heap, we're done. + */ + while (state->memtupcount > 1) + { + SortTuple stup = state->memtuples[0]; + + /* this sifts-up the next-largest entry and decreases memtupcount */ + tuplesort_heap_siftup(state, false); + state->memtuples[state->memtupcount] = stup; + } + state->memtupcount = tupcount; + + /* + * Reverse sort direction back to the original state. This is not + * actually necessary but seems like a good idea for tidiness. + */ + REVERSEDIRECTION(state); + + state->status = TSS_SORTEDINMEM; +} + +/* * Insert a new tuple into an empty or existing heap, maintaining the * heap invariant. Caller is responsible for ensuring there's room. * @@ -2322,6 +2553,18 @@ readtup_heap(Tuplesortstate *state, SortTuple *stup, &stup->isnull1); } +static void +reversedirection_heap(Tuplesortstate *state) +{ + ScanKey scanKey = state->scanKeys; + int nkey; + + for (nkey = 0; nkey < state->nKeys; nkey++, scanKey++) + { + scanKey->sk_flags ^= (SK_BT_DESC | SK_BT_NULLS_FIRST); + } +} + /* * Routines specialized for IndexTuple case @@ -2497,6 +2740,18 @@ readtup_index(Tuplesortstate *state, SortTuple *stup, &stup->isnull1); } +static void +reversedirection_index(Tuplesortstate *state) +{ + ScanKey scanKey = state->indexScanKey; + int nkey; + + for (nkey = 0; nkey < state->nKeys; nkey++, scanKey++) + { + scanKey->sk_flags ^= (SK_BT_DESC | SK_BT_NULLS_FIRST); + } +} + /* * Routines specialized for DatumTuple case @@ -2601,3 +2856,19 @@ readtup_datum(Tuplesortstate *state, SortTuple *stup, sizeof(tuplen)) != sizeof(tuplen)) elog(ERROR, "unexpected end of data"); } + +static void +reversedirection_datum(Tuplesortstate *state) +{ + state->sortFnFlags ^= (SK_BT_DESC | SK_BT_NULLS_FIRST); +} + +/* + * Convenience routine to free a tuple previously loaded into sort memory + */ +static void +free_sort_tuple(Tuplesortstate *state, SortTuple *stup) +{ + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); +} |