aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2016-11-15 10:28:46 -0500
committerRobert Haas <rhaas@postgresql.org>2016-11-15 10:30:33 -0500
commitfc19c1801bd2dbee1043b0c0b62e07747d30ea1c (patch)
treef7613540d4995d8b78c8714c09dedf07ed07d637
parent00c6d8077f39191a6f61a847ce7d55073d8f5a6f (diff)
downloadpostgresql-fc19c1801bd2dbee1043b0c0b62e07747d30ea1c.tar.gz
postgresql-fc19c1801bd2dbee1043b0c0b62e07747d30ea1c.zip
Limit the number of number of tapes used for a sort to 501.
Gigantic numbers of tapes don't work out well. Original patch by Peter Geoghegan; comments entirely rewritten by me.
-rw-r--r--src/backend/utils/sort/tuplesort.c17
1 files changed, 15 insertions, 2 deletions
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index 46512cfca1a..3ad81862f35 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -108,7 +108,8 @@
* code we determine the number of tapes M on the basis of workMem: we want
* workMem/M to be large enough that we read a fair amount of data each time
* we preread from a tape, so as to maintain the locality of access described
- * above. Nonetheless, with large workMem we can have many tapes.
+ * above. Nonetheless, with large workMem we can have many tapes (but not
+ * too many -- see the comments in tuplesort_merge_order).
*
*
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
@@ -247,6 +248,7 @@ typedef enum
* tape during a preread cycle (see discussion at top of file).
*/
#define MINORDER 6 /* minimum merge order */
+#define MAXORDER 500 /* maximum merge order */
#define TAPE_BUFFER_OVERHEAD (BLCKSZ * 3)
#define MERGE_BUFFER_SIZE (BLCKSZ * 32)
@@ -2313,8 +2315,19 @@ tuplesort_merge_order(int64 allowedMem)
mOrder = (allowedMem - TAPE_BUFFER_OVERHEAD) /
(MERGE_BUFFER_SIZE + TAPE_BUFFER_OVERHEAD);
- /* Even in minimum memory, use at least a MINORDER merge */
+ /*
+ * Even in minimum memory, use at least a MINORDER merge. On the other
+ * hand, even when we have lots of memory, do not use more than a MAXORDER
+ * merge. Tapes are pretty cheap, but they're not entirely free. Each
+ * additional tape reduces the amount of memory available to build runs,
+ * which in turn can cause the same sort to need more runs, which makes
+ * merging slower even if it can still be done in a single pass. Also,
+ * high order merges are quite slow due to CPU cache effects; it can be
+ * faster to pay the I/O cost of a polyphase merge than to perform a single
+ * merge pass across many hundreds of tapes.
+ */
mOrder = Max(mOrder, MINORDER);
+ mOrder = Min(mOrder, MAXORDER);
return mOrder;
}