aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2017-11-10 16:50:50 -0500
committerRobert Haas <rhaas@postgresql.org>2017-11-10 16:50:50 -0500
commit5edc63bda68a77c4d38f0cbeae1c4271f9ef4100 (patch)
tree214cd7c2d8fe70017061fb3e4fc803437d78f19a /src
parent0c98d0dd5c85ce0c8485ae1a8351a26b83c4338b (diff)
downloadpostgresql-5edc63bda68a77c4d38f0cbeae1c4271f9ef4100.tar.gz
postgresql-5edc63bda68a77c4d38f0cbeae1c4271f9ef4100.zip
Account for the effect of lossy pages when costing bitmap scans.
Dilip Kumar, reviewed by Alexander Kumenkov, Amul Sul, and me. Some final adjustments by me. Discussion: http://postgr.es/m/CAFiTN-sYtqUOXQ4SpuhTv0Z9gD0si3YxZGv_PQAAMX8qbOotcg@mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r--src/backend/nodes/tidbitmap.c37
-rw-r--r--src/backend/optimizer/path/costsize.c59
-rw-r--r--src/include/nodes/tidbitmap.h1
3 files changed, 75 insertions, 22 deletions
diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c
index c47d5849ef7..acfe6b263c6 100644
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@@ -265,7 +265,6 @@ TIDBitmap *
tbm_create(long maxbytes, dsa_area *dsa)
{
TIDBitmap *tbm;
- long nbuckets;
/* Create the TIDBitmap struct and zero all its fields */
tbm = makeNode(TIDBitmap);
@@ -273,17 +272,7 @@ tbm_create(long maxbytes, dsa_area *dsa)
tbm->mcxt = CurrentMemoryContext;
tbm->status = TBM_EMPTY;
- /*
- * Estimate number of hashtable entries we can have within maxbytes. This
- * estimates the hash cost as sizeof(PagetableEntry), which is good enough
- * for our purpose. Also count an extra Pointer per entry for the arrays
- * created during iteration readout.
- */
- nbuckets = maxbytes /
- (sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer));
- nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */
- nbuckets = Max(nbuckets, 16); /* sanity limit */
- tbm->maxentries = (int) nbuckets;
+ tbm->maxentries = (int) tbm_calculate_entries(maxbytes);
tbm->lossify_start = 0;
tbm->dsa = dsa;
tbm->dsapagetable = InvalidDsaPointer;
@@ -1546,3 +1535,27 @@ pagetable_free(pagetable_hash *pagetable, void *pointer)
tbm->dsapagetableold = InvalidDsaPointer;
}
}
+
+/*
+ * tbm_calculate_entries
+ *
+ * Estimate number of hashtable entries we can have within maxbytes.
+ */
+long
+tbm_calculate_entries(double maxbytes)
+{
+ long nbuckets;
+
+ /*
+ * Estimate number of hashtable entries we can have within maxbytes. This
+ * estimates the hash cost as sizeof(PagetableEntry), which is good enough
+ * for our purpose. Also count an extra Pointer per entry for the arrays
+ * created during iteration readout.
+ */
+ nbuckets = maxbytes /
+ (sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer));
+ nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */
+ nbuckets = Max(nbuckets, 16); /* sanity limit */
+
+ return nbuckets;
+}
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 98fb16e85a0..2d2df60886a 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -5171,6 +5171,8 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
double T;
double pages_fetched;
double tuples_fetched;
+ double heap_pages;
+ long maxentries;
/*
* Fetch total cost of obtaining the bitmap, as well as its total
@@ -5185,6 +5187,24 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
+ /*
+ * For a single scan, the number of heap pages that need to be fetched is
+ * the same as the Mackert and Lohman formula for the case T <= b (ie, no
+ * re-reads needed).
+ */
+ pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
+
+ /*
+ * Calculate the number of pages fetched from the heap. Then based on
+ * current work_mem estimate get the estimated maxentries in the bitmap.
+ * (Note that we always do this calculation based on the number of pages
+ * that would be fetched in a single iteration, even if loop_count > 1.
+ * That's correct, because only that number of entries will be stored in
+ * the bitmap at one time.)
+ */
+ heap_pages = Min(pages_fetched, baserel->pages);
+ maxentries = tbm_calculate_entries(work_mem * 1024L);
+
if (loop_count > 1)
{
/*
@@ -5199,22 +5219,41 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
root);
pages_fetched /= loop_count;
}
- else
- {
- /*
- * For a single scan, the number of heap pages that need to be fetched
- * is the same as the Mackert and Lohman formula for the case T <= b
- * (ie, no re-reads needed).
- */
- pages_fetched =
- (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
- }
if (pages_fetched >= T)
pages_fetched = T;
else
pages_fetched = ceil(pages_fetched);
+ if (maxentries < heap_pages)
+ {
+ double exact_pages;
+ double lossy_pages;
+
+ /*
+ * Crude approximation of the number of lossy pages. Because of the
+ * way tbm_lossify() is coded, the number of lossy pages increases
+ * very sharply as soon as we run short of memory; this formula has
+ * that property and seems to perform adequately in testing, but it's
+ * possible we could do better somehow.
+ */
+ lossy_pages = Max(0, heap_pages - maxentries / 2);
+ exact_pages = heap_pages - lossy_pages;
+
+ /*
+ * If there are lossy pages then recompute the number of tuples
+ * processed by the bitmap heap node. We assume here that the chance
+ * of a given tuple coming from an exact page is the same as the
+ * chance that a given page is exact. This might not be true, but
+ * it's not clear how we can do any better.
+ */
+ if (lossy_pages > 0)
+ tuples_fetched =
+ clamp_row_est(indexSelectivity *
+ (exact_pages / heap_pages) * baserel->tuples +
+ (lossy_pages / heap_pages) * baserel->tuples);
+ }
+
if (cost)
*cost = indexTotalCost;
if (tuple)
diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h
index f9a1902da87..d3ad0a5566f 100644
--- a/src/include/nodes/tidbitmap.h
+++ b/src/include/nodes/tidbitmap.h
@@ -70,5 +70,6 @@ extern void tbm_end_iterate(TBMIterator *iterator);
extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
dsa_pointer dp);
+extern long tbm_calculate_entries(double maxbytes);
#endif /* TIDBITMAP_H */