aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Melanie Plageman <melanieplageman@gmail.com> 2025-03-24 16:40:59 -0400
committer: Melanie Plageman <melanieplageman@gmail.com> 2025-03-24 16:40:59 -0400
commit: aea916fe555a351fe20bf31d56f5f0d027d9db61 (patch)
tree: 4c32330d4ed62148382de7624ff3190b292ce112 /src
parent: 0e3e0ec06b995f6809f315752cbf5ff67902e095 (diff)
download: postgresql-aea916fe555a351fe20bf31d56f5f0d027d9db61.tar.gz
postgresql-aea916fe555a351fe20bf31d56f5f0d027d9db61.zip
Fix bitmapheapscan incorrect recheck of NULL tuples
The bitmap heap scan skip fetch optimization skips fetching the heap block when a page is set all-visible in the visibility map and no columns from the table are needed to satisfy the query.

2b73a8cd33b and c3953226a07 changed the control flow of bitmap heap scan to use the read stream API. The read stream API returns buffers containing blocks to the user. To make this work with the skip fetch optimization, we keep a count of the empty tuples we need to emit for all the blocks skipped and only emit the empty tuples after processing the next block fetched from the heap or at the end of the scan.

It's incorrect to recheck NULL tuples, so we must set `recheck` to false before yielding control back to BitmapHeapNext(). This was done before emitting any remaining empty tuples at the end of the scan but not for empty tuples emitted during the scan. This meant that if a page fetched from the heap did require recheck and set `recheck` to true and then we emitted empty tuples for subsequent blocks, we would get wrong results.

Fix this by always setting `recheck` to false before emitting empty tuples.

Reported-by: Alexander Lakhin <exclusion@gmail.com>
Tested-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/496f7acd-881c-4df3-9bd3-8f8534dfec26%40gmail.com
Diffstat (limited to 'src')
-rw-r--r-- src/backend/access/heap/heapam_handler.c | 28
-rw-r--r-- src/test/regress/expected/bitmapops.out | 14
-rw-r--r-- src/test/regress/sql/bitmapops.sql | 10
3 files changed, 41 insertions, 11 deletions
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 4da4dc84580..24d3765aa20 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -2147,6 +2147,19 @@ heapam_scan_bitmap_next_tuple(TableScanDesc scan,
*/
ExecStoreAllNullTuple(slot);
bscan->rs_empty_tuples_pending--;
+
+ /*
+ * We do not recheck all NULL tuples. Because the streaming read
+ * API only yields TBMIterateResults for blocks actually fetched
+ * from the heap, we must unset `recheck` ourselves here to ensure
+ * correct results.
+ *
+ * Our read stream callback accrues a count of empty tuples to
+ * emit and then emits them after emitting tuples from the next
+ * fetched block. If no blocks need fetching, we'll emit the
+ * accrued count at the end of the scan.
+ */
+ *recheck = false;
return true;
}
@@ -2510,13 +2523,14 @@ BitmapHeapScanNextBlock(TableScanDesc scan,
}
/*
- * Bitmap is exhausted. Time to emit empty tuples if relevant. We emit
- * all empty tuples at the end instead of emitting them per block we
- * skip fetching. This is necessary because the streaming read API
- * will only return TBMIterateResults for blocks actually fetched.
- * When we skip fetching a block, we keep track of how many empty
- * tuples to emit at the end of the BitmapHeapScan. We do not recheck
- * all NULL tuples.
+ * The bitmap is exhausted. Now emit any remaining empty tuples. The
+ * read stream API only returns TBMIterateResults for blocks actually
+ * fetched from the heap. Our callback will accrue a count of empty
+ * tuples to emit for all blocks we skipped fetching. So, if we skip
+ * fetching heap blocks at the end of the relation (or no heap blocks
+ * are fetched) we need to ensure we emit empty tuples before ending
+ * the scan. We don't recheck empty tuples so ensure `recheck` is
+ * unset.
*/
*recheck = false;
return bscan->rs_empty_tuples_pending > 0;
diff --git a/src/test/regress/expected/bitmapops.out b/src/test/regress/expected/bitmapops.out
index 3570973e3ca..64068e0469c 100644
--- a/src/test/regress/expected/bitmapops.out
+++ b/src/test/regress/expected/bitmapops.out
@@ -8,7 +8,7 @@
-- there's a maximum number of a,b combinations in the table.
-- That allows us to test all the different combinations of
-- lossy and non-lossy pages with the minimum amount of data
-CREATE TABLE bmscantest (a int, b int, t text);
+CREATE TABLE bmscantest (a int, b int, t text) WITH (autovacuum_enabled = false);
INSERT INTO bmscantest
SELECT (r%53), (r%59), 'foooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo'
FROM generate_series(1,70000) r;
@@ -20,7 +20,17 @@ set enable_indexscan=false;
set enable_seqscan=false;
-- Lower work_mem to trigger use of lossy bitmaps
set work_mem = 64;
--- Test bitmap-and.
+-- Test bitmap-and without the skip fetch optimization.
+SELECT count(*) FROM bmscantest WHERE a = 1 AND b = 1;
+ count
+-------
+ 23
+(1 row)
+
+-- Test that we return correct results when using the skip fetch optimization
+-- VACUUM FREEZE will set all the pages in the relation all-visible, enabling
+-- the optimization.
+VACUUM (FREEZE) bmscantest;
SELECT count(*) FROM bmscantest WHERE a = 1 AND b = 1;
count
-------
diff --git a/src/test/regress/sql/bitmapops.sql b/src/test/regress/sql/bitmapops.sql
index 498f4721b51..1b175f6ff96 100644
--- a/src/test/regress/sql/bitmapops.sql
+++ b/src/test/regress/sql/bitmapops.sql
@@ -12,7 +12,7 @@
-- That allows us to test all the different combinations of
-- lossy and non-lossy pages with the minimum amount of data
-CREATE TABLE bmscantest (a int, b int, t text);
+CREATE TABLE bmscantest (a int, b int, t text) WITH (autovacuum_enabled = false);
INSERT INTO bmscantest
SELECT (r%53), (r%59), 'foooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo'
@@ -29,8 +29,14 @@ set enable_seqscan=false;
-- Lower work_mem to trigger use of lossy bitmaps
set work_mem = 64;
+-- Test bitmap-and without the skip fetch optimization.
+SELECT count(*) FROM bmscantest WHERE a = 1 AND b = 1;
+
+-- Test that we return correct results when using the skip fetch optimization
+-- VACUUM FREEZE will set all the pages in the relation all-visible, enabling
+-- the optimization.
+VACUUM (FREEZE) bmscantest;
--- Test bitmap-and.
SELECT count(*) FROM bmscantest WHERE a = 1 AND b = 1;
-- Test bitmap-or.