author     Andres Freund <andres@anarazel.de>    2019-05-19 15:10:28 -0700
committer  Andres Freund <andres@anarazel.de>    2019-05-19 15:10:28 -0700
commit     c3b23ae457ddc8b7bfacb3c0569278615a2df2cd (patch)
tree       c54478a1c46b6bd795cc8d67e083767e3580c224 /src/backend/access/heap
parent     bd1592e8570282b1650af6b8eede0016496daecd (diff)
download   postgresql-c3b23ae457ddc8b7bfacb3c0569278615a2df2cd.tar.gz
           postgresql-c3b23ae457ddc8b7bfacb3c0569278615a2df2cd.zip
Don't take predicate locks for analyze scans; refactor scan option passing.
Before this commit, when ANALYZE was run on a table under serializable
isolation (either via an explicit BEGIN TRANSACTION ISOLATION LEVEL
SERIALIZABLE, or with default_transaction_isolation set to serializable),
a null pointer dereference led to a crash.
The analyze scan needs neither a snapshot nor predicate locking, but
before this commit a scan only carried information about whether it was a
bitmap or sample scan.
Refactor the option passing to the scan_begin callback to use a bitmask
instead. Alternatively we could have added yet another boolean parameter,
but that seems harder to read. Even before this issue, various people
(Heikki, Tom, Robert) had suggested moving to a bitmask.
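For illustration, a bitmask of this kind might be laid out as in the
following sketch. The flag names (SO_TYPE_SEQSCAN, SO_ALLOW_SYNC, etc.)
are the ones used in the diff further down, but the enum layout and bit
values shown here are assumptions: the actual definitions live in
src/include/access/tableam.h, which is outside the diffstat shown below.

/*
 * Sketch of a ScanOptions-style bitmask; illustrative only, the real
 * definition is in src/include/access/tableam.h (not part of this diff).
 */
typedef enum ScanOptions
{
	/* one of SO_TYPE_* may be specified */
	SO_TYPE_SEQSCAN = 1 << 0,
	SO_TYPE_BITMAPSCAN = 1 << 1,
	SO_TYPE_SAMPLESCAN = 1 << 2,
	SO_TYPE_ANALYZE = 1 << 3,

	/* several of SO_ALLOW_* may be specified */
	SO_ALLOW_STRAT = 1 << 4,	/* allow use of access strategy */
	SO_ALLOW_SYNC = 1 << 5,		/* allow use of syncscan */
	SO_ALLOW_PAGEMODE = 1 << 6, /* allow page-at-a-time visibility checks */

	/* unregister snapshot at scan end? */
	SO_TEMP_SNAPSHOT = 1 << 7
} ScanOptions;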
These changes don't alter the scan APIs outside of tableam. The flags
argument could be exposed more widely, but that isn't necessary to fix
this problem, and the wrapper table_beginscan* functions encapsulate most
of that complexity anyway (see the sketch below).
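As a rough sketch of what such a wrapper looks like, a plain sequential
scan can assemble the flags word itself and hand it to the AM's
scan_begin callback, so callers keep the old simple interface. The real
inline helpers live in src/include/access/tableam.h (outside the diff
shown here), so treat the exact body below as an assumption.

/*
 * Sketch of a table_beginscan-style wrapper, assuming the ScanOptions
 * bits sketched above; the real wrapper is in access/tableam.h.
 */
static inline TableScanDesc
table_beginscan(Relation rel, Snapshot snapshot,
				int nkeys, struct ScanKeyData *key)
{
	uint32		flags = SO_TYPE_SEQSCAN |
		SO_ALLOW_STRAT | SO_ALLOW_SYNC | SO_ALLOW_PAGEMODE;

	return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key,
									   NULL, flags);
}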
After these changes, fixing the bug is trivial: just don't acquire a
predicate lock for analyze-style scans, as was already done for bitmap
heap scans. Also add an assert that a snapshot is passed when acquiring
the predicate lock, so that this kind of bug is caught even without
running under serializable.
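To make that concrete: an analyze-style scan can now announce itself via
the flags word and pass no snapshot at all, and heap_beginscan (see the
diff below) simply skips PredicateLockRelation() for it. A hedged sketch
of such a wrapper, again assuming the tableam.h helpers described above:

/*
 * Sketch of an analyze-scan wrapper: no snapshot, no SO_ALLOW_* bits, and
 * a type flag that tells heap_beginscan not to take a predicate lock.
 * Assumes the ScanOptions bits sketched earlier; the real helper lives in
 * access/tableam.h.
 */
static inline TableScanDesc
table_beginscan_analyze(Relation rel)
{
	uint32		flags = SO_TYPE_ANALYZE;

	return rel->rd_tableam->scan_begin(rel, NULL, 0, NULL, NULL, flags);
}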
Also add a comment noting that sample scans currently require predicate
locking the entire relation; that wasn't previously remarked upon.
Reported-By: Joe Wildish
Author: Andres Freund
Discussion:
https://postgr.es/m/4EA80A20-E9BF-49F1-9F01-5B66CAB21453@elusive.cx
https://postgr.es/m/20190411164947.nkii4gaeilt4bui7@alap3.anarazel.de
https://postgr.es/m/20190518203102.g7peu2fianukjuxm@alap3.anarazel.de
Diffstat (limited to 'src/backend/access/heap')
-rw-r--r--  src/backend/access/heap/heapam.c          | 120
-rw-r--r--  src/backend/access/heap/heapam_handler.c  |   6
2 files changed, 72 insertions(+), 54 deletions(-)
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index d8d4f3b1f5a..19d2c529d80 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -245,8 +245,8 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
 	if (!RelationUsesLocalBuffers(scan->rs_base.rs_rd) &&
 		scan->rs_nblocks > NBuffers / 4)
 	{
-		allow_strat = scan->rs_base.rs_allow_strat;
-		allow_sync = scan->rs_base.rs_allow_sync;
+		allow_strat = (scan->rs_base.rs_flags & SO_ALLOW_STRAT) != 0;
+		allow_sync = (scan->rs_base.rs_flags & SO_ALLOW_SYNC) != 0;
 	}
 	else
 		allow_strat = allow_sync = false;
@@ -267,7 +267,10 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
 	if (scan->rs_base.rs_parallel != NULL)
 	{
 		/* For parallel scan, believe whatever ParallelTableScanDesc says. */
-		scan->rs_base.rs_syncscan = scan->rs_base.rs_parallel->phs_syncscan;
+		if (scan->rs_base.rs_parallel->phs_syncscan)
+			scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
+		else
+			scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
 	}
 	else if (keep_startblock)
 	{
@@ -276,16 +279,19 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
 		 * so that rewinding a cursor doesn't generate surprising results.
 		 * Reset the active syncscan setting, though.
 		 */
-		scan->rs_base.rs_syncscan = (allow_sync && synchronize_seqscans);
+		if (allow_sync && synchronize_seqscans)
+			scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
+		else
+			scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
 	}
 	else if (allow_sync && synchronize_seqscans)
 	{
-		scan->rs_base.rs_syncscan = true;
+		scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
 		scan->rs_startblock = ss_get_location(scan->rs_base.rs_rd,
 											  scan->rs_nblocks);
 	}
 	else
 	{
-		scan->rs_base.rs_syncscan = false;
+		scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
 		scan->rs_startblock = 0;
 	}
@@ -305,11 +311,11 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
 		memcpy(scan->rs_base.rs_key, key, scan->rs_base.rs_nkeys * sizeof(ScanKeyData));

 	/*
-	 * Currently, we don't have a stats counter for bitmap heap scans (but the
-	 * underlying bitmap index scans will be counted) or sample scans (we only
-	 * update stats for tuple fetches there)
+	 * Currently, we only have a stats counter for sequential heap scans (but
+	 * e.g for bitmap scans the underlying bitmap index scans will be counted,
+	 * and for sample scans we update stats for tuple fetches).
 	 */
-	if (!scan->rs_base.rs_bitmapscan && !scan->rs_base.rs_samplescan)
+	if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN)
 		pgstat_count_heap_scan(scan->rs_base.rs_rd);
 }
@@ -325,7 +331,8 @@ heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlk
 	HeapScanDesc scan = (HeapScanDesc) sscan;

 	Assert(!scan->rs_inited);	/* else too late to change */
-	Assert(!scan->rs_base.rs_syncscan); /* else rs_startblock is significant */
+	/* else rs_startblock is significant */
+	Assert(!(scan->rs_base.rs_flags & SO_ALLOW_SYNC));

 	/* Check startBlk is valid (but allow case of zero blocks...) */
 	Assert(startBlk == 0 || startBlk < scan->rs_nblocks);
@@ -375,7 +382,7 @@ heapgetpage(TableScanDesc sscan, BlockNumber page)
 									   RBM_NORMAL, scan->rs_strategy);
 	scan->rs_cblock = page;

-	if (!scan->rs_base.rs_pageatatime)
+	if (!(scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE))
 		return;

 	buffer = scan->rs_cbuf;
@@ -574,7 +581,7 @@ heapgettup(HeapScanDesc scan,
 			 * time, and much more likely that we'll just bollix things for
 			 * forward scanners.
 			 */
-			scan->rs_base.rs_syncscan = false;
+			scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
 			/* start from last page of the scan */
 			if (scan->rs_startblock > 0)
 				page = scan->rs_startblock - 1;
@@ -738,7 +745,7 @@ heapgettup(HeapScanDesc scan,
 			 * a little bit backwards on every invocation, which is confusing.
 			 * We don't guarantee any specific ordering in general, though.
 			 */
-			if (scan->rs_base.rs_syncscan)
+			if (scan->rs_base.rs_flags & SO_ALLOW_SYNC)
 				ss_report_location(scan->rs_base.rs_rd, page);
 		}
@@ -885,7 +892,7 @@ heapgettup_pagemode(HeapScanDesc scan,
 			 * time, and much more likely that we'll just bollix things for
 			 * forward scanners.
 			 */
-			scan->rs_base.rs_syncscan = false;
+			scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
 			/* start from last page of the scan */
 			if (scan->rs_startblock > 0)
 				page = scan->rs_startblock - 1;
@@ -1037,7 +1044,7 @@ heapgettup_pagemode(HeapScanDesc scan,
 			 * a little bit backwards on every invocation, which is confusing.
 			 * We don't guarantee any specific ordering in general, though.
 			 */
-			if (scan->rs_base.rs_syncscan)
+			if (scan->rs_base.rs_flags & SO_ALLOW_SYNC)
 				ss_report_location(scan->rs_base.rs_rd, page);
 		}
@@ -1125,12 +1132,7 @@ TableScanDesc
 heap_beginscan(Relation relation, Snapshot snapshot,
 			   int nkeys, ScanKey key,
 			   ParallelTableScanDesc parallel_scan,
-			   bool allow_strat,
-			   bool allow_sync,
-			   bool allow_pagemode,
-			   bool is_bitmapscan,
-			   bool is_samplescan,
-			   bool temp_snap)
+			   uint32 flags)
 {
 	HeapScanDesc scan;
@@ -1151,33 +1153,39 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 	scan->rs_base.rs_rd = relation;
 	scan->rs_base.rs_snapshot = snapshot;
 	scan->rs_base.rs_nkeys = nkeys;
-	scan->rs_base.rs_bitmapscan = is_bitmapscan;
-	scan->rs_base.rs_samplescan = is_samplescan;
-	scan->rs_strategy = NULL;	/* set in initscan */
-	scan->rs_base.rs_allow_strat = allow_strat;
-	scan->rs_base.rs_allow_sync = allow_sync;
-	scan->rs_base.rs_temp_snap = temp_snap;
+	scan->rs_base.rs_flags = flags;
 	scan->rs_base.rs_parallel = parallel_scan;
+	scan->rs_strategy = NULL;	/* set in initscan */

 	/*
-	 * we can use page-at-a-time mode if it's an MVCC-safe snapshot
+	 * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
 	 */
-	scan->rs_base.rs_pageatatime =
-		allow_pagemode && snapshot && IsMVCCSnapshot(snapshot);
+	if (!(snapshot && IsMVCCSnapshot(snapshot)))
+		scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;

 	/*
-	 * For a seqscan in a serializable transaction, acquire a predicate lock
-	 * on the entire relation. This is required not only to lock all the
-	 * matching tuples, but also to conflict with new insertions into the
-	 * table. In an indexscan, we take page locks on the index pages covering
-	 * the range specified in the scan qual, but in a heap scan there is
-	 * nothing more fine-grained to lock. A bitmap scan is a different story,
-	 * there we have already scanned the index and locked the index pages
-	 * covering the predicate. But in that case we still have to lock any
-	 * matching heap tuples.
+	 * For seqscan and sample scans in a serializable transaction, acquire a
+	 * predicate lock on the entire relation. This is required not only to
+	 * lock all the matching tuples, but also to conflict with new insertions
+	 * into the table. In an indexscan, we take page locks on the index pages
+	 * covering the range specified in the scan qual, but in a heap scan there
+	 * is nothing more fine-grained to lock. A bitmap scan is a different
+	 * story, there we have already scanned the index and locked the index
+	 * pages covering the predicate. But in that case we still have to lock
+	 * any matching heap tuples. For sample scan we could optimize the locking
+	 * to be at least page-level granularity, but we'd need to add per-tuple
+	 * locking for that.
 	 */
-	if (!is_bitmapscan)
+	if (scan->rs_base.rs_flags & (SO_TYPE_SEQSCAN | SO_TYPE_SAMPLESCAN))
+	{
+		/*
+		 * Ensure a missing snapshot is noticed reliably, even if the
+		 * isolation mode means predicate locking isn't performed (and
+		 * therefore the snapshot isn't used here).
+		 */
+		Assert(snapshot);
 		PredicateLockRelation(relation, snapshot);
+	}

 	/* we only need to set this up once */
 	scan->rs_ctup.t_tableOid = RelationGetRelid(relation);
@@ -1204,10 +1212,21 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,

 	if (set_params)
 	{
-		scan->rs_base.rs_allow_strat = allow_strat;
-		scan->rs_base.rs_allow_sync = allow_sync;
-		scan->rs_base.rs_pageatatime =
-			allow_pagemode && IsMVCCSnapshot(scan->rs_base.rs_snapshot);
+		if (allow_strat)
+			scan->rs_base.rs_flags |= SO_ALLOW_STRAT;
+		else
+			scan->rs_base.rs_flags &= ~SO_ALLOW_STRAT;
+
+		if (allow_sync)
+			scan->rs_base.rs_flags |= SO_ALLOW_SYNC;
+		else
+			scan->rs_base.rs_flags &= ~SO_ALLOW_SYNC;
+
+		if (allow_pagemode && scan->rs_base.rs_snapshot &&
+			IsMVCCSnapshot(scan->rs_base.rs_snapshot))
+			scan->rs_base.rs_flags |= SO_ALLOW_PAGEMODE;
+		else
+			scan->rs_base.rs_flags &= ~SO_ALLOW_PAGEMODE;
 	}

 	/*
@@ -1246,7 +1265,7 @@ heap_endscan(TableScanDesc sscan)
 	if (scan->rs_strategy != NULL)
 		FreeAccessStrategy(scan->rs_strategy);

-	if (scan->rs_base.rs_temp_snap)
+	if (scan->rs_base.rs_flags & SO_TEMP_SNAPSHOT)
 		UnregisterSnapshot(scan->rs_base.rs_snapshot);

 	pfree(scan);
@@ -1288,7 +1307,7 @@ heap_getnext(TableScanDesc sscan, ScanDirection direction)

 	HEAPDEBUG_1;				/* heap_getnext( info ) */

-	if (scan->rs_base.rs_pageatatime)
+	if (scan->rs_base.rs_flags & SO_ALLOW_PAGEMODE)
 		heapgettup_pagemode(scan, direction,
 							scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
 	else
@@ -1335,11 +1354,10 @@ heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *s

 	HEAPAMSLOTDEBUG_1;			/* heap_getnextslot( info ) */

-	if (scan->rs_base.rs_pageatatime)
-		heapgettup_pagemode(scan, direction,
-							scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
+	if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
+		heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
 	else
-		heapgettup(scan, direction, scan->rs_base.rs_nkeys, scan->rs_base.rs_key);
+		heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);

 	if (scan->rs_ctup.t_data == NULL)
 	{
diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c
index 35553c7c92d..8d8161fd971 100644
--- a/src/backend/access/heap/heapam_handler.c
+++ b/src/backend/access/heap/heapam_handler.c
@@ -2323,7 +2323,7 @@ heapam_scan_sample_next_block(TableScanDesc scan, SampleScanState *scanstate)
 			 * a little bit backwards on every invocation, which is confusing.
 			 * We don't guarantee any specific ordering in general, though.
 			 */
-			if (scan->rs_syncscan)
+			if (scan->rs_flags & SO_ALLOW_SYNC)
 				ss_report_location(scan->rs_rd, blockno);

 			if (blockno == hscan->rs_startblock)
@@ -2357,7 +2357,7 @@ heapam_scan_sample_next_tuple(TableScanDesc scan, SampleScanState *scanstate,
 	HeapScanDesc hscan = (HeapScanDesc) scan;
 	TsmRoutine *tsm = scanstate->tsmroutine;
 	BlockNumber blockno = hscan->rs_cblock;
-	bool		pagemode = scan->rs_pageatatime;
+	bool		pagemode = (scan->rs_flags & SO_ALLOW_PAGEMODE) != 0;
 	Page		page;
 	bool		all_visible;
@@ -2504,7 +2504,7 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer,
 {
 	HeapScanDesc hscan = (HeapScanDesc) scan;

-	if (scan->rs_pageatatime)
+	if (scan->rs_flags & SO_ALLOW_PAGEMODE)
 	{
 		/*
 		 * In pageatatime mode, heapgetpage() already did visibility checks,