aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/transam/xlog.c
diff options
context:
space:
mode:
authorMichael Paquier <michael@paquier.xyz>2023-07-25 13:38:58 +0900
committerMichael Paquier <michael@paquier.xyz>2023-07-25 13:38:58 +0900
commit71e4cc6b8ec6a08f81973bd387fe575134cd0bdf (patch)
treece6ce6a6caab58d61b1b8c60f7869c09c66b60ed /src/backend/access/transam/xlog.c
parentd38ad8e31dee1a69f4f6d4b6bb4e007e3751a93c (diff)
downloadpostgresql-71e4cc6b8ec6a08f81973bd387fe575134cd0bdf.tar.gz
postgresql-71e4cc6b8ec6a08f81973bd387fe575134cd0bdf.zip
Optimize WAL insertion lock acquisition and release with some atomics
The WAL insertion lock variable insertingAt is currently being read and written with the help of the LWLock wait list lock to avoid any read of torn values. This wait list lock can become a point of contention on a highly concurrent write workloads. This commit switches insertingAt to a 64b atomic variable that provides torn-free reads/writes. On platforms without 64b atomic support, the fallback implementation uses spinlocks to provide the same guarantees for the values read. LWLockWaitForVar(), through LWLockConflictsWithVar(), reads the new value to check if it still needs to wait with a u64 atomic operation. LWLockUpdateVar() updates the variable before waking up the waiters with an exchange_u64 (full memory barrier). LWLockReleaseClearVar() now uses also an exchange_u64 to reset the variable. Before this commit, all these steps relied on LWLockWaitListLock() and LWLockWaitListUnlock(). This reduces contention on LWLock wait list lock and improves performance of highly-concurrent write workloads. Here are some numbers using pg_logical_emit_message() (HEAD at d6677b93) with various arbitrary record lengths and clients up to 1k on a rather-large machine (64 vCPUs, 512GB of RAM, 16 cores per sockets, 2 sockets), in terms of TPS numbers coming from pgbench: message_size_b | 16 | 64 | 256 | 1024 --------------------+--------+--------+--------+------- patch_4_clients | 83830 | 82929 | 80478 | 73131 patch_16_clients | 267655 | 264973 | 250566 | 213985 patch_64_clients | 380423 | 378318 | 356907 | 294248 patch_256_clients | 360915 | 354436 | 326209 | 263664 patch_512_clients | 332654 | 321199 | 287521 | 240128 patch_1024_clients | 288263 | 276614 | 258220 | 217063 patch_2048_clients | 252280 | 243558 | 230062 | 192429 patch_4096_clients | 212566 | 213654 | 205951 | 166955 head_4_clients | 83686 | 83766 | 81233 | 73749 head_16_clients | 266503 | 265546 | 249261 | 213645 head_64_clients | 366122 | 363462 | 341078 | 261707 head_256_clients | 132600 | 132573 | 134392 | 165799 head_512_clients | 118937 | 114332 | 116860 | 150672 head_1024_clients | 133546 | 115256 | 125236 | 151390 head_2048_clients | 137877 | 117802 | 120909 | 138165 head_4096_clients | 113440 | 115611 | 120635 | 114361 Bharath has been measuring similar improvements, where the limit of the WAL insertion lock begins to be felt when more than 256 concurrent clients are involved in this specific workload. An extra patch has been discussed to introduce a fast-exit path in LWLockUpdateVar() when there are no waiters, still this does not influence the write-heavy workload cases discussed as there are always waiters. This will be considered separately. Author: Bharath Rupireddy Reviewed-by: Nathan Bossart, Andres Freund, Michael Paquier Discussion: https://postgr.es/m/CALj2ACVF+6jLvqKe6xhDzCCkr=rfd6upaGc3477Pji1Ke9G7Bg@mail.gmail.com
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r--src/backend/access/transam/xlog.c4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 8b0710abe60..f7d4750fc0b 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -376,7 +376,7 @@ typedef struct XLogwrtResult
typedef struct
{
LWLock lock;
- XLogRecPtr insertingAt;
+ pg_atomic_uint64 insertingAt;
XLogRecPtr lastImportantAt;
} WALInsertLock;
@@ -4611,7 +4611,7 @@ XLOGShmemInit(void)
for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
{
LWLockInitialize(&WALInsertLocks[i].l.lock, LWTRANCHE_WAL_INSERT);
- WALInsertLocks[i].l.insertingAt = InvalidXLogRecPtr;
+ pg_atomic_init_u64(&WALInsertLocks[i].l.insertingAt, InvalidXLogRecPtr);
WALInsertLocks[i].l.lastImportantAt = InvalidXLogRecPtr;
}