Diffstat (limited to 'src/backend')
-rw-r--r--  src/backend/storage/buffer/buf_init.c  61
-rw-r--r--  src/backend/storage/buffer/bufmgr.c    57
-rw-r--r--  src/backend/storage/lmgr/lwlock.c      15
3 files changed, 89 insertions(+), 44 deletions(-)
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 3ae2848da05..2a84a1ebade 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -20,6 +20,9 @@
BufferDescPadded *BufferDescriptors;
char *BufferBlocks;
+LWLockMinimallyPadded *BufferIOLWLockArray = NULL;
+LWLockTranche BufferIOLWLockTranche;
+LWLockTranche BufferContentLWLockTranche;
/*
@@ -65,22 +68,45 @@ void
InitBufferPool(void)
{
bool foundBufs,
- foundDescs;
+ foundDescs,
+ foundIOLocks;
/* Align descriptors to a cacheline boundary. */
- BufferDescriptors = (BufferDescPadded *) CACHELINEALIGN(
- ShmemInitStruct("Buffer Descriptors",
- NBuffers * sizeof(BufferDescPadded) + PG_CACHE_LINE_SIZE,
- &foundDescs));
+ BufferDescriptors = (BufferDescPadded *)
+ CACHELINEALIGN(
+ ShmemInitStruct("Buffer Descriptors",
+ NBuffers * sizeof(BufferDescPadded)
+ + PG_CACHE_LINE_SIZE,
+ &foundDescs));
BufferBlocks = (char *)
ShmemInitStruct("Buffer Blocks",
NBuffers * (Size) BLCKSZ, &foundBufs);
- if (foundDescs || foundBufs)
+ /* Align lwlocks to cacheline boundary */
+ BufferIOLWLockArray = (LWLockMinimallyPadded *)
+ CACHELINEALIGN(ShmemInitStruct("Buffer IO Locks",
+ NBuffers * (Size) sizeof(LWLockMinimallyPadded)
+ + PG_CACHE_LINE_SIZE,
+ &foundIOLocks));
+
+ BufferIOLWLockTranche.name = "Buffer IO Locks";
+ BufferIOLWLockTranche.array_base = BufferIOLWLockArray;
+ BufferIOLWLockTranche.array_stride = sizeof(LWLockMinimallyPadded);
+ LWLockRegisterTranche(LWTRANCHE_BUFFER_IO_IN_PROGRESS,
+ &BufferIOLWLockTranche);
+
+ BufferContentLWLockTranche.name = "Buffer Content Locks";
+ BufferContentLWLockTranche.array_base =
+ ((char *) BufferDescriptors) + offsetof(BufferDesc, content_lock);
+ BufferContentLWLockTranche.array_stride = sizeof(BufferDescPadded);
+ LWLockRegisterTranche(LWTRANCHE_BUFFER_CONTENT,
+ &BufferContentLWLockTranche);
+
+ if (foundDescs || foundBufs || foundIOLocks)
{
- /* both should be present or neither */
- Assert(foundDescs && foundBufs);
+ /* should find all of these, or none of them */
+ Assert(foundDescs && foundBufs && foundIOLocks);
/* note: this path is only taken in EXEC_BACKEND case */
}
else
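[Note: the BufferDescriptorGetContentLock()/BufferDescriptorGetIOLock() accessors used throughout this patch come from the companion header change (src/include/storage/buf_internals.h), which is not part of this diff. A minimal sketch of their shape, consistent with the tranche bases registered above:]

/* Sketch of the companion buf_internals.h accessors (assumed shape):
 * the content lock stays embedded in the cache-line-padded BufferDesc,
 * while the I/O lock lives in the new minimally padded array, indexed
 * by the descriptor's buf_id. */
#define BufferDescriptorGetContentLock(bdesc) \
	(&(bdesc)->content_lock)
#define BufferDescriptorGetIOLock(bdesc) \
	(&(BufferIOLWLockArray[(bdesc)->buf_id]).lock)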
@@ -110,8 +136,11 @@ InitBufferPool(void)
*/
buf->freeNext = i + 1;
- buf->io_in_progress_lock = LWLockAssign();
- buf->content_lock = LWLockAssign();
+ LWLockInitialize(BufferDescriptorGetContentLock(buf),
+ LWTRANCHE_BUFFER_CONTENT);
+
+ LWLockInitialize(BufferDescriptorGetIOLock(buf),
+ LWTRANCHE_BUFFER_IO_IN_PROGRESS);
}
/* Correct last entry of linked list */
@@ -144,5 +173,17 @@ BufferShmemSize(void)
/* size of stuff controlled by freelist.c */
size = add_size(size, StrategyShmemSize());
+ /*
+ * It would be nice to include the I/O locks in the BufferDesc, but that
+ * would increase the size of a BufferDesc to more than one cache line, and
+ * benchmarking has shown that keeping every BufferDesc aligned on a cache
+ * line boundary is important for performance. So, instead, the array of
+ * I/O locks is allocated in a separate tranche. Because those locks are
+ * not highly contended, we lay out the array with minimal padding.
+ */
+ size = add_size(size, mul_size(NBuffers, sizeof(LWLockMinimallyPadded)));
+ /* to allow aligning the above */
+ size = add_size(size, PG_CACHE_LINE_SIZE);
+
return size;
}
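[Note: the extra PG_CACHE_LINE_SIZE bytes added above exist purely so the returned chunk can be shifted up to an aligned address. A stand-alone sketch of the arithmetic, with illustrative names standing in for c.h's TYPEALIGN/CACHELINEALIGN; PG_CACHE_LINE_SIZE is normally 128:]

#include <stdint.h>

/* Illustrative stand-ins for PostgreSQL's TYPEALIGN/CACHELINEALIGN. */
#define CACHE_LINE	128
#define ALIGN_UP(p) \
	(((uintptr_t) (p) + (CACHE_LINE - 1)) & ~((uintptr_t) (CACHE_LINE - 1)))

/*
 * Rounding a pointer up can skip at most CACHE_LINE - 1 bytes, so
 * requesting array_size + CACHE_LINE bytes guarantees the aligned
 * array still fits inside the allocation.
 */
char *
align_to_cache_line(char *shmem_chunk)
{
	return (char *) ALIGN_UP(shmem_chunk);
}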
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 0d5fb0db88f..a1ad23ccf52 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -738,7 +738,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
if (!isLocalBuf)
{
if (mode == RBM_ZERO_AND_LOCK)
- LWLockAcquire(bufHdr->content_lock, LW_EXCLUSIVE);
+ LWLockAcquire(BufferDescriptorGetContentLock(bufHdr),
+ LW_EXCLUSIVE);
else if (mode == RBM_ZERO_AND_CLEANUP_LOCK)
LockBufferForCleanup(BufferDescriptorGetBuffer(bufHdr));
}
@@ -879,7 +880,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
if ((mode == RBM_ZERO_AND_LOCK || mode == RBM_ZERO_AND_CLEANUP_LOCK) &&
!isLocalBuf)
{
- LWLockAcquire(bufHdr->content_lock, LW_EXCLUSIVE);
+ LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_EXCLUSIVE);
}
if (isLocalBuf)
@@ -1045,7 +1046,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* happens to be trying to split the page the first one got from
* StrategyGetBuffer.)
*/
- if (LWLockConditionalAcquire(buf->content_lock, LW_SHARED))
+ if (LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
+ LW_SHARED))
{
/*
* If using a nondefault strategy, and writing the buffer
@@ -1067,7 +1069,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
StrategyRejectBuffer(strategy, buf))
{
/* Drop lock/pin and loop around for another buffer */
- LWLockRelease(buf->content_lock);
+ LWLockRelease(BufferDescriptorGetContentLock(buf));
UnpinBuffer(buf, true);
continue;
}
@@ -1080,7 +1082,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
smgr->smgr_rnode.node.relNode);
FlushBuffer(buf, NULL);
- LWLockRelease(buf->content_lock);
+ LWLockRelease(BufferDescriptorGetContentLock(buf));
TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
smgr->smgr_rnode.node.spcNode,
@@ -1395,7 +1397,7 @@ MarkBufferDirty(Buffer buffer)
Assert(BufferIsPinned(buffer));
/* unfortunately we can't check if the lock is held exclusively */
- Assert(LWLockHeldByMe(bufHdr->content_lock));
+ Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
LockBufHdr(bufHdr);
@@ -1595,8 +1597,8 @@ UnpinBuffer(BufferDesc *buf, bool fixOwner)
if (ref->refcount == 0)
{
/* I'd better not still hold any locks on the buffer */
- Assert(!LWLockHeldByMe(buf->content_lock));
- Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
+ Assert(!LWLockHeldByMe(BufferDescriptorGetContentLock(buf)));
+ Assert(!LWLockHeldByMe(BufferDescriptorGetIOLock(buf)));
LockBufHdr(buf);
@@ -2116,11 +2118,11 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
* buffer is clean by the time we've locked it.)
*/
PinBuffer_Locked(bufHdr);
- LWLockAcquire(bufHdr->content_lock, LW_SHARED);
+ LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
FlushBuffer(bufHdr, NULL);
- LWLockRelease(bufHdr->content_lock);
+ LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
UnpinBuffer(bufHdr, true);
return result | BUF_WRITTEN;
@@ -2926,9 +2928,9 @@ FlushRelationBuffers(Relation rel)
(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
{
PinBuffer_Locked(bufHdr);
- LWLockAcquire(bufHdr->content_lock, LW_SHARED);
+ LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
FlushBuffer(bufHdr, rel->rd_smgr);
- LWLockRelease(bufHdr->content_lock);
+ LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
UnpinBuffer(bufHdr, true);
}
else
@@ -2978,9 +2980,9 @@ FlushDatabaseBuffers(Oid dbid)
(bufHdr->flags & BM_VALID) && (bufHdr->flags & BM_DIRTY))
{
PinBuffer_Locked(bufHdr);
- LWLockAcquire(bufHdr->content_lock, LW_SHARED);
+ LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
FlushBuffer(bufHdr, NULL);
- LWLockRelease(bufHdr->content_lock);
+ LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
UnpinBuffer(bufHdr, true);
}
else
@@ -3004,7 +3006,7 @@ FlushOneBuffer(Buffer buffer)
bufHdr = GetBufferDescriptor(buffer - 1);
- Assert(LWLockHeldByMe(bufHdr->content_lock));
+ Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
FlushBuffer(bufHdr, NULL);
}
@@ -3101,7 +3103,7 @@ MarkBufferDirtyHint(Buffer buffer, bool buffer_std)
Assert(GetPrivateRefCount(buffer) > 0);
/* here, either share or exclusive lock is OK */
- Assert(LWLockHeldByMe(bufHdr->content_lock));
+ Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
/*
* This routine might get called many times on the same page, if we are
@@ -3254,11 +3256,11 @@ LockBuffer(Buffer buffer, int mode)
buf = GetBufferDescriptor(buffer - 1);
if (mode == BUFFER_LOCK_UNLOCK)
- LWLockRelease(buf->content_lock);
+ LWLockRelease(BufferDescriptorGetContentLock(buf));
else if (mode == BUFFER_LOCK_SHARE)
- LWLockAcquire(buf->content_lock, LW_SHARED);
+ LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_SHARED);
else if (mode == BUFFER_LOCK_EXCLUSIVE)
- LWLockAcquire(buf->content_lock, LW_EXCLUSIVE);
+ LWLockAcquire(BufferDescriptorGetContentLock(buf), LW_EXCLUSIVE);
else
elog(ERROR, "unrecognized buffer lock mode: %d", mode);
}
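[Note: LockBuffer()'s public contract is unchanged by this hunk; only the lock it manipulates has moved behind the accessor. An illustrative caller, not from this patch, with rel and blkno assumed in scope:]

/* Illustrative usage sketch, not part of this patch. */
Buffer		buf = ReadBuffer(rel, blkno);

LockBuffer(buf, BUFFER_LOCK_SHARE);		/* shared content lock */
/* ... inspect the page ... */
LockBuffer(buf, BUFFER_LOCK_UNLOCK);	/* release the content lock */
ReleaseBuffer(buf);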
@@ -3279,7 +3281,8 @@ ConditionalLockBuffer(Buffer buffer)
buf = GetBufferDescriptor(buffer - 1);
- return LWLockConditionalAcquire(buf->content_lock, LW_EXCLUSIVE);
+ return LWLockConditionalAcquire(BufferDescriptorGetContentLock(buf),
+ LW_EXCLUSIVE);
}
/*
@@ -3489,8 +3492,8 @@ WaitIO(BufferDesc *buf)
UnlockBufHdr(buf);
if (!(sv_flags & BM_IO_IN_PROGRESS))
break;
- LWLockAcquire(buf->io_in_progress_lock, LW_SHARED);
- LWLockRelease(buf->io_in_progress_lock);
+ LWLockAcquire(BufferDescriptorGetIOLock(buf), LW_SHARED);
+ LWLockRelease(BufferDescriptorGetIOLock(buf));
}
}
@@ -3523,7 +3526,7 @@ StartBufferIO(BufferDesc *buf, bool forInput)
* Grab the io_in_progress lock so that other processes can wait for
* me to finish the I/O.
*/
- LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
+ LWLockAcquire(BufferDescriptorGetIOLock(buf), LW_EXCLUSIVE);
LockBufHdr(buf);
@@ -3537,7 +3540,7 @@ StartBufferIO(BufferDesc *buf, bool forInput)
* him to get unwedged.
*/
UnlockBufHdr(buf);
- LWLockRelease(buf->io_in_progress_lock);
+ LWLockRelease(BufferDescriptorGetIOLock(buf));
WaitIO(buf);
}
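[Note: the comment above is the heart of the I/O handshake: because the performer holds the I/O lock exclusively for the whole operation, a waiter can block simply by cycling the lock in shared mode. Condensed sketch; the function name is illustrative:]

/*
 * Sketch of the waiter side, condensed from WaitIO() above: the
 * shared acquire blocks until the I/O performer releases its
 * exclusive hold; the caller then rechecks BM_IO_IN_PROGRESS.
 */
static void
wait_for_io_sketch(BufferDesc *buf)
{
	LWLockAcquire(BufferDescriptorGetIOLock(buf), LW_SHARED);
	LWLockRelease(BufferDescriptorGetIOLock(buf));
}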
@@ -3547,7 +3550,7 @@ StartBufferIO(BufferDesc *buf, bool forInput)
{
/* someone else already did the I/O */
UnlockBufHdr(buf);
- LWLockRelease(buf->io_in_progress_lock);
+ LWLockRelease(BufferDescriptorGetIOLock(buf));
return false;
}
@@ -3595,7 +3598,7 @@ TerminateBufferIO(BufferDesc *buf, bool clear_dirty, int set_flag_bits)
InProgressBuf = NULL;
- LWLockRelease(buf->io_in_progress_lock);
+ LWLockRelease(BufferDescriptorGetIOLock(buf));
}
/*
@@ -3620,7 +3623,7 @@ AbortBufferIO(void)
* we can use TerminateBufferIO. Anyone who's executing WaitIO on the
* buffer will be in a busy spin until we succeed in doing this.
*/
- LWLockAcquire(buf->io_in_progress_lock, LW_EXCLUSIVE);
+ LWLockAcquire(BufferDescriptorGetIOLock(buf), LW_EXCLUSIVE);
LockBufHdr(buf);
Assert(buf->flags & BM_IO_IN_PROGRESS);
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 84691df053b..d43fb61edb3 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -344,18 +344,15 @@ NumLWLocks(void)
int numLocks;
/*
- * Possibly this logic should be spread out among the affected modules,
- * the same way that shmem space estimation is done. But for now, there
- * are few enough users of LWLocks that we can get away with just keeping
- * the knowledge here.
+ * Many users of LWLocks no longer reserve space in the main array here,
+ * but instead allocate separate tranches. The latter approach has the
+ * advantage of allowing LWLOCK_STATS and LOCK_DEBUG to produce more
+ * useful output.
*/
/* Predefined LWLocks */
numLocks = NUM_FIXED_LWLOCKS;
- /* bufmgr.c needs two for each shared buffer */
- numLocks += 2 * NBuffers;
-
/* proc.c needs one for each backend or auxiliary process */
numLocks += MaxBackends + NUM_AUXILIARY_PROCS;
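[Note: the payoff mentioned in the rewritten comment is that a registered tranche lets lwlock.c map any lock pointer back to a tranche name plus an array index, instead of a bare address. Simplified sketch of that mapping, mirroring lwlock.c's T_NAME/T_ID macros:]

/*
 * Sketch, simplified from lwlock.c's T_ID macro: with array_base and
 * array_stride registered, debugging output can identify a lock as,
 * e.g., "Buffer IO Locks" plus its index.
 */
static int
lock_index(LWLock *lock, LWLockTranche *tranche)
{
	return (int) (((char *) lock - (char *) tranche->array_base) /
				  tranche->array_stride);
}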
@@ -423,6 +420,10 @@ CreateLWLocks(void)
StaticAssertExpr(LW_VAL_EXCLUSIVE > (uint32) MAX_BACKENDS,
"MAX_BACKENDS too big for lwlock.c");
+ StaticAssertExpr(sizeof(LWLock) <= LWLOCK_MINIMAL_SIZE &&
+ sizeof(LWLock) <= LWLOCK_PADDED_SIZE,
+ "Miscalculated LWLock padding");
+
if (!IsUnderPostmaster)
{
int numLocks = NumLWLocks();
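[Note: the new StaticAssertExpr guards the padding assumptions this patch introduces. The minimally padded union comes from the companion lwlock.h change, not shown here; a sketch of its likely shape, where LWLOCK_MINIMAL_SIZE rounds sizeof(LWLock) up to 32 or 64 bytes:]

/* Sketch of the companion lwlock.h addition (assumed shape): pad each
 * lock only to a small power of two rather than a full cache line,
 * which is acceptable because the buffer I/O locks are lightly
 * contended. */
typedef union LWLockMinimallyPadded
{
	LWLock		lock;
	char		pad[LWLOCK_MINIMAL_SIZE];
} LWLockMinimallyPadded;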