about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- doc/src/sgml/ref/create_table.sgml | 3
-rw-r--r-- src/backend/access/gist/gist.c | 24
-rw-r--r-- src/backend/access/gist/gistbuild.c | 12
-rw-r--r-- src/backend/access/gist/gistutil.c | 30
-rw-r--r-- src/backend/access/gist/gistvacuum.c | 2
-rw-r--r-- src/backend/access/transam/xlog.c | 51
-rw-r--r-- src/backend/storage/buffer/bufmgr.c | 19
-rw-r--r-- src/bin/pg_controldata/pg_controldata.c | 3
-rw-r--r-- src/bin/pg_resetxlog/pg_resetxlog.c | 1
-rw-r--r-- src/include/access/gist_private.h | 2
-rw-r--r-- src/include/access/xlog.h | 1
-rw-r--r-- src/include/catalog/pg_control.h | 4
12 files changed, 121 insertions, 31 deletions
diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml
index 8872920446a..af11eb05a65 100644
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@@ -182,8 +182,7 @@ CREATE [ [ GLOBAL | LOCAL ] { TEMPORARY | TEMP } | UNLOGGED ] TABLE [ IF NOT EXI
automatically truncated after a crash or unclean shutdown. The contents
of an unlogged table are also not replicated to standby servers.
Any indexes created on an unlogged table are automatically unlogged as
- well; however, unlogged <link linkend="GiST">GiST indexes</link> are
- currently not supported and cannot be created on an unlogged table.
+ well.
</para>
</listitem>
</varlistentry>
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index e2d3390300e..eba95f18664 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -16,6 +16,7 @@
#include "access/genam.h"
#include "access/gist_private.h"
+#include "access/heapam_xlog.h"
#include "catalog/index.h"
#include "catalog/pg_collation.h"
#include "miscadmin.h"
@@ -71,9 +72,22 @@ createTempGistContext(void)
Datum
gistbuildempty(PG_FUNCTION_ARGS)
{
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("unlogged GiST indexes are not supported")));
+ Relation index = (Relation) PG_GETARG_POINTER(0);
+ Buffer buffer;
+
+ /* Initialize the root page */
+ buffer = ReadBufferExtended(index, INIT_FORKNUM, P_NEW, RBM_NORMAL, NULL);
+ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+
+ /* Initialize the page and write a WAL record for it */
+ START_CRIT_SECTION();
+ GISTInitBuffer(buffer, F_LEAF);
+ MarkBufferDirty(buffer);
+ log_newpage_buffer(buffer);
+ END_CRIT_SECTION();
+
+ /* Unlock and release the buffer */
+ UnlockReleaseBuffer(buffer);
PG_RETURN_VOID();
}
@@ -391,7 +405,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
dist, oldrlink, oldnsn, leftchildbuf,
markfollowright);
else
- recptr = GetXLogRecPtrForTemp();
+ recptr = gistGetFakeLSN(rel);
for (ptr = dist; ptr; ptr = ptr->next)
{
@@ -448,7 +462,7 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
}
else
{
- recptr = GetXLogRecPtrForTemp();
+ recptr = gistGetFakeLSN(rel);
PageSetLSN(page, recptr);
}
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index aec5b524809..0cf22cdf3a8 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -158,16 +158,6 @@ gistbuild(PG_FUNCTION_ARGS)
elog(ERROR, "index \"%s\" already contains data",
RelationGetRelationName(index));
- /*
- * We can't yet handle unlogged GiST indexes, because we depend on LSNs.
- * This is duplicative of an error in gistbuildempty, but we want to check
- * here so as to throw error before doing all the index-build work.
- */
- if (heap->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("unlogged GiST indexes are not supported")));
-
/* no locking is needed */
buildstate.giststate = initGISTstate(index);
@@ -204,7 +194,7 @@ gistbuild(PG_FUNCTION_ARGS)
PageSetTLI(page, ThisTimeLineID);
}
else
- PageSetLSN(page, GetXLogRecPtrForTemp());
+ PageSetLSN(page, gistGetFakeLSN(heap));
UnlockReleaseBuffer(buffer);
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index e5c3d69fca7..f7d50ddb712 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -798,16 +798,30 @@ gistoptions(PG_FUNCTION_ARGS)
}
/*
- * Temporary GiST indexes are not WAL-logged, but we need LSNs to detect
- * concurrent page splits anyway. GetXLogRecPtrForTemp() provides a fake
- * sequence of LSNs for that purpose. Each call generates an LSN that is
- * greater than any previous value returned by this function in the same
- * session.
+ * Temporary and unlogged GiST indexes are not WAL-logged, but we need LSNs
+ * to detect concurrent page splits anyway. This function provides a fake
+ * sequence of LSNs for that purpose.
*/
XLogRecPtr
-GetXLogRecPtrForTemp(void)
+gistGetFakeLSN(Relation rel)
{
static XLogRecPtr counter = 1;
- counter++;
- return counter;
+
+ if (rel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
+ {
+ /*
+ * Temporary relations are only accessible in our session, so a
+ * simple backend-local counter will do.
+ */
+ return counter++;
+ }
+ else
+ {
+ /*
+ * Unlogged relations are accessible from other backends, and survive
+ * (clean) restarts. GetFakeLSNForUnloggedRel() handles that for us.
+ */
+ Assert(rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED);
+ return GetFakeLSNForUnloggedRel();
+ }
}
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index b5be6765d4e..1d9f8320166 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -238,7 +238,7 @@ gistbulkdelete(PG_FUNCTION_ARGS)
PageSetTLI(page, ThisTimeLineID);
}
else
- PageSetLSN(page, GetXLogRecPtrForTemp());
+ PageSetLSN(page, gistGetFakeLSN(rel));
END_CRIT_SECTION();
}
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 140f9109a6f..479c14da902 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -391,6 +391,10 @@ typedef struct XLogCtlData
XLogRecPtr asyncXactLSN; /* LSN of newest async commit/abort */
XLogSegNo lastRemovedSegNo; /* latest removed/recycled XLOG segment */
+ /* Fake LSN counter, for unlogged relations. Protected by ulsn_lck */
+ XLogRecPtr unloggedLSN;
+ slock_t ulsn_lck;
+
/* Protected by WALWriteLock: */
XLogCtlWrite Write;
@@ -3697,6 +3701,31 @@ GetSystemIdentifier(void)
}
/*
+ * Returns a fake LSN for unlogged relations.
+ *
+ * Each call generates an LSN that is greater than any previous value
+ * returned. The current counter value is saved and restored across clean
+ * shutdowns, but like unlogged relations, does not survive a crash. This can
+ * be used in lieu of real LSN values returned by XLogInsert, if you need an
+ * LSN-like increasing sequence of numbers without writing any WAL.
+ */
+XLogRecPtr
+GetFakeLSNForUnloggedRel(void)
+{
+ XLogRecPtr nextUnloggedLSN;
+
+ /* use volatile pointer to prevent code rearrangement */
+ volatile XLogCtlData *xlogctl = XLogCtl;
+
+ /* advance the unloggedLSN counter while holding its spinlock */
+ SpinLockAcquire(&xlogctl->ulsn_lck);
+ nextUnloggedLSN = xlogctl->unloggedLSN++;
+ SpinLockRelease(&xlogctl->ulsn_lck);
+
+ return nextUnloggedLSN;
+}
+
+/*
* Auto-tune the number of XLOG buffers.
*
* The preferred setting for wal_buffers is about 3% of shared_buffers, with
@@ -3844,6 +3873,7 @@ XLOGShmemInit(void)
XLogCtl->WalWriterSleeping = false;
XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
SpinLockInit(&XLogCtl->info_lck);
+ SpinLockInit(&XLogCtl->ulsn_lck);
InitSharedLatch(&XLogCtl->recoveryWakeupLatch);
/*
@@ -3989,6 +4019,7 @@ BootStrapXLOG(void)
ControlFile->time = checkPoint.time;
ControlFile->checkPoint = checkPoint.redo;
ControlFile->checkPointCopy = checkPoint;
+ ControlFile->unloggedLSN = 1;
/* Set important parameter values for use when replaying WAL */
ControlFile->MaxConnections = MaxConnections;
@@ -5033,6 +5064,16 @@ StartupXLOG(void)
XLogCtl->ckptXid = checkPoint.nextXid;
/*
+ * Initialize unlogged LSN. On a clean shutdown, it's restored from the
+ * control file. On recovery, all unlogged relations are blown away, so
+ * the unlogged LSN counter can be reset too.
+ */
+ if (ControlFile->state == DB_SHUTDOWNED)
+ XLogCtl->unloggedLSN = ControlFile->unloggedLSN;
+ else
+ XLogCtl->unloggedLSN = 1;
+
+ /*
* We must replay WAL entries using the same TimeLineID they were created
* under, so temporarily adopt the TLI indicated by the checkpoint (see
* also xlog_redo()).
@@ -6916,6 +6957,16 @@ CreateCheckPoint(int flags)
/* crash recovery should always recover to the end of WAL */
ControlFile->minRecoveryPoint = InvalidXLogRecPtr;
ControlFile->minRecoveryPointTLI = 0;
+
+ /*
+ * Persist unloggedLSN value. It's reset on crash recovery, so this goes
+ * unused on non-shutdown checkpoints, but seems useful to store it always
+ * for debugging purposes.
+ */
+ SpinLockAcquire(&XLogCtl->ulsn_lck);
+ ControlFile->unloggedLSN = XLogCtl->unloggedLSN;
+ SpinLockRelease(&XLogCtl->ulsn_lck);
+
UpdateControlFile();
LWLockRelease(ControlFileLock);
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 13b80aefc5b..405ff61130e 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1922,9 +1922,24 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
* Force XLOG flush up to buffer's LSN. This implements the basic WAL
* rule that log updates must hit disk before any of the data-file changes
* they describe do.
+ *
+ * However, this rule does not apply to unlogged relations, which will be
+ * lost after a crash anyway. Most unlogged relation pages do not bear
+ * LSNs since we never emit WAL records for them, and therefore flushing
+ * up through the buffer LSN would be useless, but harmless. However, GiST
+ * indexes use LSNs internally to track page-splits, and therefore unlogged
+ * GiST pages bear "fake" LSNs generated by GetFakeLSNForUnloggedRel. It
+ * is unlikely but possible that the fake LSN counter could advance past
+ * the WAL insertion point; and if it did happen, attempting to flush WAL
+ * through that location would fail, with disastrous system-wide
+ * consequences. To make sure that can't happen, skip the flush if the
+ * buffer isn't permanent.
*/
- recptr = BufferGetLSN(buf);
- XLogFlush(recptr);
+ if (buf->flags & BM_PERMANENT)
+ {
+ recptr = BufferGetLSN(buf);
+ XLogFlush(recptr);
+ }
/*
* Now it's safe to write buffer to disk. Note that no one else should
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c
index 33725154fd7..cab25684d96 100644
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -240,6 +240,9 @@ main(int argc, char *argv[])
ControlFile.checkPointCopy.oldestMultiDB);
printf(_("Time of latest checkpoint: %s\n"),
ckpttime_str);
+ printf(_("Fake LSN counter for unlogged rels: %X/%X\n"),
+ (uint32) (ControlFile.unloggedLSN >> 32),
+ (uint32) ControlFile.unloggedLSN);
printf(_("Min recovery ending location: %X/%X\n"),
(uint32) (ControlFile.minRecoveryPoint >> 32),
(uint32) ControlFile.minRecoveryPoint);
diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c
index 272813eaabf..317d8606a0b 100644
--- a/src/bin/pg_resetxlog/pg_resetxlog.c
+++ b/src/bin/pg_resetxlog/pg_resetxlog.c
@@ -510,6 +510,7 @@ GuessControlValues(void)
ControlFile.state = DB_SHUTDOWNED;
ControlFile.time = (pg_time_t) time(NULL);
ControlFile.checkPoint = ControlFile.checkPointCopy.redo;
+ ControlFile.unloggedLSN = 1;
/* minRecoveryPoint, backupStartPoint and backupEndPoint can be left zero */
diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h
index c2f9031b4fe..cae6dbc225c 100644
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -512,7 +512,7 @@ extern void gistMakeUnionKey(GISTSTATE *giststate, int attno,
GISTENTRY *entry2, bool isnull2,
Datum *dst, bool *dstisnull);
-extern XLogRecPtr GetXLogRecPtrForTemp(void);
+extern XLogRecPtr gistGetFakeLSN(Relation rel);
/* gistvacuum.c */
extern Datum gistbulkdelete(PG_FUNCTION_ARGS);
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 72e32425964..8a65492a346 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -294,6 +294,7 @@ extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno);
extern void UpdateControlFile(void);
extern uint64 GetSystemIdentifier(void);
+extern XLogRecPtr GetFakeLSNForUnloggedRel(void);
extern Size XLOGShmemSize(void);
extern void XLOGShmemInit(void);
extern void BootStrapXLOG(void);
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h
index 0c647e77ad7..306d1888540 100644
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -21,7 +21,7 @@
/* Version identifier for this pg_control format */
-#define PG_CONTROL_VERSION 934
+#define PG_CONTROL_VERSION 935
/*
* Body of CheckPoint XLOG records. This is declared here because we keep
@@ -126,6 +126,8 @@ typedef struct ControlFileData
CheckPoint checkPointCopy; /* copy of last check point record */
+ XLogRecPtr unloggedLSN; /* current fake LSN value, for unlogged rels */
+
/*
* These two values determine the minimum point we must recover up to
* before starting up: