aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/nbtree/nbtsort.c30
-rw-r--r--src/backend/commands/tablecmds.c27
-rw-r--r--src/backend/storage/smgr/smgr.c13
3 files changed, 46 insertions, 24 deletions
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 28f147fcafe..1852aef6723 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -56,7 +56,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.85 2004/07/21 22:31:20 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.86 2004/08/15 23:44:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -322,16 +322,15 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
smgrwrite(wstate->index->rd_smgr, wstate->btws_pages_written++,
(char *) wstate->btws_zeropage,
- !wstate->btws_use_wal);
+ true);
}
/*
- * Now write the page. If not using WAL, say isTemp = true, to suppress
- * duplicate fsync. If we are using WAL, it surely isn't a temp index,
- * so !use_wal is a sufficient condition.
+ * Now write the page. We say isTemp = true even if it's not a
+ * temp index, because there's no need for smgr to schedule an fsync
+ * for this write; we'll do it ourselves before ending the build.
*/
- smgrwrite(wstate->index->rd_smgr, blkno, (char *) page,
- !wstate->btws_use_wal);
+ smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true);
if (blkno == wstate->btws_pages_written)
wstate->btws_pages_written++;
@@ -802,9 +801,20 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
_bt_uppershutdown(wstate, state);
/*
- * If we weren't using WAL, and the index isn't temp, we must fsync it
- * down to disk before it's safe to commit the transaction.
+ * If the index isn't temp, we must fsync it down to disk before it's
+ * safe to commit the transaction. (For a temp index we don't care
+ * since the index will be uninteresting after a crash anyway.)
+ *
+ * It's obvious that we must do this when not WAL-logging the build.
+ * It's less obvious that we have to do it even if we did WAL-log the
+ * index pages. The reason is that since we're building outside
+ * shared buffers, a CHECKPOINT occurring during the build has no way
+ * to flush the previously written data to disk (indeed it won't know
+ * the index even exists). A crash later on would replay WAL from the
+ * checkpoint, therefore it wouldn't replay our earlier WAL entries.
+ * If we do not fsync those pages here, they might still not be on disk
+ * when the crash occurs.
*/
- if (!wstate->btws_use_wal && !wstate->index->rd_istemp)
+ if (!wstate->index->rd_istemp)
smgrimmedsync(wstate->index->rd_smgr);
}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index 87a2fb4856d..a7eac55f252 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.125 2004/08/13 04:50:28 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.126 2004/08/15 23:44:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -5479,18 +5479,29 @@ copy_relation_data(Relation rel, SMgrRelation dst)
}
/*
- * Now write the page. If not using WAL, say isTemp = true, to
- * suppress duplicate fsync. If we are using WAL, it surely isn't a
- * temp rel, so !use_wal is a sufficient condition.
+ * Now write the page. We say isTemp = true even if it's not a
+ * temp rel, because there's no need for smgr to schedule an fsync
+ * for this write; we'll do it ourselves below.
*/
- smgrwrite(dst, blkno, buf, !use_wal);
+ smgrwrite(dst, blkno, buf, true);
}
/*
- * If we weren't using WAL, and the rel isn't temp, we must fsync it
- * down to disk before it's safe to commit the transaction.
+ * If the rel isn't temp, we must fsync it down to disk before it's
+ * safe to commit the transaction. (For a temp rel we don't care
+ * since the rel will be uninteresting after a crash anyway.)
+ *
+ * It's obvious that we must do this when not WAL-logging the copy.
+ * It's less obvious that we have to do it even if we did WAL-log the
+ * copied pages. The reason is that since we're copying outside
+ * shared buffers, a CHECKPOINT occurring during the copy has no way
+ * to flush the previously written data to disk (indeed it won't know
+ * the new rel even exists). A crash later on would replay WAL from the
+ * checkpoint, therefore it wouldn't replay our earlier WAL entries.
+ * If we do not fsync those pages here, they might still not be on disk
+ * when the crash occurs.
*/
- if (!use_wal && !rel->rd_istemp)
+ if (!rel->rd_istemp)
smgrimmedsync(dst);
}
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index c9efd6341bc..aba6b4a7d8e 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.77 2004/07/17 03:28:55 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.78 2004/08/15 23:44:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -621,14 +621,15 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
*
* Synchronously force all of the specified relation down to disk.
*
- * This is really only useful for non-WAL-logged index building:
- * instead of incrementally WAL-logging the index build steps,
- * we can just write completed index pages to disk with smgrwrite
+ * This is useful for building completely new relations (eg, new
+ * indexes). Instead of incrementally WAL-logging the index build
+ * steps, we can just write completed index pages to disk with smgrwrite
* or smgrextend, and then fsync the completed index file before
* committing the transaction. (This is sufficient for purposes of
* crash recovery, since it effectively duplicates forcing a checkpoint
- * for the completed index. But it is *not* workable if one wishes
- * to use the WAL log for PITR or replication purposes.)
+ * for the completed index. But it is *not* sufficient if one wishes
+ * to use the WAL log for PITR or replication purposes: in that case
+ * we have to make WAL entries as well.)
*
* The preceding writes should specify isTemp = true to avoid
* duplicative fsyncs.