about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/backend/access/gin/ginscan.c 4
-rw-r--r-- src/backend/access/gist/gistget.c 4
-rw-r--r-- src/backend/access/hash/hashsearch.c 4
-rw-r--r-- src/backend/access/heap/heapam.c 40
-rw-r--r-- src/backend/access/index/genam.c 4
-rw-r--r-- src/backend/access/index/indexam.c 12
-rw-r--r-- src/backend/access/nbtree/nbtsearch.c 4
-rw-r--r-- src/backend/access/transam/twophase.c 5
-rw-r--r-- src/backend/access/transam/twophase_rmgr.c 12
-rw-r--r-- src/backend/access/transam/xact.c 15
-rw-r--r-- src/backend/executor/nodeBitmapHeapscan.c 8
-rw-r--r-- src/backend/postmaster/bgwriter.c 14
-rw-r--r-- src/backend/postmaster/pgstat.c 836
-rw-r--r-- src/backend/storage/buffer/bufmgr.c 12
-rw-r--r-- src/backend/utils/cache/relcache.c 4
-rw-r--r-- src/include/access/heapam.h 6
-rw-r--r-- src/include/access/relscan.h 6
-rw-r--r-- src/include/access/twophase_rmgr.h 5
-rw-r--r-- src/include/pgstat.h 206
-rw-r--r-- src/include/utils/rel.h 15
20 files changed, 802 insertions, 414 deletions
diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c
index 22896bc5d77..2eb1ba95b4b 100644
--- a/src/backend/access/gin/ginscan.c
+++ b/src/backend/access/gin/ginscan.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.9 2007/01/31 15:09:45 teodor Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.10 2007/05/27 03:50:38 tgl Exp $
*-------------------------------------------------------------------------
*/
@@ -189,7 +189,7 @@ newScanKey(IndexScanDesc scan)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("GIN index does not support search with void query")));
- pgstat_count_index_scan(&scan->xs_pgstat_info);
+ pgstat_count_index_scan(scan->indexRelation);
}
Datum
diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c
index 226812322aa..ed839de4034 100644
--- a/src/backend/access/gist/gistget.c
+++ b/src/backend/access/gist/gistget.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.65 2007/04/06 22:33:41 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gist/gistget.c,v 1.66 2007/05/27 03:50:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -165,7 +165,7 @@ gistnext(IndexScanDesc scan, ScanDirection dir, ItemPointer tids,
stk->next = NULL;
stk->block = GIST_ROOT_BLKNO;
- pgstat_count_index_scan(&scan->xs_pgstat_info);
+ pgstat_count_index_scan(scan->indexRelation);
}
else if (so->curbuf == InvalidBuffer)
{
diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c
index 5de0f402297..104a0c14de3 100644
--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.49 2007/05/03 16:45:58 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/hash/hashsearch.c,v 1.50 2007/05/27 03:50:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -127,7 +127,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir)
ItemPointer current;
OffsetNumber offnum;
- pgstat_count_index_scan(&scan->xs_pgstat_info);
+ pgstat_count_index_scan(rel);
current = &(so->hashso_curpos);
ItemPointerSetInvalid(current);
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index ee2be7cfdb1..9edeaff1306 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.232 2007/04/08 01:26:27 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.233 2007/05/27 03:50:38 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -100,7 +100,7 @@ initscan(HeapScanDesc scan, ScanKey key)
if (key != NULL)
memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData));
- pgstat_count_heap_scan(&scan->rs_pgstat_info);
+ pgstat_count_heap_scan(scan->rs_rd);
}
/*
@@ -701,6 +701,8 @@ relation_open(Oid relationId, LOCKMODE lockmode)
if (!RelationIsValid(r))
elog(ERROR, "could not open relation with OID %u", relationId);
+ pgstat_initstats(r);
+
return r;
}
@@ -743,6 +745,8 @@ try_relation_open(Oid relationId, LOCKMODE lockmode)
if (!RelationIsValid(r))
elog(ERROR, "could not open relation with OID %u", relationId);
+ pgstat_initstats(r);
+
return r;
}
@@ -787,6 +791,8 @@ relation_open_nowait(Oid relationId, LOCKMODE lockmode)
if (!RelationIsValid(r))
elog(ERROR, "could not open relation with OID %u", relationId);
+ pgstat_initstats(r);
+
return r;
}
@@ -873,8 +879,6 @@ heap_open(Oid relationId, LOCKMODE lockmode)
errmsg("\"%s\" is a composite type",
RelationGetRelationName(r))));
- pgstat_initstats(&r->pgstat_info, r);
-
return r;
}
@@ -903,8 +907,6 @@ heap_openrv(const RangeVar *relation, LOCKMODE lockmode)
errmsg("\"%s\" is a composite type",
RelationGetRelationName(r))));
- pgstat_initstats(&r->pgstat_info, r);
-
return r;
}
@@ -954,8 +956,6 @@ heap_beginscan(Relation relation, Snapshot snapshot,
else
scan->rs_key = NULL;
- pgstat_initstats(&scan->rs_pgstat_info, relation);
-
initscan(scan, key);
return scan;
@@ -1059,7 +1059,7 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction)
*/
HEAPDEBUG_3; /* heap_getnext returning tuple */
- pgstat_count_heap_getnext(&scan->rs_pgstat_info);
+ pgstat_count_heap_getnext(scan->rs_rd);
return &(scan->rs_ctup);
}
@@ -1086,6 +1086,10 @@ heap_getnext(HeapScanDesc scan, ScanDirection direction)
* and return it in *userbuf (so the caller must eventually unpin it); when
* keep_buf = false, the pin is released and *userbuf is set to InvalidBuffer.
*
+ * stats_relation is the relation to charge the heap_fetch operation against
+ * for statistical purposes. (This could be the heap rel itself, an
+ * associated index, or NULL to not count the fetch at all.)
+ *
* It is somewhat inconsistent that we ereport() on invalid block number but
* return false on invalid item number. There are a couple of reasons though.
* One is that the caller can relatively easily check the block number for
@@ -1101,12 +1105,12 @@ heap_fetch(Relation relation,
HeapTuple tuple,
Buffer *userbuf,
bool keep_buf,
- PgStat_Info *pgstat_info)
+ Relation stats_relation)
{
/* Assume *userbuf is undefined on entry */
*userbuf = InvalidBuffer;
return heap_release_fetch(relation, snapshot, tuple,
- userbuf, keep_buf, pgstat_info);
+ userbuf, keep_buf, stats_relation);
}
/*
@@ -1125,7 +1129,7 @@ heap_release_fetch(Relation relation,
HeapTuple tuple,
Buffer *userbuf,
bool keep_buf,
- PgStat_Info *pgstat_info)
+ Relation stats_relation)
{
ItemPointer tid = &(tuple->t_self);
ItemId lp;
@@ -1210,9 +1214,9 @@ heap_release_fetch(Relation relation,
*/
*userbuf = buffer;
- /* Count the successful fetch in *pgstat_info, if given. */
- if (pgstat_info != NULL)
- pgstat_count_heap_fetch(pgstat_info);
+ /* Count the successful fetch against appropriate rel, if any */
+ if (stats_relation != NULL)
+ pgstat_count_heap_fetch(stats_relation);
return true;
}
@@ -1517,7 +1521,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
*/
CacheInvalidateHeapTuple(relation, heaptup);
- pgstat_count_heap_insert(&relation->pgstat_info);
+ pgstat_count_heap_insert(relation);
/*
* If heaptup is a private copy, release it. Don't forget to copy t_self
@@ -1807,7 +1811,7 @@ l1:
if (have_tuple_lock)
UnlockTuple(relation, &(tp.t_self), ExclusiveLock);
- pgstat_count_heap_delete(&relation->pgstat_info);
+ pgstat_count_heap_delete(relation);
return HeapTupleMayBeUpdated;
}
@@ -2269,7 +2273,7 @@ l2:
if (have_tuple_lock)
UnlockTuple(relation, &(oldtup.t_self), ExclusiveLock);
- pgstat_count_heap_update(&relation->pgstat_info);
+ pgstat_count_heap_update(relation);
/*
* If heaptup is a private copy, release it. Don't forget to copy t_self
diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c
index 49ffff6e51d..0009739180c 100644
--- a/src/backend/access/index/genam.c
+++ b/src/backend/access/index/genam.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.61 2007/01/20 18:43:35 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/access/index/genam.c,v 1.62 2007/05/27 03:50:38 tgl Exp $
*
* NOTES
* many of the old access method routines have been turned into
@@ -96,8 +96,6 @@ RelationGetIndexScan(Relation indexRelation,
scan->xs_ctup.t_data = NULL;
scan->xs_cbuf = InvalidBuffer;
- pgstat_initstats(&scan->xs_pgstat_info, indexRelation);
-
/*
* Let the AM fill in the key and any opaque data it wants.
*/
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 23522ba740e..d905013a5fc 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.97 2007/01/05 22:19:23 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.98 2007/05/27 03:50:38 tgl Exp $
*
* INTERFACE ROUTINES
* index_open - open an index relation by relation OID
@@ -145,8 +145,6 @@ index_open(Oid relationId, LOCKMODE lockmode)
errmsg("\"%s\" is not an index",
RelationGetRelationName(r))));
- pgstat_initstats(&r->pgstat_info, r);
-
return r;
}
@@ -433,14 +431,14 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
return NULL; /* failure exit */
}
- pgstat_count_index_tuples(&scan->xs_pgstat_info, 1);
+ pgstat_count_index_tuples(scan->indexRelation, 1);
/*
* Fetch the heap tuple and see if it matches the snapshot.
*/
if (heap_release_fetch(scan->heapRelation, scan->xs_snapshot,
heapTuple, &scan->xs_cbuf, true,
- &scan->xs_pgstat_info))
+ scan->indexRelation))
break;
/* Skip if no undeleted tuple at this location */
@@ -502,7 +500,7 @@ index_getnext_indexitem(IndexScanDesc scan,
Int32GetDatum(direction)));
if (found)
- pgstat_count_index_tuples(&scan->xs_pgstat_info, 1);
+ pgstat_count_index_tuples(scan->indexRelation, 1);
return found;
}
@@ -543,7 +541,7 @@ index_getmulti(IndexScanDesc scan,
Int32GetDatum(max_tids),
PointerGetDatum(returned_tids)));
- pgstat_count_index_tuples(&scan->xs_pgstat_info, *returned_tids);
+ pgstat_count_index_tuples(scan->indexRelation, *returned_tids);
return found;
}
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 036a97a8d04..b947d770aa2 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.112 2007/04/06 22:33:42 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.113 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -453,7 +453,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
int i;
StrategyNumber strat_total;
- pgstat_count_index_scan(&scan->xs_pgstat_info);
+ pgstat_count_index_scan(rel);
/*
* Examine the scan keys and eliminate any redundant keys; also mark the
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 6f495a84087..7fdf5a7eed3 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.30 2007/04/30 21:01:52 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.31 2007/05/27 03:50:39 tgl Exp $
*
* NOTES
* Each global transaction is associated with a global transaction
@@ -1211,7 +1211,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
else
ProcessRecords(bufptr, xid, twophase_postabort_callbacks);
- pgstat_count_xact_commit();
+ /* Count the prepared xact as committed or aborted */
+ AtEOXact_PgStat(isCommit);
/*
* And now we can clean up our mess.
diff --git a/src/backend/access/transam/twophase_rmgr.c b/src/backend/access/transam/twophase_rmgr.c
index e93bac7b2d8..9c2f14a1a38 100644
--- a/src/backend/access/transam/twophase_rmgr.c
+++ b/src/backend/access/transam/twophase_rmgr.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/transam/twophase_rmgr.c,v 1.4 2007/01/05 22:19:23 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/twophase_rmgr.c,v 1.5 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -16,6 +16,7 @@
#include "access/twophase_rmgr.h"
#include "commands/async.h"
+#include "pgstat.h"
#include "storage/lock.h"
#include "utils/flatfiles.h"
#include "utils/inval.h"
@@ -27,7 +28,8 @@ const TwoPhaseCallback twophase_recover_callbacks[TWOPHASE_RM_MAX_ID + 1] =
lock_twophase_recover, /* Lock */
NULL, /* Inval */
NULL, /* flat file update */
- NULL /* notify/listen */
+ NULL, /* notify/listen */
+ NULL /* pgstat */
};
const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID + 1] =
@@ -36,7 +38,8 @@ const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID + 1] =
lock_twophase_postcommit, /* Lock */
inval_twophase_postcommit, /* Inval */
flatfile_twophase_postcommit, /* flat file update */
- notify_twophase_postcommit /* notify/listen */
+ notify_twophase_postcommit, /* notify/listen */
+ pgstat_twophase_postcommit /* pgstat */
};
const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] =
@@ -45,5 +48,6 @@ const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] =
lock_twophase_postabort, /* Lock */
NULL, /* Inval */
NULL, /* flat file update */
- NULL /* notify/listen */
+ NULL, /* notify/listen */
+ pgstat_twophase_postabort /* pgstat */
};
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index f2685ee0b34..c16b4fa6be9 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.242 2007/04/30 21:01:52 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.243 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1661,8 +1661,7 @@ CommitTransaction(void)
AtEOXact_Files();
AtEOXact_ComboCid();
AtEOXact_HashTables(true);
- pgstat_clear_snapshot();
- pgstat_count_xact_commit();
+ AtEOXact_PgStat(true);
pgstat_report_txn_timestamp(0);
CurrentResourceOwner = NULL;
@@ -1796,6 +1795,7 @@ PrepareTransaction(void)
AtPrepare_UpdateFlatFiles();
AtPrepare_Inval();
AtPrepare_Locks();
+ AtPrepare_PgStat();
/*
* Here is where we really truly prepare.
@@ -1853,6 +1853,8 @@ PrepareTransaction(void)
/* notify and flatfiles don't need a postprepare call */
+ PostPrepare_PgStat();
+
PostPrepare_Inval();
PostPrepare_smgr();
@@ -1880,7 +1882,7 @@ PrepareTransaction(void)
AtEOXact_Files();
AtEOXact_ComboCid();
AtEOXact_HashTables(true);
- pgstat_clear_snapshot();
+ /* don't call AtEOXact_PgStat here */
CurrentResourceOwner = NULL;
ResourceOwnerDelete(TopTransactionResourceOwner);
@@ -2035,8 +2037,7 @@ AbortTransaction(void)
AtEOXact_Files();
AtEOXact_ComboCid();
AtEOXact_HashTables(false);
- pgstat_clear_snapshot();
- pgstat_count_xact_rollback();
+ AtEOXact_PgStat(false);
pgstat_report_txn_timestamp(0);
/*
@@ -3749,6 +3750,7 @@ CommitSubTransaction(void)
AtEOSubXact_Files(true, s->subTransactionId,
s->parent->subTransactionId);
AtEOSubXact_HashTables(true, s->nestingLevel);
+ AtEOSubXact_PgStat(true, s->nestingLevel);
/*
* We need to restore the upper transaction's read-only state, in case the
@@ -3861,6 +3863,7 @@ AbortSubTransaction(void)
AtEOSubXact_Files(false, s->subTransactionId,
s->parent->subTransactionId);
AtEOSubXact_HashTables(false, s->nestingLevel);
+ AtEOSubXact_PgStat(false, s->nestingLevel);
}
/*
diff --git a/src/backend/executor/nodeBitmapHeapscan.c b/src/backend/executor/nodeBitmapHeapscan.c
index 3e9a91de2f5..07729da2be6 100644
--- a/src/backend/executor/nodeBitmapHeapscan.c
+++ b/src/backend/executor/nodeBitmapHeapscan.c
@@ -21,7 +21,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.16 2007/01/05 22:19:28 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.17 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -189,7 +189,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
scan->rs_ctup.t_len = ItemIdGetLength(lp);
ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
- pgstat_count_heap_fetch(&scan->rs_pgstat_info);
+ pgstat_count_heap_fetch(scan->rs_rd);
/*
* Set up the result slot to point to this tuple. Note that the slot
@@ -389,7 +389,7 @@ ExecBitmapHeapReScan(BitmapHeapScanState *node, ExprContext *exprCtxt)
heap_rescan(node->ss.ss_currentScanDesc, NULL);
/* undo bogus "seq scan" count (see notes in ExecInitBitmapHeapScan) */
- pgstat_discount_heap_scan(&node->ss.ss_currentScanDesc->rs_pgstat_info);
+ pgstat_discount_heap_scan(node->ss.ss_currentScanDesc->rs_rd);
if (node->tbm)
tbm_free(node->tbm);
@@ -535,7 +535,7 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
* when we actually aren't doing any such thing. Reverse out the added
* scan count. (Eventually we may want to count bitmap scans separately.)
*/
- pgstat_discount_heap_scan(&scanstate->ss.ss_currentScanDesc->rs_pgstat_info);
+ pgstat_discount_heap_scan(scanstate->ss.ss_currentScanDesc->rs_rd);
/*
* get the scan type from the relation descriptor.
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 273588424eb..10f57f00b8f 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.37 2007/03/30 18:34:55 mha Exp $
+ * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.38 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -126,13 +126,6 @@ typedef struct
static BgWriterShmemStruct *BgWriterShmem;
/*
- * BgWriter statistics counters.
- * Stored directly in a stats message structure so it can be sent
- * without needing to copy things around.
- */
-PgStat_MsgBgWriter BgWriterStats;
-
-/*
* GUC parameters
*/
int BgWriterDelay = 200;
@@ -251,11 +244,6 @@ BackgroundWriterMain(void)
MemoryContextSwitchTo(bgwriter_context);
/*
- * Initialize statistics counters to zero
- */
- memset(&BgWriterStats, 0, sizeof(BgWriterStats));
-
- /*
* If an exception is encountered, processing resumes here.
*
* See notes in postgres.c about the design of this coding.
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 1fac5af284b..b41a16de44c 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -13,7 +13,7 @@
*
* Copyright (c) 2001-2007, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.155 2007/04/30 16:37:08 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/postmaster/pgstat.c,v 1.156 2007/05/27 03:50:39 tgl Exp $
* ----------
*/
#include "postgres.h"
@@ -39,6 +39,7 @@
#include "access/heapam.h"
#include "access/transam.h"
+#include "access/twophase_rmgr.h"
#include "access/xact.h"
#include "catalog/pg_database.h"
#include "libpq/ip.h"
@@ -98,6 +99,13 @@ bool pgstat_collect_tuplelevel = false;
bool pgstat_collect_blocklevel = false;
bool pgstat_collect_querystring = false;
+/*
+ * BgWriter global statistics counters (unused in other processes).
+ * Stored directly in a stats message structure so it can be sent
+ * without needing to copy things around. We assume this inits to zeroes.
+ */
+PgStat_MsgBgWriter BgWriterStats;
+
/* ----------
* Local data
* ----------
@@ -111,44 +119,64 @@ static time_t last_pgstat_start_time;
static bool pgStatRunningInCollector = false;
/*
- * Place where backends store per-table info to be sent to the collector.
- * We store shared relations separately from non-shared ones, to be able to
- * send them in separate messages.
+ * Structures in which backends store per-table info that's waiting to be
+ * sent to the collector.
*
- * NOTE: once allocated, a PgStat_MsgTabstat struct belonging to a
- * TabStatArray is never moved or deleted for the life of the backend.
- * Also, we zero out the t_id fields of the contained PgStat_TableEntry
- * structs whenever they are not actively in use. This allows PgStat_Info
- * pointers to be treated as long-lived data, avoiding repeated searches in
- * pgstat_initstats() when a relation is repeatedly heap_open'd or
- * index_open'd during a transaction.
+ * NOTE: once allocated, TabStatusArray structures are never moved or deleted
+ * for the life of the backend. Also, we zero out the t_id fields of the
+ * contained PgStat_TableStatus structs whenever they are not actively in use.
+ * This allows relcache pgstat_info pointers to be treated as long-lived data,
+ * avoiding repeated searches in pgstat_initstats() when a relation is
+ * repeatedly opened during a transaction.
*/
-typedef struct TabStatArray
+#define TABSTAT_QUANTUM 100 /* we alloc this many at a time */
+
+typedef struct TabStatusArray
{
- int tsa_alloc; /* num allocated */
- int tsa_used; /* num actually used */
- PgStat_MsgTabstat **tsa_messages; /* the array itself */
-} TabStatArray;
+ struct TabStatusArray *tsa_next; /* link to next array, if any */
+ int tsa_used; /* # entries currently used */
+ PgStat_TableStatus tsa_entries[TABSTAT_QUANTUM]; /* per-table data */
+} TabStatusArray;
-#define TABSTAT_QUANTUM 4 /* we alloc this many at a time */
+static TabStatusArray *pgStatTabList = NULL;
+
+/*
+ * Tuple insertion/deletion counts for an open transaction can't be propagated
+ * into PgStat_TableStatus counters until we know if it is going to commit
+ * or abort. Hence, we keep these counts in per-subxact structs that live
+ * in TopTransactionContext. This data structure is designed on the assumption
+ * that subxacts won't usually modify very many tables.
+ */
+typedef struct PgStat_SubXactStatus
+{
+ int nest_level; /* subtransaction nest level */
+ struct PgStat_SubXactStatus *prev; /* higher-level subxact if any */
+ PgStat_TableXactStatus *first; /* head of list for this subxact */
+} PgStat_SubXactStatus;
-static TabStatArray RegularTabStat = {0, 0, NULL};
-static TabStatArray SharedTabStat = {0, 0, NULL};
+static PgStat_SubXactStatus *pgStatXactStack = NULL;
static int pgStatXactCommit = 0;
static int pgStatXactRollback = 0;
+/* Record that's written to 2PC state file when pgstat state is persisted */
+typedef struct TwoPhasePgStatRecord
+{
+ PgStat_Counter tuples_inserted; /* tuples inserted in xact */
+ PgStat_Counter tuples_deleted; /* tuples deleted in xact */
+ Oid t_id; /* table's OID */
+ bool t_shared; /* is it a shared catalog? */
+} TwoPhasePgStatRecord;
+
+/*
+ * Info about current "snapshot" of stats file
+ */
static MemoryContext pgStatLocalContext = NULL;
static HTAB *pgStatDBHash = NULL;
static PgBackendStatus *localBackendStatusTable = NULL;
static int localNumBackends = 0;
/*
- * BgWriter global statistics counters, from bgwriter.c
- */
-extern PgStat_MsgBgWriter BgWriterStats;
-
-/*
* Cluster wide statistics, kept in the stats collector.
* Contains statistics that are not collected per database
* or per table.
@@ -177,9 +205,12 @@ static void pgstat_write_statsfile(void);
static HTAB *pgstat_read_statsfile(Oid onlydb);
static void backend_read_statsfile(void);
static void pgstat_read_current_status(void);
-static void pgstat_report_one_tabstat(TabStatArray *tsarr, Oid dbid);
+
+static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
static HTAB *pgstat_collect_oids(Oid catalogid);
+static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
+
static void pgstat_setup_memcxt(void);
static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
@@ -617,12 +648,19 @@ void allow_immediate_pgstat_restart(void)
void
pgstat_report_tabstat(bool force)
{
+ /* we assume this inits to all zeroes: */
+ static const PgStat_TableCounts all_zeroes;
static TimestampTz last_report = 0;
+
TimestampTz now;
+ PgStat_MsgTabstat regular_msg;
+ PgStat_MsgTabstat shared_msg;
+ TabStatusArray *tsa;
+ int i;
/* Don't expend a clock check if nothing to do */
- if (RegularTabStat.tsa_used == 0 &&
- SharedTabStat.tsa_used == 0)
+ if (pgStatTabList == NULL ||
+ pgStatTabList->tsa_used == 0)
return;
/*
@@ -636,51 +674,101 @@ pgstat_report_tabstat(bool force)
last_report = now;
/*
- * For each message buffer used during the last queries, set the header
- * fields and send it out; then mark the entries unused.
+ * Scan through the TabStatusArray struct(s) to find tables that actually
+ * have counts, and build messages to send. We have to separate shared
+ * relations from regular ones because the databaseid field in the
+ * message header has to depend on that.
*/
- pgstat_report_one_tabstat(&RegularTabStat, MyDatabaseId);
- pgstat_report_one_tabstat(&SharedTabStat, InvalidOid);
+ regular_msg.m_databaseid = MyDatabaseId;
+ shared_msg.m_databaseid = InvalidOid;
+ regular_msg.m_nentries = 0;
+ shared_msg.m_nentries = 0;
+
+ for (tsa = pgStatTabList; tsa != NULL; tsa = tsa->tsa_next)
+ {
+ for (i = 0; i < tsa->tsa_used; i++)
+ {
+ PgStat_TableStatus *entry = &tsa->tsa_entries[i];
+ PgStat_MsgTabstat *this_msg;
+ PgStat_TableEntry *this_ent;
+
+ /* Shouldn't have any pending transaction-dependent counts */
+ Assert(entry->trans == NULL);
+
+ /*
+ * Ignore entries that didn't accumulate any actual counts,
+ * such as indexes that were opened by the planner but not used.
+ */
+ if (memcmp(&entry->t_counts, &all_zeroes,
+ sizeof(PgStat_TableCounts)) == 0)
+ continue;
+ /*
+ * OK, insert data into the appropriate message, and send if full.
+ */
+ this_msg = entry->t_shared ? &shared_msg : &regular_msg;
+ this_ent = &this_msg->m_entry[this_msg->m_nentries];
+ this_ent->t_id = entry->t_id;
+ memcpy(&this_ent->t_counts, &entry->t_counts,
+ sizeof(PgStat_TableCounts));
+ if (++this_msg->m_nentries >= PGSTAT_NUM_TABENTRIES)
+ {
+ pgstat_send_tabstat(this_msg);
+ this_msg->m_nentries = 0;
+ }
+ }
+ /* zero out TableStatus structs after use */
+ MemSet(tsa->tsa_entries, 0,
+ tsa->tsa_used * sizeof(PgStat_TableStatus));
+ tsa->tsa_used = 0;
+ }
+
+ /*
+ * Send partial messages. If force is true, make sure that any pending
+ * xact commit/abort gets counted, even if no table stats to send.
+ */
+ if (regular_msg.m_nentries > 0 ||
+ (force && (pgStatXactCommit > 0 || pgStatXactRollback > 0)))
+ pgstat_send_tabstat(&regular_msg);
+ if (shared_msg.m_nentries > 0)
+ pgstat_send_tabstat(&shared_msg);
}
+/*
+ * Subroutine for pgstat_report_tabstat: finish and send a tabstat message
+ */
static void
-pgstat_report_one_tabstat(TabStatArray *tsarr, Oid dbid)
+pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
{
- int i;
-
- for (i = 0; i < tsarr->tsa_used; i++)
- {
- PgStat_MsgTabstat *tsmsg = tsarr->tsa_messages[i];
- int n;
- int len;
+ int n;
+ int len;
- n = tsmsg->m_nentries;
- len = offsetof(PgStat_MsgTabstat, m_entry[0]) +
- n * sizeof(PgStat_TableEntry);
+ /* It's unlikely we'd get here with no socket, but maybe not impossible */
+ if (pgStatSock < 0)
+ return;
+ /*
+ * Report accumulated xact commit/rollback whenever we send a normal
+ * tabstat message
+ */
+ if (OidIsValid(tsmsg->m_databaseid))
+ {
tsmsg->m_xact_commit = pgStatXactCommit;
tsmsg->m_xact_rollback = pgStatXactRollback;
pgStatXactCommit = 0;
pgStatXactRollback = 0;
+ }
+ else
+ {
+ tsmsg->m_xact_commit = 0;
+ tsmsg->m_xact_rollback = 0;
+ }
- /*
- * It's unlikely we'd get here with no socket, but maybe not
- * impossible
- */
- if (pgStatSock >= 0)
- {
- pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT);
- tsmsg->m_databaseid = dbid;
- pgstat_send(tsmsg, len);
- }
+ n = tsmsg->m_nentries;
+ len = offsetof(PgStat_MsgTabstat, m_entry[0]) +
+ n * sizeof(PgStat_TableEntry);
- /*
- * Zero out the entries, to mark them unused and prepare them
- * for next use.
- */
- MemSet(tsmsg, 0, len);
- }
- tsarr->tsa_used = 0;
+ pgstat_setheader(&tsmsg->m_hdr, PGSTAT_MTYPE_TABSTAT);
+ pgstat_send(tsmsg, len);
}
@@ -1016,209 +1104,489 @@ pgstat_ping(void)
pgstat_send(&msg, sizeof(msg));
}
-/*
- * Enlarge a TabStatArray
- */
-static void
-more_tabstat_space(TabStatArray *tsarr)
-{
- PgStat_MsgTabstat *newMessages;
- PgStat_MsgTabstat **msgArray;
- int newAlloc;
- int i;
-
- AssertArg(PointerIsValid(tsarr));
-
- newAlloc = tsarr->tsa_alloc + TABSTAT_QUANTUM;
-
- /* Create (another) quantum of message buffers, and zero them */
- newMessages = (PgStat_MsgTabstat *)
- MemoryContextAllocZero(TopMemoryContext,
- sizeof(PgStat_MsgTabstat) * TABSTAT_QUANTUM);
-
- /* Create or enlarge the pointer array */
- if (tsarr->tsa_messages == NULL)
- msgArray = (PgStat_MsgTabstat **)
- MemoryContextAlloc(TopMemoryContext,
- sizeof(PgStat_MsgTabstat *) * newAlloc);
- else
- msgArray = (PgStat_MsgTabstat **)
- repalloc(tsarr->tsa_messages,
- sizeof(PgStat_MsgTabstat *) * newAlloc);
-
- for (i = 0; i < TABSTAT_QUANTUM; i++)
- msgArray[tsarr->tsa_alloc + i] = newMessages++;
- tsarr->tsa_messages = msgArray;
- tsarr->tsa_alloc = newAlloc;
-
- Assert(tsarr->tsa_used < tsarr->tsa_alloc);
-}
/* ----------
* pgstat_initstats() -
*
- * Called from various places usually dealing with initialization
- * of Relation or Scan structures. The data placed into these
- * structures from here tell where later to count for buffer reads,
- * scans and tuples fetched.
- *
- * NOTE: PgStat_Info pointers in scan structures are really redundant
- * with those in relcache entries. The passed stats pointer might point
- * either to the Relation struct's own pgstat_info field, or to one in
- * a scan structure; we'll set the Relation pg_statinfo and copy it to
- * the scan struct.
+ * Initialize a relcache entry to count access statistics.
+ * Called whenever a relation is opened.
+ *
+ * On return rel->pgstat_info is NULL when nothing is to be counted (the
+ * relkind is not a plain relation, index or toast table; there is no
+ * stats socket; or both collection switches are off). Otherwise it
+ * points at the backend-local PgStat_TableStatus entry for this rel.
*
* We assume that a relcache entry's pgstat_info field is zeroed by
* relcache.c when the relcache entry is made; thereafter it is long-lived
- * data. We can avoid repeated searches of the TabStat arrays when the
+ * data. We can avoid repeated searches of the TabStatus arrays when the
* same relation is touched repeatedly within a transaction.
* ----------
*/
void
-pgstat_initstats(PgStat_Info *stats, Relation rel)
+pgstat_initstats(Relation rel)
{
Oid rel_id = rel->rd_id;
- PgStat_TableEntry *useent;
- TabStatArray *tsarr;
- PgStat_MsgTabstat *tsmsg;
- int mb;
- int i;
+ char relkind = rel->rd_rel->relkind;
+
+ /* We only count stats for things that have storage */
+ if (!(relkind == RELKIND_RELATION ||
+ relkind == RELKIND_INDEX ||
+ relkind == RELKIND_TOASTVALUE))
+ {
+ rel->pgstat_info = NULL;
+ return;
+ }
if (pgStatSock < 0 ||
!(pgstat_collect_tuplelevel ||
pgstat_collect_blocklevel))
{
- /* We're not counting at all. */
- stats->tabentry = NULL;
+ /* We're not counting at all */
+ rel->pgstat_info = NULL;
return;
}
/*
* If we already set up this relation in the current transaction,
- * just copy the pointer.
+ * nothing to do.
*/
- if (rel->pgstat_info.tabentry != NULL &&
- ((PgStat_TableEntry *) rel->pgstat_info.tabentry)->t_id == rel_id)
- {
- stats->tabentry = rel->pgstat_info.tabentry;
+ if (rel->pgstat_info != NULL &&
+ rel->pgstat_info->t_id == rel_id) /* link is current only if the OID still matches */
return;
- }
+
+ /* Else find or make the PgStat_TableStatus entry, and update link */
+ rel->pgstat_info = get_tabstat_entry(rel_id, rel->rd_rel->relisshared);
+}
+
+/*
+ * get_tabstat_entry - find or create a PgStat_TableStatus entry for rel
+ *
+ * The entries live in a linked list of TabStatusArray chunks headed by
+ * pgStatTabList. We walk the chunks in order: a used slot whose t_id
+ * equals rel_id is returned as-is; otherwise the first chunk with fewer
+ * than TABSTAT_QUANTUM used slots donates its next slot. If every chunk
+ * is full (or the list is empty), a new zeroed chunk is allocated in
+ * TopMemoryContext and appended to the list.
+ *
+ * Lookup is by rel_id alone; isshared is only stored when a new slot is
+ * claimed. The function always returns a usable entry.
+ */
+static PgStat_TableStatus *
+get_tabstat_entry(Oid rel_id, bool isshared)
+{
+ PgStat_TableStatus *entry;
+ TabStatusArray *tsa;
+ TabStatusArray *prev_tsa;
+ int i;
/*
- * Search the already-used message slots for this relation.
+ * Search the already-used tabstat slots for this relation.
*/
- tsarr = rel->rd_rel->relisshared ? &SharedTabStat : &RegularTabStat;
-
- for (mb = 0; mb < tsarr->tsa_used; mb++)
+ prev_tsa = NULL; /* tracks the list tail so a new chunk can be appended */
+ for (tsa = pgStatTabList; tsa != NULL; prev_tsa = tsa, tsa = tsa->tsa_next)
{
- tsmsg = tsarr->tsa_messages[mb];
-
- for (i = tsmsg->m_nentries; --i >= 0;)
+ for (i = 0; i < tsa->tsa_used; i++)
{
- if (tsmsg->m_entry[i].t_id == rel_id)
- {
- rel->pgstat_info.tabentry = (void *) &(tsmsg->m_entry[i]);
- stats->tabentry = rel->pgstat_info.tabentry;
- return;
- }
+ entry = &tsa->tsa_entries[i];
+ if (entry->t_id == rel_id)
+ return entry;
}
- if (tsmsg->m_nentries >= PGSTAT_NUM_TABENTRIES)
- continue;
-
- /*
- * Not found, but found a message buffer with an empty slot instead.
- * Fine, let's use this one. We assume the entry was already zeroed,
- * either at creation or after last use.
- */
- i = tsmsg->m_nentries++;
- useent = &tsmsg->m_entry[i];
- useent->t_id = rel_id;
- rel->pgstat_info.tabentry = (void *) useent;
- stats->tabentry = rel->pgstat_info.tabentry;
- return;
+ if (tsa->tsa_used < TABSTAT_QUANTUM)
+ {
+ /*
+ * It must not be present, but we found a free slot instead.
+ * Fine, let's use this one. We assume the entry was already
+ * zeroed, either at creation or after last use.
+ */
+ entry = &tsa->tsa_entries[tsa->tsa_used++];
+ entry->t_id = rel_id;
+ entry->t_shared = isshared;
+ return entry;
+ }
}
/*
- * If we ran out of message buffers, we just allocate more.
+ * We ran out of tabstat slots, so allocate more. Be sure they're zeroed.
*/
- if (tsarr->tsa_used >= tsarr->tsa_alloc)
- more_tabstat_space(tsarr);
+ tsa = (TabStatusArray *) MemoryContextAllocZero(TopMemoryContext,
+ sizeof(TabStatusArray));
+ if (prev_tsa)
+ prev_tsa->tsa_next = tsa;
+ else
+ pgStatTabList = tsa; /* list was empty: the new chunk becomes the head */
+
+ /*
+ * Use the first entry of the new TabStatusArray.
+ */
+ entry = &tsa->tsa_entries[tsa->tsa_used++];
+ entry->t_id = rel_id;
+ entry->t_shared = isshared;
+ return entry;
+}
+
+/*
+ * get_tabstat_stack_level - return the stack entry for nest_level
+ *
+ * If the top of pgStatXactStack already carries nest_level it is returned
+ * unchanged. Otherwise a new entry with an empty per-table list is
+ * allocated in TopTransactionContext, pushed on top of the stack, and
+ * returned.
+ */
+static PgStat_SubXactStatus *
+get_tabstat_stack_level(int nest_level)
+{
+	PgStat_SubXactStatus *top = pgStatXactStack;
+	PgStat_SubXactStatus *fresh;
+
+	/* Fast path: the current top entry is already for this level */
+	if (top != NULL && top->nest_level == nest_level)
+		return top;
+
+	/* Push a new, empty entry for this level */
+	fresh = (PgStat_SubXactStatus *)
+		MemoryContextAlloc(TopTransactionContext,
+						   sizeof(PgStat_SubXactStatus));
+	fresh->nest_level = nest_level;
+	fresh->prev = top;
+	fresh->first = NULL;
+	pgStatXactStack = fresh;
+
+	return fresh;
+}
+
+/*
+ * add_tabstat_xact_level - add a new (sub)transaction state record
+ *
+ * Allocates a zeroed PgStat_TableXactStatus in TopTransactionContext for
+ * the table described by pgstat_info at nesting depth nest_level, and
+ * links it in two directions: onto the front of the per-level list kept
+ * in the transaction stack entry, and as the new pgstat_info->trans with
+ * the previous value saved in ->upper.
+ */
+static void
+add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level)
+{
+ PgStat_SubXactStatus *xact_state;
+ PgStat_TableXactStatus *trans;
/*
- * Use the first entry of the next message buffer.
+ * If this is the first rel to be modified at the current nest level,
+ * we first have to push a transaction stack entry.
*/
- mb = tsarr->tsa_used++;
- tsmsg = tsarr->tsa_messages[mb];
- tsmsg->m_nentries = 1;
- useent = &tsmsg->m_entry[0];
- useent->t_id = rel_id;
- rel->pgstat_info.tabentry = (void *) useent;
- stats->tabentry = rel->pgstat_info.tabentry;
+ xact_state = get_tabstat_stack_level(nest_level);
+
+ /* Now make a per-table stack entry */
+ trans = (PgStat_TableXactStatus *)
+ MemoryContextAllocZero(TopTransactionContext,
+ sizeof(PgStat_TableXactStatus));
+ trans->nest_level = nest_level;
+ trans->upper = pgstat_info->trans; /* remember the outer level's record, if any */
+ trans->parent = pgstat_info;
+ trans->next = xact_state->first; /* push onto this level's list */
+ xact_state->first = trans;
+ pgstat_info->trans = trans; /* new record is now the table's current one */
+}
+
+/*
+ * pgstat_count_heap_insert - record one tuple insertion into rel
+ *
+ * Nothing is counted unless tuple-level collection is enabled and the
+ * relation has a tabstat entry.
+ */
+void
+pgstat_count_heap_insert(Relation rel)
+{
+	PgStat_TableStatus *tabstat = rel->pgstat_info;
+	int			cur_level;
+
+	if (!pgstat_collect_tuplelevel || tabstat == NULL)
+		return;
+
+	cur_level = GetCurrentTransactionNestLevel();
+
+	/* The raw action counter advances whether or not we later commit */
+	tabstat->t_counts.t_tuples_inserted++;
+
+	/* Ensure the table has a per-xact record at the current nest level */
+	if (!(tabstat->trans != NULL && tabstat->trans->nest_level == cur_level))
+		add_tabstat_xact_level(tabstat, cur_level);
+
+	/* The transactional effect is charged to that record */
+	tabstat->trans->tuples_inserted++;
+}
+
+/*
+ * pgstat_count_heap_update - record one tuple update in rel
+ *
+ * Nothing is counted unless tuple-level collection is enabled and the
+ * relation has a tabstat entry.
+ */
+void
+pgstat_count_heap_update(Relation rel)
+{
+	PgStat_TableStatus *tabstat = rel->pgstat_info;
+	int			cur_level;
+
+	if (!pgstat_collect_tuplelevel || tabstat == NULL)
+		return;
+
+	cur_level = GetCurrentTransactionNestLevel();
+
+	/* The raw action counter advances whether or not we later commit */
+	tabstat->t_counts.t_tuples_updated++;
+
+	/* Ensure the table has a per-xact record at the current nest level */
+	if (!(tabstat->trans != NULL && tabstat->trans->nest_level == cur_level))
+		add_tabstat_xact_level(tabstat, cur_level);
+
+	/* Transactionally, an update is one new tuple plus one deleted tuple */
+	tabstat->trans->tuples_inserted++;
+	tabstat->trans->tuples_deleted++;
+}
+
+/*
+ * pgstat_count_heap_delete - record one tuple deletion from rel
+ *
+ * Nothing is counted unless tuple-level collection is enabled and the
+ * relation has a tabstat entry.
+ */
+void
+pgstat_count_heap_delete(Relation rel)
+{
+	PgStat_TableStatus *tabstat = rel->pgstat_info;
+	int			cur_level;
+
+	if (!pgstat_collect_tuplelevel || tabstat == NULL)
+		return;
+
+	cur_level = GetCurrentTransactionNestLevel();
+
+	/* The raw action counter advances whether or not we later commit */
+	tabstat->t_counts.t_tuples_deleted++;
+
+	/* Ensure the table has a per-xact record at the current nest level */
+	if (!(tabstat->trans != NULL && tabstat->trans->nest_level == cur_level))
+		add_tabstat_xact_level(tabstat, cur_level);
+
+	/* The transactional effect is charged to that record */
+	tabstat->trans->tuples_deleted++;
}
/* ----------
- * pgstat_count_xact_commit() -
+ * AtEOXact_PgStat
*
- * Called from access/transam/xact.c to count transaction commits.
+ * Called from access/transam/xact.c at top-level transaction commit/abort.
+ *
+ * isCommit is true for commit, false for abort. We bump the matching
+ * xact counter, then fold each table's top-level transactional counts
+ * into its PgStat_TableStatus and detach the per-xact records.
+ *
+ * On commit the live-tuple delta is inserted minus deleted, because a
+ * committed delete (or the delete half of an update) removes a live tuple
+ * as well as creating a dead one. The delta can therefore be negative.
* ----------
*/
void
-pgstat_count_xact_commit(void)
+AtEOXact_PgStat(bool isCommit)
{
- if (!pgstat_collect_tuplelevel &&
- !pgstat_collect_blocklevel)
- return;
-
- pgStatXactCommit++;
+ PgStat_SubXactStatus *xact_state;
/*
- * If there was no relation activity yet, just make one existing message
- * buffer used without slots, causing the next report to tell new
- * xact-counters.
+ * Count transaction commit or abort. (We use counters, not just bools,
+ * in case the reporting message isn't sent right away.)
*/
- if (RegularTabStat.tsa_alloc == 0)
- more_tabstat_space(&RegularTabStat);
+ if (isCommit)
+ pgStatXactCommit++;
+ else
+ pgStatXactRollback++;
- if (RegularTabStat.tsa_used == 0)
+ /*
+ * Transfer transactional insert/delete counts into the base tabstat
+ * entries. We don't bother to free any of the transactional state,
+ * since it's all in TopTransactionContext and will go away anyway.
+ */
+ xact_state = pgStatXactStack;
+ if (xact_state != NULL)
{
- RegularTabStat.tsa_used++;
- RegularTabStat.tsa_messages[0]->m_nentries = 0;
+ PgStat_TableXactStatus *trans;
+
+ Assert(xact_state->nest_level == 1);
+ Assert(xact_state->prev == NULL);
+ for (trans = xact_state->first; trans != NULL; trans = trans->next)
+ {
+ PgStat_TableStatus *tabstat;
+
+ Assert(trans->nest_level == 1);
+ Assert(trans->upper == NULL);
+ tabstat = trans->parent;
+ Assert(tabstat->trans == trans);
+ if (isCommit)
+ {
+ /* deleted tuples stop being live and become dead */
+ tabstat->t_counts.t_new_live_tuples +=
+ trans->tuples_inserted - trans->tuples_deleted;
+ tabstat->t_counts.t_new_dead_tuples += trans->tuples_deleted;
+ }
+ else
+ {
+ /* inserted tuples are dead, deleted tuples are unaffected */
+ tabstat->t_counts.t_new_dead_tuples += trans->tuples_inserted;
+ }
+ tabstat->trans = NULL;
+ }
}
-}
+ pgStatXactStack = NULL;
+ /* Make sure any stats snapshot is thrown away */
+ pgstat_clear_snapshot();
+}
/* ----------
- * pgstat_count_xact_rollback() -
+ * AtEOSubXact_PgStat
*
- * Called from access/transam/xact.c to count transaction rollbacks.
+ * Called from access/transam/xact.c at subtransaction commit/abort.
+ *
+ * isCommit is true for subcommit, false for subabort; nestDepth is the
+ * nesting level being closed. Nothing happens unless the top stack entry
+ * is at nestDepth or deeper. On commit each table's counts are merged
+ * into (or re-labelled as) level nestDepth - 1; on abort the inserted
+ * count is added straight to the table's t_new_dead_tuples and the
+ * record is freed.
* ----------
*/
void
-pgstat_count_xact_rollback(void)
+AtEOSubXact_PgStat(bool isCommit, int nestDepth)
{
- if (!pgstat_collect_tuplelevel &&
- !pgstat_collect_blocklevel)
- return;
-
- pgStatXactRollback++;
+ PgStat_SubXactStatus *xact_state;
/*
- * If there was no relation activity yet, just make one existing message
- * buffer used without slots, causing the next report to tell new
- * xact-counters.
+ * Transfer transactional insert/update counts into the next higher
+ * subtransaction state.
*/
- if (RegularTabStat.tsa_alloc == 0)
- more_tabstat_space(&RegularTabStat);
+ xact_state = pgStatXactStack;
+ if (xact_state != NULL &&
+ xact_state->nest_level >= nestDepth)
+ {
+ PgStat_TableXactStatus *trans;
+ PgStat_TableXactStatus *next_trans;
+
+ /* delink xact_state from stack immediately to simplify reuse case */
+ pgStatXactStack = xact_state->prev;
+
+ for (trans = xact_state->first; trans != NULL; trans = next_trans)
+ {
+ PgStat_TableStatus *tabstat;
+
+ next_trans = trans->next; /* save link: trans is freed or relinked below */
+ Assert(trans->nest_level == nestDepth);
+ tabstat = trans->parent;
+ Assert(tabstat->trans == trans);
+ if (isCommit)
+ {
+ if (trans->upper && trans->upper->nest_level == nestDepth - 1)
+ {
+ trans->upper->tuples_inserted += trans->tuples_inserted;
+ trans->upper->tuples_deleted += trans->tuples_deleted;
+ tabstat->trans = trans->upper;
+ pfree(trans);
+ }
+ else
+ {
+ /*
+ * When there isn't an immediate parent state, we can
+ * just reuse the record instead of going through a
+ * palloc/pfree pushup (this works since it's all in
+ * TopTransactionContext anyway). We have to re-link
+ * it into the parent level, though, and that might mean
+ * pushing a new entry into the pgStatXactStack.
+ */
+ PgStat_SubXactStatus *upper_xact_state;
+
+ upper_xact_state = get_tabstat_stack_level(nestDepth - 1);
+ trans->next = upper_xact_state->first;
+ upper_xact_state->first = trans;
+ trans->nest_level = nestDepth - 1; /* tabstat->trans and trans->upper stay as they are */
+ }
+ }
+ else
+ {
+ /*
+ * On abort, inserted tuples are dead (and can be bounced out
+ * to the top-level tabstat), deleted tuples are unaffected
+ */
+ tabstat->t_counts.t_new_dead_tuples += trans->tuples_inserted;
+ tabstat->trans = trans->upper;
+ pfree(trans);
+ }
+ }
+ pfree(xact_state); /* already unlinked from pgStatXactStack above */
+ }
+}
+
+
+/*
+ * AtPrepare_PgStat
+ * Save the transactional stats state at 2PC transaction prepare.
+ *
+ * In this phase we just generate 2PC records for all the pending
+ * transaction-dependent stats work.
+ *
+ * One TwoPhasePgStatRecord is registered per table that has a top-level
+ * per-xact record; it carries the table's OID, its shared flag and the
+ * pending inserted/deleted counts. No in-memory state is changed here.
+ */
+void
+AtPrepare_PgStat(void)
+{
+ PgStat_SubXactStatus *xact_state;
- if (RegularTabStat.tsa_used == 0)
+ xact_state = pgStatXactStack;
+ if (xact_state != NULL)
{
- RegularTabStat.tsa_used++;
- RegularTabStat.tsa_messages[0]->m_nentries = 0;
+ PgStat_TableXactStatus *trans;
+
+ Assert(xact_state->nest_level == 1);
+ Assert(xact_state->prev == NULL);
+ for (trans = xact_state->first; trans != NULL; trans = trans->next)
+ {
+ PgStat_TableStatus *tabstat;
+ TwoPhasePgStatRecord record;
+
+ Assert(trans->nest_level == 1);
+ Assert(trans->upper == NULL);
+ tabstat = trans->parent;
+ Assert(tabstat->trans == trans);
+
+ record.tuples_inserted = trans->tuples_inserted;
+ record.tuples_deleted = trans->tuples_deleted;
+ record.t_id = tabstat->t_id;
+ record.t_shared = tabstat->t_shared;
+
+ RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0,
+ &record, sizeof(TwoPhasePgStatRecord)); /* the whole struct is passed by size */
+ }
}
}
+/*
+ * PostPrepare_PgStat
+ *		Clean up after successful PREPARE.
+ *
+ * The per-transaction records are simply detached from the per-table
+ * status entries and the transaction stack is reset. The nontransactional
+ * action counts stay in the tabstat entries for normal reporting; the
+ * live/dead tuple effects are carried by the 2PC state file instead.
+ * Nothing is freed explicitly, since the detached state lives in
+ * TopTransactionContext and goes away with it.
+ *
+ * Note: AtEOXact_PgStat is not called during PREPARE.
+ */
+void
+PostPrepare_PgStat(void)
+{
+	PgStat_TableXactStatus *link;
+
+	/* Walk the top entry's per-table list, if there is one */
+	link = (pgStatXactStack != NULL) ? pgStatXactStack->first : NULL;
+	while (link != NULL)
+	{
+		link->parent->trans = NULL;
+		link = link->next;
+	}
+
+	pgStatXactStack = NULL;
+
+	/* Make sure any stats snapshot is thrown away */
+	pgstat_clear_snapshot();
+}
+
+/*
+ * 2PC processing routine for COMMIT PREPARED case.
+ *
+ * Load the saved counts into our local pgstats state.
+ *
+ * recdata points at the TwoPhasePgStatRecord saved at PREPARE; xid, info
+ * and len are not used here. The live-tuple delta is inserted minus
+ * deleted, because each committed delete removes a live tuple as well as
+ * creating a dead one.
+ */
+void
+pgstat_twophase_postcommit(TransactionId xid, uint16 info,
+ void *recdata, uint32 len)
+{
+ TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
+ PgStat_TableStatus *pgstat_info;
+
+ /* Find or create a tabstat entry for the rel */
+ pgstat_info = get_tabstat_entry(rec->t_id, rec->t_shared);
+
+ /* deleted tuples stop being live and become dead */
+ pgstat_info->t_counts.t_new_live_tuples +=
+ rec->tuples_inserted - rec->tuples_deleted;
+ pgstat_info->t_counts.t_new_dead_tuples += rec->tuples_deleted;
+}
+
+/*
+ * 2PC processing routine for ROLLBACK PREPARED case.
+ *
+ * The counts saved at PREPARE are applied as for an aborted transaction:
+ * every tuple it inserted is now dead, and its deletions had no effect.
+ * recdata points at the saved TwoPhasePgStatRecord; xid, info and len
+ * are not used here.
+ */
+void
+pgstat_twophase_postabort(TransactionId xid, uint16 info,
+						  void *recdata, uint32 len)
+{
+	TwoPhasePgStatRecord *saved = (TwoPhasePgStatRecord *) recdata;
+	PgStat_TableStatus *tabstat = get_tabstat_entry(saved->t_id,
+													saved->t_shared);
+
+	tabstat->t_counts.t_new_dead_tuples += saved->tuples_inserted;
+}
+
/* ----------
* pgstat_fetch_stat_dbentry() -
@@ -1725,18 +2093,15 @@ pgstat_send(void *msg, int len)
void
pgstat_send_bgwriter(void)
{
+ /* We assume this initializes to zeroes */
+ static const PgStat_MsgBgWriter all_zeroes;
+
/*
* This function can be called even if nothing at all has happened.
* In this case, avoid sending a completely empty message to
* the stats collector.
*/
- if (BgWriterStats.m_timed_checkpoints == 0 &&
- BgWriterStats.m_requested_checkpoints == 0 &&
- BgWriterStats.m_buf_written_checkpoints == 0 &&
- BgWriterStats.m_buf_written_lru == 0 &&
- BgWriterStats.m_buf_written_all == 0 &&
- BgWriterStats.m_maxwritten_lru == 0 &&
- BgWriterStats.m_maxwritten_all == 0)
+ if (memcmp(&BgWriterStats, &all_zeroes, sizeof(PgStat_MsgBgWriter)) == 0)
return;
/*
@@ -1746,10 +2111,9 @@ pgstat_send_bgwriter(void)
pgstat_send(&BgWriterStats, sizeof(BgWriterStats));
/*
- * Clear out the bgwriter statistics buffer, so it can be
- * re-used.
+ * Clear out the statistics buffer, so it can be re-used.
*/
- memset(&BgWriterStats, 0, sizeof(BgWriterStats));
+ MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
}
@@ -2509,60 +2873,50 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
* If it's a new table entry, initialize counters to the values we
* just got.
*/
- tabentry->numscans = tabmsg[i].t_numscans;
- tabentry->tuples_returned = tabmsg[i].t_tuples_returned;
- tabentry->tuples_fetched = tabmsg[i].t_tuples_fetched;
- tabentry->tuples_inserted = tabmsg[i].t_tuples_inserted;
- tabentry->tuples_updated = tabmsg[i].t_tuples_updated;
- tabentry->tuples_deleted = tabmsg[i].t_tuples_deleted;
-
- tabentry->n_live_tuples = tabmsg[i].t_tuples_inserted;
- tabentry->n_dead_tuples = tabmsg[i].t_tuples_updated +
- tabmsg[i].t_tuples_deleted;
+ tabentry->numscans = tabmsg[i].t_counts.t_numscans;
+ tabentry->tuples_returned = tabmsg[i].t_counts.t_tuples_returned;
+ tabentry->tuples_fetched = tabmsg[i].t_counts.t_tuples_fetched;
+ tabentry->tuples_inserted = tabmsg[i].t_counts.t_tuples_inserted;
+ tabentry->tuples_updated = tabmsg[i].t_counts.t_tuples_updated;
+ tabentry->tuples_deleted = tabmsg[i].t_counts.t_tuples_deleted;
+ tabentry->n_live_tuples = tabmsg[i].t_counts.t_new_live_tuples;
+ tabentry->n_dead_tuples = tabmsg[i].t_counts.t_new_dead_tuples;
+ tabentry->blocks_fetched = tabmsg[i].t_counts.t_blocks_fetched;
+ tabentry->blocks_hit = tabmsg[i].t_counts.t_blocks_hit;
+
tabentry->last_anl_tuples = 0;
tabentry->vacuum_timestamp = 0;
tabentry->autovac_vacuum_timestamp = 0;
tabentry->analyze_timestamp = 0;
tabentry->autovac_analyze_timestamp = 0;
-
- tabentry->blocks_fetched = tabmsg[i].t_blocks_fetched;
- tabentry->blocks_hit = tabmsg[i].t_blocks_hit;
}
else
{
/*
* Otherwise add the values to the existing entry.
*/
- tabentry->numscans += tabmsg[i].t_numscans;
- tabentry->tuples_returned += tabmsg[i].t_tuples_returned;
- tabentry->tuples_fetched += tabmsg[i].t_tuples_fetched;
- tabentry->tuples_inserted += tabmsg[i].t_tuples_inserted;
- tabentry->tuples_updated += tabmsg[i].t_tuples_updated;
- tabentry->tuples_deleted += tabmsg[i].t_tuples_deleted;
-
- tabentry->n_live_tuples += tabmsg[i].t_tuples_inserted -
- tabmsg[i].t_tuples_deleted;
- tabentry->n_dead_tuples += tabmsg[i].t_tuples_updated +
- tabmsg[i].t_tuples_deleted;
-
- tabentry->blocks_fetched += tabmsg[i].t_blocks_fetched;
- tabentry->blocks_hit += tabmsg[i].t_blocks_hit;
+ tabentry->numscans += tabmsg[i].t_counts.t_numscans;
+ tabentry->tuples_returned += tabmsg[i].t_counts.t_tuples_returned;
+ tabentry->tuples_fetched += tabmsg[i].t_counts.t_tuples_fetched;
+ tabentry->tuples_inserted += tabmsg[i].t_counts.t_tuples_inserted;
+ tabentry->tuples_updated += tabmsg[i].t_counts.t_tuples_updated;
+ tabentry->tuples_deleted += tabmsg[i].t_counts.t_tuples_deleted;
+ tabentry->n_live_tuples += tabmsg[i].t_counts.t_new_live_tuples;
+ tabentry->n_dead_tuples += tabmsg[i].t_counts.t_new_dead_tuples;
+ tabentry->blocks_fetched += tabmsg[i].t_counts.t_blocks_fetched;
+ tabentry->blocks_hit += tabmsg[i].t_counts.t_blocks_hit;
}
/*
- * Add table stats to the database entry.
- */
- dbentry->n_tuples_returned += tabmsg[i].t_tuples_returned;
- dbentry->n_tuples_fetched += tabmsg[i].t_tuples_fetched;
- dbentry->n_tuples_inserted += tabmsg[i].t_tuples_inserted;
- dbentry->n_tuples_updated += tabmsg[i].t_tuples_updated;
- dbentry->n_tuples_deleted += tabmsg[i].t_tuples_deleted;
-
- /*
- * And add the block IO to the database entry.
+ * Add per-table stats to the per-database entry, too.
*/
- dbentry->n_blocks_fetched += tabmsg[i].t_blocks_fetched;
- dbentry->n_blocks_hit += tabmsg[i].t_blocks_hit;
+ dbentry->n_tuples_returned += tabmsg[i].t_counts.t_tuples_returned;
+ dbentry->n_tuples_fetched += tabmsg[i].t_counts.t_tuples_fetched;
+ dbentry->n_tuples_inserted += tabmsg[i].t_counts.t_tuples_inserted;
+ dbentry->n_tuples_updated += tabmsg[i].t_counts.t_tuples_updated;
+ dbentry->n_tuples_deleted += tabmsg[i].t_counts.t_tuples_deleted;
+ dbentry->n_blocks_fetched += tabmsg[i].t_counts.t_blocks_fetched;
+ dbentry->n_blocks_hit += tabmsg[i].t_counts.t_blocks_hit;
}
}
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 9f4876a6050..e2cfc870e2e 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.218 2007/05/02 23:34:48 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.219 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -88,12 +88,6 @@ static bool IsForInput;
/* local state for LockBufferForCleanup */
static volatile BufferDesc *PinCountWaitBuf = NULL;
-/*
- * Global statistics for the bgwriter. The contents of this variable
- * only makes sense in the bgwriter process.
- */
-extern PgStat_MsgBgWriter BgWriterStats;
-
static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum,
bool zeroPage);
@@ -174,7 +168,7 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
if (isExtend)
blockNum = smgrnblocks(reln->rd_smgr);
- pgstat_count_buffer_read(&reln->pgstat_info, reln);
+ pgstat_count_buffer_read(reln);
if (isLocalBuf)
{
@@ -204,7 +198,7 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
if (!isExtend)
{
/* Just need to update stats before we exit */
- pgstat_count_buffer_hit(&reln->pgstat_info, reln);
+ pgstat_count_buffer_hit(reln);
if (VacuumCostActive)
VacuumCostBalance += VacuumCostPageHit;
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 7d554c2ada2..45cb103adee 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.260 2007/05/02 21:08:46 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.261 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1802,6 +1802,7 @@ RelationClearRelation(Relation relation, bool rebuild)
int old_refcnt = relation->rd_refcnt;
SubTransactionId old_createSubid = relation->rd_createSubid;
SubTransactionId old_newRelfilenodeSubid = relation->rd_newRelfilenodeSubid;
+ struct PgStat_TableStatus *old_pgstat_info = relation->pgstat_info;
TupleDesc old_att = relation->rd_att;
RuleLock *old_rules = relation->rd_rules;
MemoryContext old_rulescxt = relation->rd_rulescxt;
@@ -1821,6 +1822,7 @@ RelationClearRelation(Relation relation, bool rebuild)
relation->rd_refcnt = old_refcnt;
relation->rd_createSubid = old_createSubid;
relation->rd_newRelfilenodeSubid = old_newRelfilenodeSubid;
+ relation->pgstat_info = old_pgstat_info;
if (equalTupleDescs(old_att, relation->rd_att))
{
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 5ea66e74672..ebb2e984c24 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.123 2007/04/08 01:26:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.124 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -147,10 +147,10 @@ extern HeapTuple heap_getnext(HeapScanDesc scan, ScanDirection direction);
extern bool heap_fetch(Relation relation, Snapshot snapshot,
HeapTuple tuple, Buffer *userbuf, bool keep_buf,
- PgStat_Info *pgstat_info);
+ Relation stats_relation);
extern bool heap_release_fetch(Relation relation, Snapshot snapshot,
HeapTuple tuple, Buffer *userbuf, bool keep_buf,
- PgStat_Info *pgstat_info);
+ Relation stats_relation);
extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
ItemPointer tid);
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 77bca6be482..7a1ea39352a 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.52 2007/01/20 18:43:35 neilc Exp $
+ * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.53 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -37,8 +37,6 @@ typedef struct HeapScanDescData
/* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
ItemPointerData rs_mctid; /* marked scan position, if any */
- PgStat_Info rs_pgstat_info; /* statistics collector hook */
-
/* these fields only used in page-at-a-time mode */
int rs_cindex; /* current tuple's index in vistuples */
int rs_mindex; /* marked tuple's saved index */
@@ -78,8 +76,6 @@ typedef struct IndexScanDescData
HeapTupleData xs_ctup; /* current heap tuple, if any */
Buffer xs_cbuf; /* current heap buffer in scan, if any */
/* NB: if xs_cbuf is not InvalidBuffer, we hold a pin on that buffer */
-
- PgStat_Info xs_pgstat_info; /* statistics collector hook */
} IndexScanDescData;
typedef IndexScanDescData *IndexScanDesc;
diff --git a/src/include/access/twophase_rmgr.h b/src/include/access/twophase_rmgr.h
index 0dbcd226fbd..e98ad7cb375 100644
--- a/src/include/access/twophase_rmgr.h
+++ b/src/include/access/twophase_rmgr.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/twophase_rmgr.h,v 1.4 2007/01/05 22:19:51 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/twophase_rmgr.h,v 1.5 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -26,7 +26,8 @@ typedef uint8 TwoPhaseRmgrId;
#define TWOPHASE_RM_INVAL_ID 2
#define TWOPHASE_RM_FLATFILES_ID 3
#define TWOPHASE_RM_NOTIFY_ID 4
-#define TWOPHASE_RM_MAX_ID TWOPHASE_RM_NOTIFY_ID
+#define TWOPHASE_RM_PGSTAT_ID 5
+#define TWOPHASE_RM_MAX_ID TWOPHASE_RM_PGSTAT_ID
extern const TwoPhaseCallback twophase_recover_callbacks[];
extern const TwoPhaseCallback twophase_postcommit_callbacks[];
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 694ee44db19..476fd47dc7b 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -5,7 +5,7 @@
*
* Copyright (c) 2001-2007, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.58 2007/04/30 16:37:08 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/pgstat.h,v 1.59 2007/05/27 03:50:39 tgl Exp $
* ----------
*/
#ifndef PGSTAT_H
@@ -40,6 +40,90 @@ typedef enum StatMsgType
*/
typedef int64 PgStat_Counter;
+/* ----------
+ * PgStat_TableCounts The actual per-table counts kept by a backend
+ *
+ * This struct should contain only actual event counters, because we memcmp
+ * it against zeroes to detect whether there are any counts to transmit.
+ * It is a component of PgStat_TableStatus (within-backend state) and
+ * PgStat_TableEntry (the transmitted message format).
+ *
+ * Note: for a table, tuples_returned is the number of tuples successfully
+ * fetched by heap_getnext, while tuples_fetched is the number of tuples
+ * successfully fetched by heap_fetch under the control of bitmap indexscans.
+ * For an index, tuples_returned is the number of index entries returned by
+ * the index AM, while tuples_fetched is the number of tuples successfully
+ * fetched by heap_fetch under the control of simple indexscans for this index.
+ *
+ * tuples_inserted/tuples_updated/tuples_deleted count attempted actions,
+ * regardless of whether the transaction committed. new_live_tuples and
+ * new_dead_tuples are properly adjusted depending on commit or abort.
+ *
+ * Because the struct is embedded in the transmitted message, any change to
+ * its fields also changes the message layout.
+ * ----------
+ */
+typedef struct PgStat_TableCounts
+{
+ PgStat_Counter t_numscans; /* advanced by pgstat_count_heap_scan */
+
+ PgStat_Counter t_tuples_returned;
+ PgStat_Counter t_tuples_fetched;
+
+ PgStat_Counter t_tuples_inserted; /* advanced by pgstat_count_heap_insert */
+ PgStat_Counter t_tuples_updated; /* advanced by pgstat_count_heap_update */
+ PgStat_Counter t_tuples_deleted; /* advanced by pgstat_count_heap_delete */
+
+ PgStat_Counter t_new_live_tuples; /* filled in at xact end / 2PC finish */
+ PgStat_Counter t_new_dead_tuples; /* filled in at (sub)xact end / 2PC finish */
+
+ PgStat_Counter t_blocks_fetched;
+ PgStat_Counter t_blocks_hit;
+} PgStat_TableCounts;
+
+
+/* ------------------------------------------------------------
+ * Structures kept in backend local memory while accumulating counts
+ * ------------------------------------------------------------
+ */
+
+
+/* ----------
+ * PgStat_TableStatus Per-table status within a backend
+ *
+ * Most of the event counters are nontransactional, ie, we count events
+ * in committed and aborted transactions alike. For these, we just count
+ * directly in the PgStat_TableStatus. However, new_live_tuples and
+ * new_dead_tuples must be derived from tuple insertion and deletion counts
+ * with awareness of whether the transaction or subtransaction committed or
+ * aborted. Hence, we also keep a stack of per-(sub)transaction status
+ * records for every table modified in the current transaction. At commit
+ * or abort, we propagate tuples_inserted and tuples_deleted up to the
+ * parent subtransaction level, or out to the parent PgStat_TableStatus,
+ * as appropriate.
+ *
+ * trans is NULL while no (sub)transaction level holds pending counts for
+ * the table; it is reset to NULL at top-level transaction end and after
+ * PREPARE. t_id is the key used to look an entry up.
+ * ----------
+ */
+typedef struct PgStat_TableStatus
+{
+ Oid t_id; /* table's OID */
+ bool t_shared; /* is it a shared catalog? */
+ struct PgStat_TableXactStatus *trans; /* lowest subxact's counts */
+ PgStat_TableCounts t_counts; /* event counts to be sent */
+} PgStat_TableStatus;
+
+/* ----------
+ * PgStat_TableXactStatus Per-table, per-subtransaction status
+ *
+ * One record exists for each (table, nesting level) pair that has pending
+ * insert/delete counts. Records are created zeroed in
+ * TopTransactionContext and are linked two ways: "upper" chains the
+ * records of one table across nesting levels, while "next" chains the
+ * records of all tables at one nesting level.
+ * ----------
+ */
+typedef struct PgStat_TableXactStatus
+{
+ PgStat_Counter tuples_inserted; /* tuples inserted in (sub)xact */
+ PgStat_Counter tuples_deleted; /* tuples deleted in (sub)xact */
+ int nest_level; /* subtransaction nest level */
+ /* links to other structs for same relation: */
+ struct PgStat_TableXactStatus *upper; /* next higher subxact if any */
+ PgStat_TableStatus *parent; /* per-table status */
+ /* structs of same subxact level are linked here: */
+ struct PgStat_TableXactStatus *next; /* next of same subxact */
+} PgStat_TableXactStatus;
+
/* ------------------------------------------------------------
* Message formats follow
@@ -78,30 +162,12 @@ typedef struct PgStat_MsgDummy
/* ----------
* PgStat_TableEntry Per-table info in a MsgTabstat
- *
- * Note: for a table, tuples_returned is the number of tuples successfully
- * fetched by heap_getnext, while tuples_fetched is the number of tuples
- * successfully fetched by heap_fetch under the control of bitmap indexscans.
- * For an index, tuples_returned is the number of index entries returned by
- * the index AM, while tuples_fetched is the number of tuples successfully
- * fetched by heap_fetch under the control of simple indexscans for this index.
* ----------
*/
typedef struct PgStat_TableEntry
{
Oid t_id;
-
- PgStat_Counter t_numscans;
-
- PgStat_Counter t_tuples_returned;
- PgStat_Counter t_tuples_fetched;
-
- PgStat_Counter t_tuples_inserted;
- PgStat_Counter t_tuples_updated;
- PgStat_Counter t_tuples_deleted;
-
- PgStat_Counter t_blocks_fetched;
- PgStat_Counter t_blocks_hit;
+ PgStat_TableCounts t_counts;
} PgStat_TableEntry;
/* ----------
@@ -393,6 +459,10 @@ extern bool pgstat_collect_tuplelevel;
extern bool pgstat_collect_blocklevel;
extern bool pgstat_collect_querystring;
+/*
+ * BgWriter statistics counters are updated directly by bgwriter and bufmgr
+ */
+extern PgStat_MsgBgWriter BgWriterStats;
/* ----------
* Functions called from postmaster
@@ -436,83 +506,67 @@ extern void pgstat_report_activity(const char *what);
extern void pgstat_report_txn_timestamp(TimestampTz tstamp);
extern void pgstat_report_waiting(bool waiting);
-extern void pgstat_initstats(PgStat_Info *stats, Relation rel);
+extern void pgstat_initstats(Relation rel);
+/* nontransactional event counts are simple enough to inline */
-#define pgstat_count_heap_scan(s) \
+#define pgstat_count_heap_scan(rel) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_numscans++; \
+ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_numscans++; \
} while (0)
/* kluge for bitmap scans: */
-#define pgstat_discount_heap_scan(s) \
+#define pgstat_discount_heap_scan(rel) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_numscans--; \
+ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_numscans--; \
} while (0)
-#define pgstat_count_heap_getnext(s) \
+#define pgstat_count_heap_getnext(rel) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_returned++; \
+ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_tuples_returned++; \
} while (0)
-#define pgstat_count_heap_fetch(s) \
+#define pgstat_count_heap_fetch(rel) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_fetched++; \
+ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_tuples_fetched++; \
} while (0)
-#define pgstat_count_heap_insert(s) \
+#define pgstat_count_index_scan(rel) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_inserted++; \
+ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_numscans++; \
} while (0)
-#define pgstat_count_heap_update(s) \
+#define pgstat_count_index_tuples(rel, n) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_updated++; \
+ if (pgstat_collect_tuplelevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_tuples_returned += (n); \
} while (0)
-#define pgstat_count_heap_delete(s) \
+#define pgstat_count_buffer_read(rel) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_deleted++; \
+ if (pgstat_collect_blocklevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_blocks_fetched++; \
} while (0)
-#define pgstat_count_index_scan(s) \
+#define pgstat_count_buffer_hit(rel) \
do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_numscans++; \
- } while (0)
-#define pgstat_count_index_tuples(s, n) \
- do { \
- if (pgstat_collect_tuplelevel && (s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_tuples_returned += (n); \
- } while (0)
-#define pgstat_count_buffer_read(s,r) \
- do { \
- if (pgstat_collect_blocklevel) { \
- if ((s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_fetched++; \
- else { \
- pgstat_initstats((s), (r)); \
- if ((s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_fetched++; \
- } \
- } \
- } while (0)
-#define pgstat_count_buffer_hit(s,r) \
- do { \
- if (pgstat_collect_blocklevel) { \
- if ((s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_hit++; \
- else { \
- pgstat_initstats((s), (r)); \
- if ((s)->tabentry != NULL) \
- ((PgStat_TableEntry *)((s)->tabentry))->t_blocks_hit++; \
- } \
- } \
+ if (pgstat_collect_blocklevel && (rel)->pgstat_info != NULL) \
+ (rel)->pgstat_info->t_counts.t_blocks_hit++; \
} while (0)
+extern void pgstat_count_heap_insert(Relation rel);
+extern void pgstat_count_heap_update(Relation rel);
+extern void pgstat_count_heap_delete(Relation rel);
+
+extern void AtEOXact_PgStat(bool isCommit);
+extern void AtEOSubXact_PgStat(bool isCommit, int nestDepth);
+
+extern void AtPrepare_PgStat(void);
+extern void PostPrepare_PgStat(void);
+
+extern void pgstat_twophase_postcommit(TransactionId xid, uint16 info,
+ void *recdata, uint32 len);
+extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
+ void *recdata, uint32 len);
-extern void pgstat_count_xact_commit(void);
-extern void pgstat_count_xact_rollback(void);
extern void pgstat_send_bgwriter(void);
/* ----------
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 33795de2bf8..bc6bf190b86 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.100 2007/03/29 00:15:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.101 2007/05/27 03:50:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -90,15 +90,6 @@ typedef struct TriggerDesc
/*
- * Same for the statistics collector data in Relation and scan data.
- */
-typedef struct PgStat_Info
-{
- void *tabentry;
-} PgStat_Info;
-
-
-/*
* Cached lookup information for the index access method functions defined
* by the pg_am row associated with an index relation.
*/
@@ -200,8 +191,8 @@ typedef struct RelationData
List *rd_indpred; /* index predicate tree, if any */
void *rd_amcache; /* available for use by index AM */
- /* statistics collection area */
- PgStat_Info pgstat_info;
+ /* use "struct" here to avoid needing to include pgstat.h: */
+ struct PgStat_TableStatus *pgstat_info; /* statistics collection area */
} RelationData;
typedef RelationData *Relation;