aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--contrib/pgstattuple/Makefile4
-rw-r--r--contrib/pgstattuple/pgstatapprox.c273
-rw-r--r--contrib/pgstattuple/pgstattuple--1.2--1.3.sql18
-rw-r--r--contrib/pgstattuple/pgstattuple--1.3.sql (renamed from contrib/pgstattuple/pgstattuple--1.2.sql)18
-rw-r--r--contrib/pgstattuple/pgstattuple.control2
-rw-r--r--doc/src/sgml/pgstattuple.sgml136
6 files changed, 446 insertions, 5 deletions
diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile
index 862585cc017..6083dabefd3 100644
--- a/contrib/pgstattuple/Makefile
+++ b/contrib/pgstattuple/Makefile
@@ -1,10 +1,10 @@
# contrib/pgstattuple/Makefile
MODULE_big = pgstattuple
-OBJS = pgstattuple.o pgstatindex.o $(WIN32RES)
+OBJS = pgstattuple.o pgstatindex.o pgstatapprox.o $(WIN32RES)
EXTENSION = pgstattuple
-DATA = pgstattuple--1.2.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
+DATA = pgstattuple--1.3.sql pgstattuple--1.2--1.3.sql pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql pgstattuple--unpackaged--1.0.sql
PGFILEDESC = "pgstattuple - tuple-level statistics"
REGRESS = pgstattuple
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
new file mode 100644
index 00000000000..ae5ed56f986
--- /dev/null
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -0,0 +1,273 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgstatapproc.c
+ * Bloat estimation functions
+ *
+ * Copyright (c) 2014-2015, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/pgstattuple/pgstatapprox.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/visibilitymap.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/multixact.h"
+#include "access/htup_details.h"
+#include "catalog/namespace.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/freespace.h"
+#include "storage/procarray.h"
+#include "storage/lmgr.h"
+#include "utils/builtins.h"
+#include "utils/tqual.h"
+#include "commands/vacuum.h"
+
+PG_FUNCTION_INFO_V1(pgstattuple_approx);
+
+typedef struct output_type
+{
+ uint64 table_len;
+ uint64 scanned_percent;
+ uint64 tuple_count;
+ uint64 tuple_len;
+ double tuple_percent;
+ uint64 dead_tuple_count;
+ uint64 dead_tuple_len;
+ double dead_tuple_percent;
+ uint64 free_space;
+ double free_percent;
+} output_type;
+
+#define NUM_OUTPUT_COLUMNS 10
+
+/*
+ * This function takes an already open relation and scans its pages,
+ * skipping those that have the corresponding visibility map bit set.
+ * For pages we skip, we find the free space from the free space map
+ * and approximate tuple_len on that basis. For the others, we count
+ * the exact number of dead tuples etc.
+ *
+ * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
+ * we do not try to avoid skipping single pages.
+ */
+static void
+statapprox_heap(Relation rel, output_type *stat)
+{
+ BlockNumber scanned,
+ nblocks,
+ blkno;
+ Buffer vmbuffer = InvalidBuffer;
+ BufferAccessStrategy bstrategy;
+ TransactionId OldestXmin;
+ uint64 misc_count = 0;
+
+ OldestXmin = GetOldestXmin(rel, true);
+ bstrategy = GetAccessStrategy(BAS_BULKREAD);
+
+ nblocks = RelationGetNumberOfBlocks(rel);
+ scanned = 0;
+
+ for (blkno = 0; blkno < nblocks; blkno++)
+ {
+ Buffer buf;
+ Page page;
+ OffsetNumber offnum,
+ maxoff;
+ Size freespace;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /*
+ * If the page has only visible tuples, then we can find out the
+ * free space from the FSM and move on.
+ */
+ if (visibilitymap_test(rel, blkno, &vmbuffer))
+ {
+ freespace = GetRecordedFreeSpace(rel, blkno);
+ stat->tuple_len += BLCKSZ - freespace;
+ stat->free_space += freespace;
+ continue;
+ }
+
+ buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
+ RBM_NORMAL, bstrategy);
+
+ LockBuffer(buf, BUFFER_LOCK_SHARE);
+
+ page = BufferGetPage(buf);
+
+ /*
+ * It's not safe to call PageGetHeapFreeSpace() on new pages, so
+ * we treat them as being free space for our purposes.
+ */
+ if (!PageIsNew(page))
+ stat->free_space += PageGetHeapFreeSpace(page);
+ else
+ stat->free_space += BLCKSZ - SizeOfPageHeaderData;
+
+ if (PageIsNew(page) || PageIsEmpty(page))
+ {
+ UnlockReleaseBuffer(buf);
+ continue;
+ }
+
+ scanned++;
+
+ /*
+ * Look at each tuple on the page and decide whether it's live
+ * or dead, then count it and its size. Unlike lazy_scan_heap,
+ * we can afford to ignore problems and special cases.
+ */
+ maxoff = PageGetMaxOffsetNumber(page);
+
+ for (offnum = FirstOffsetNumber;
+ offnum <= maxoff;
+ offnum = OffsetNumberNext(offnum))
+ {
+ ItemId itemid;
+ HeapTupleData tuple;
+
+ itemid = PageGetItemId(page, offnum);
+
+ if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) ||
+ ItemIdIsDead(itemid))
+ {
+ continue;
+ }
+
+ Assert(ItemIdIsNormal(itemid));
+
+ ItemPointerSet(&(tuple.t_self), blkno, offnum);
+
+ tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuple.t_len = ItemIdGetLength(itemid);
+ tuple.t_tableOid = RelationGetRelid(rel);
+
+ /*
+ * We count live and dead tuples, but we also need to add up
+ * others in order to feed vac_estimate_reltuples.
+ */
+ switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
+ {
+ case HEAPTUPLE_RECENTLY_DEAD:
+ misc_count++;
+ /* Fall through */
+ case HEAPTUPLE_DEAD:
+ stat->dead_tuple_len += tuple.t_len;
+ stat->dead_tuple_count++;
+ break;
+ case HEAPTUPLE_LIVE:
+ stat->tuple_len += tuple.t_len;
+ stat->tuple_count++;
+ break;
+ case HEAPTUPLE_INSERT_IN_PROGRESS:
+ case HEAPTUPLE_DELETE_IN_PROGRESS:
+ misc_count++;
+ break;
+ default:
+ elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(buf);
+ }
+
+ stat->table_len = (uint64) nblocks * BLCKSZ;
+ stat->tuple_count = vac_estimate_reltuples(rel, false, nblocks, scanned,
+ stat->tuple_count+misc_count);
+
+ /*
+ * Calculate percentages if the relation has one or more pages.
+ */
+ if (nblocks != 0)
+ {
+ stat->scanned_percent = 100 * scanned / nblocks;
+ stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
+ stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;
+ stat->free_percent = 100.0 * stat->free_space / stat->table_len;
+ }
+
+ if (BufferIsValid(vmbuffer))
+ {
+ ReleaseBuffer(vmbuffer);
+ vmbuffer = InvalidBuffer;
+ }
+}
+
+/*
+ * Returns estimated live/dead tuple statistics for the given relid.
+ */
+Datum
+pgstattuple_approx(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ Relation rel;
+ output_type stat = {0};
+ TupleDesc tupdesc;
+ bool nulls[NUM_OUTPUT_COLUMNS];
+ Datum values[NUM_OUTPUT_COLUMNS];
+ HeapTuple ret;
+ int i = 0;
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ (errmsg("must be superuser to use pgstattuple functions"))));
+
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ if (tupdesc->natts != NUM_OUTPUT_COLUMNS)
+ elog(ERROR, "incorrect number of output arguments");
+
+ rel = relation_open(relid, AccessShareLock);
+
+ /*
+ * Reject attempts to read non-local temporary relations; we would be
+ * likely to get wrong data since we have no visibility into the owning
+ * session's local buffers.
+ */
+ if (RELATION_IS_OTHER_TEMP(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot access temporary tables of other sessions")));
+
+ /*
+ * We support only ordinary relations and materialised views,
+ * because we depend on the visibility map and free space map
+ * for our estimates about unscanned pages.
+ */
+ if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
+ rel->rd_rel->relkind == RELKIND_MATVIEW))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("\"%s\" is not a table or materialized view",
+ RelationGetRelationName(rel))));
+
+ statapprox_heap(rel, &stat);
+
+ relation_close(rel, AccessShareLock);
+
+ memset(nulls, 0, sizeof(nulls));
+
+ values[i++] = Int64GetDatum(stat.table_len);
+ values[i++] = Float8GetDatum(stat.scanned_percent);
+ values[i++] = Int64GetDatum(stat.tuple_count);
+ values[i++] = Int64GetDatum(stat.tuple_len);
+ values[i++] = Float8GetDatum(stat.tuple_percent);
+ values[i++] = Int64GetDatum(stat.dead_tuple_count);
+ values[i++] = Int64GetDatum(stat.dead_tuple_len);
+ values[i++] = Float8GetDatum(stat.dead_tuple_percent);
+ values[i++] = Int64GetDatum(stat.free_space);
+ values[i++] = Float8GetDatum(stat.free_percent);
+
+ ret = heap_form_tuple(tupdesc, values, nulls);
+ return HeapTupleGetDatum(ret);
+}
diff --git a/contrib/pgstattuple/pgstattuple--1.2--1.3.sql b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql
new file mode 100644
index 00000000000..99301a27caf
--- /dev/null
+++ b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql
@@ -0,0 +1,18 @@
+/* contrib/pgstattuple/pgstattuple--1.2--1.3.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.3'" to load this file. \quit
+
+CREATE FUNCTION pgstattuple_approx(IN reloid regclass,
+ OUT table_len BIGINT, -- physical table length in bytes
+ OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned
+ OUT approx_tuple_count BIGINT, -- estimated number of live tuples
+ OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples
+ OUT approx_tuple_percent FLOAT8, -- live tuples in % (based on estimate)
+ OUT dead_tuple_count BIGINT, -- exact number of dead tuples
+ OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples
+ OUT dead_tuple_percent FLOAT8, -- dead tuples in % (based on estimate)
+ OUT approx_free_space BIGINT, -- estimated free space in bytes
+ OUT approx_free_percent FLOAT8) -- free space in % (based on estimate)
+AS 'MODULE_PATHNAME', 'pgstattuple_approx'
+LANGUAGE C STRICT;
diff --git a/contrib/pgstattuple/pgstattuple--1.2.sql b/contrib/pgstattuple/pgstattuple--1.3.sql
index e5fa2f58dab..f3996e74a87 100644
--- a/contrib/pgstattuple/pgstattuple--1.2.sql
+++ b/contrib/pgstattuple/pgstattuple--1.3.sql
@@ -1,4 +1,4 @@
-/* contrib/pgstattuple/pgstattuple--1.2.sql */
+/* contrib/pgstattuple/pgstattuple--1.3.sql */
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "CREATE EXTENSION pgstattuple" to load this file. \quit
@@ -77,3 +77,19 @@ CREATE FUNCTION pg_relpages(IN relname regclass)
RETURNS BIGINT
AS 'MODULE_PATHNAME', 'pg_relpagesbyid'
LANGUAGE C STRICT;
+
+/* New stuff in 1.3 begins here */
+
+CREATE FUNCTION pgstattuple_approx(IN reloid regclass,
+ OUT table_len BIGINT, -- physical table length in bytes
+ OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned
+ OUT approx_tuple_count BIGINT, -- estimated number of live tuples
+ OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples
+ OUT approx_tuple_percent FLOAT8, -- live tuples in % (based on estimate)
+ OUT dead_tuple_count BIGINT, -- exact number of dead tuples
+ OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples
+ OUT dead_tuple_percent FLOAT8, -- dead tuples in % (based on estimate)
+ OUT approx_free_space BIGINT, -- estimated free space in bytes
+ OUT approx_free_percent FLOAT8) -- free space in % (based on estimate)
+AS 'MODULE_PATHNAME', 'pgstattuple_approx'
+LANGUAGE C STRICT;
diff --git a/contrib/pgstattuple/pgstattuple.control b/contrib/pgstattuple/pgstattuple.control
index a7cf47fd921..c03b180143f 100644
--- a/contrib/pgstattuple/pgstattuple.control
+++ b/contrib/pgstattuple/pgstattuple.control
@@ -1,5 +1,5 @@
# pgstattuple extension
comment = 'show tuple-level statistics'
-default_version = '1.2'
+default_version = '1.3'
module_pathname = '$libdir/pgstattuple'
relocatable = true
diff --git a/doc/src/sgml/pgstattuple.sgml b/doc/src/sgml/pgstattuple.sgml
index 9cabd71166e..b31fee9b0e1 100644
--- a/doc/src/sgml/pgstattuple.sgml
+++ b/doc/src/sgml/pgstattuple.sgml
@@ -358,6 +358,140 @@ pending_tuples | 0
</listitem>
</varlistentry>
+ <varlistentry>
+ <term>
+ <indexterm>
+ <primary>pgstattuple_approx</primary>
+ </indexterm>
+ <function>pgstattuple_approx(regclass) returns record</>
+ </term>
+
+ <listitem>
+ <para>
+ <function>pgstattuple_approx</function> is a faster alternative to
+ <function>pgstattuple</function> that returns approximate results.
+ The argument is the target relation's OID.
+ For example:
+<programlisting>
+test=> SELECT * FROM pgstattuple_approx('pg_catalog.pg_proc'::regclass);
+-[ RECORD 1 ]--------+-------
+table_len | 573440
+scanned_percent | 2
+approx_tuple_count | 2740
+approx_tuple_len | 561210
+approx_tuple_percent | 97.87
+dead_tuple_count | 0
+dead_tuple_len | 0
+dead_tuple_percent | 0
+approx_free_space | 11996
+approx_free_percent | 2.09
+</programlisting>
+ The output columns are described in <xref linkend="pgstatapprox-columns">.
+ </para>
+
+ <para>
+ Whereas <function>pgstattuple</function> always performs a
+ full-table scan and returns an exact count of live and dead tuples
+ (and their sizes) and free space, <function>pgstattuple_approx</function>
+ tries to avoid the full-table scan and returns exact dead tuple
+ statistics along with an approximation of the number and
+ size of live tuples and free space.
+ </para>
+
+ <para>
+ It does this by skipping pages that have only visible tuples
+ according to the visibility map (if a page has the corresponding VM
+ bit set, then it is assumed to contain no dead tuples). For such
+ pages, it derives the free space value from the free space map, and
+ assumes that the rest of the space on the page is taken up by live
+ tuples.
+ </para>
+
+ <para>
+ For pages that cannot be skipped, it scans each tuple, recording its
+ presence and size in the appropriate counters, and adding up the
+ free space on the page. At the end, it estimates the total number of
+ live tuples based on the number of pages and tuples scanned (in the
+ same way that VACUUM estimates pg_class.reltuples).
+ </para>
+
+ <table id="pgstatapprox-columns">
+ <title><function>pgstattuple_approx</function> Output Columns</title>
+ <tgroup cols="3">
+ <thead>
+ <row>
+ <entry>Column</entry>
+ <entry>Type</entry>
+ <entry>Description</entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry><structfield>table_len</structfield></entry>
+ <entry><type>bigint</type></entry>
+ <entry>Physical relation length in bytes (exact)</entry>
+ </row>
+ <row>
+ <entry><structfield>scanned_percent</structfield></entry>
+ <entry><type>float8</type></entry>
+ <entry>Percentage of table scanned</entry>
+ </row>
+ <row>
+ <entry><structfield>tuple_count</structfield></entry>
+ <entry><type>bigint</type></entry>
+ <entry>Number of live tuples (estimated)</entry>
+ </row>
+ <row>
+ <entry><structfield>tuple_len</structfield></entry>
+ <entry><type>bigint</type></entry>
+ <entry>Total length of live tuples in bytes (estimated)</entry>
+ </row>
+ <row>
+ <entry><structfield>tuple_percent</structfield></entry>
+ <entry><type>float8</type></entry>
+ <entry>Percentage of live tuples</entry>
+ </row>
+ <row>
+ <entry><structfield>dead_tuple_count</structfield></entry>
+ <entry><type>bigint</type></entry>
+ <entry>Number of dead tuples (exact)</entry>
+ </row>
+ <row>
+ <entry><structfield>dead_tuple_len</structfield></entry>
+ <entry><type>bigint</type></entry>
+ <entry>Total length of dead tuples in bytes (exact)</entry>
+ </row>
+ <row>
+ <entry><structfield>dead_tuple_percent</structfield></entry>
+ <entry><type>float8</type></entry>
+ <entry>Percentage of dead tuples</entry>
+ </row>
+ <row>
+ <entry><structfield>approx_free_space</structfield></entry>
+ <entry><type>bigint</type></entry>
+ <entry>Total free space in bytes (estimated)</entry>
+ </row>
+ <row>
+ <entry><structfield>approx_free_percent</structfield></entry>
+ <entry><type>float8</type></entry>
+ <entry>Percentage of free space</entry>
+ </row>
+
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>
+ In the above output, the free space figures may not match the
+ <function>pgstattuple</function> output exactly, because the free
+ space map gives us an exact figure, but is not guaranteed to be
+ accurate to the byte.
+ </para>
+
+ </listitem>
+ </varlistentry>
+
</variablelist>
</sect2>
@@ -365,7 +499,7 @@ pending_tuples | 0
<title>Authors</title>
<para>
- Tatsuo Ishii and Satoshi Nagayasu
+ Tatsuo Ishii, Satoshi Nagayasu and Abhijit Menon-Sen
</para>
</sect2>