aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/src/sgml/catalogs.sgml94
-rw-r--r--doc/src/sgml/ref/allfiles.sgml5
-rw-r--r--doc/src/sgml/ref/commit_prepared.sgml111
-rw-r--r--doc/src/sgml/ref/prepare_transaction.sgml160
-rw-r--r--doc/src/sgml/ref/rollback_prepared.sgml111
-rw-r--r--doc/src/sgml/reference.sgml5
-rw-r--r--doc/src/sgml/runtime.sgml31
-rw-r--r--src/backend/access/transam/Makefile4
-rw-r--r--src/backend/access/transam/subtrans.c19
-rw-r--r--src/backend/access/transam/transam.c25
-rw-r--r--src/backend/access/transam/twophase.c1659
-rw-r--r--src/backend/access/transam/twophase_rmgr.c49
-rw-r--r--src/backend/access/transam/xact.c640
-rw-r--r--src/backend/access/transam/xlog.c92
-rw-r--r--src/backend/catalog/system_views.sql59
-rw-r--r--src/backend/commands/async.c57
-rw-r--r--src/backend/nodes/copyfuncs.c3
-rw-r--r--src/backend/nodes/equalfuncs.c3
-rw-r--r--src/backend/parser/gram.y42
-rw-r--r--src/backend/parser/keywords.c3
-rw-r--r--src/backend/postmaster/postmaster.c12
-rw-r--r--src/backend/storage/ipc/ipci.c27
-rw-r--r--src/backend/storage/ipc/procarray.c140
-rw-r--r--src/backend/storage/lmgr/lmgr.c7
-rw-r--r--src/backend/storage/lmgr/lock.c537
-rw-r--r--src/backend/storage/lmgr/proc.c53
-rw-r--r--src/backend/storage/smgr/smgr.c28
-rw-r--r--src/backend/tcop/postgres.c5
-rw-r--r--src/backend/tcop/utility.c38
-rw-r--r--src/backend/utils/cache/inval.c101
-rw-r--r--src/backend/utils/init/flatfiles.c71
-rw-r--r--src/backend/utils/init/postinit.c6
-rw-r--r--src/backend/utils/misc/guc.c12
-rw-r--r--src/backend/utils/misc/postgresql.conf.sample1
-rw-r--r--src/backend/utils/mmgr/portalmem.c44
-rw-r--r--src/bin/initdb/initdb.c3
-rw-r--r--src/bin/psql/common.c17
-rw-r--r--src/include/access/subtrans.h4
-rw-r--r--src/include/access/twophase.h49
-rw-r--r--src/include/access/twophase_rmgr.h39
-rw-r--r--src/include/access/xact.h38
-rw-r--r--src/include/catalog/catversion.h4
-rw-r--r--src/include/catalog/pg_proc.h4
-rw-r--r--src/include/commands/async.h6
-rw-r--r--src/include/nodes/parsenodes.h8
-rw-r--r--src/include/storage/ipc.h6
-rw-r--r--src/include/storage/lmgr.h4
-rw-r--r--src/include/storage/lock.h17
-rw-r--r--src/include/storage/lwlock.h3
-rw-r--r--src/include/storage/proc.h25
-rw-r--r--src/include/storage/procarray.h18
-rw-r--r--src/include/storage/smgr.h3
-rw-r--r--src/include/utils/builtins.h5
-rw-r--r--src/include/utils/flatfiles.h6
-rw-r--r--src/include/utils/inval.h9
-rw-r--r--src/include/utils/portal.h3
-rw-r--r--src/test/regress/expected/prepared_xacts.out213
-rw-r--r--src/test/regress/expected/rules.out3
-rw-r--r--src/test/regress/parallel_schedule2
-rw-r--r--src/test/regress/serial_schedule3
-rw-r--r--src/test/regress/sql/prepared_xacts.sql137
61 files changed, 4449 insertions, 434 deletions
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 41d7a4e34d5..c5473b9501b 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -1,6 +1,6 @@
<!--
Documentation of the system catalogs, directed toward PostgreSQL developers
- $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.103 2005/06/13 23:14:47 tgl Exp $
+ $PostgreSQL: pgsql/doc/src/sgml/catalogs.sgml,v 2.104 2005/06/17 22:32:41 tgl Exp $
-->
<chapter id="catalogs">
@@ -3933,6 +3933,11 @@
</row>
<row>
+ <entry><link linkend="view-pg-prepared-xacts"><structname>pg_prepared_xacts</structname></link></entry>
+ <entry>currently prepared transactions</entry>
+ </row>
+
+ <row>
<entry><link linkend="view-pg-rules"><structname>pg_rules</structname></link></entry>
<entry>rules</entry>
</row>
@@ -4167,8 +4172,10 @@
<entry><structfield>pid</structfield></entry>
<entry><type>integer</type></entry>
<entry></entry>
- <entry>process ID of the server process holding or awaiting this
- lock</entry>
+ <entry>
+ Process ID of the server process holding or awaiting this
+ lock. Zero if the lock is held by a prepared transaction.
+ </entry>
</row>
<row>
<entry><structfield>mode</structfield></entry>
@@ -4250,6 +4257,87 @@
</sect1>
+ <sect1 id="view-pg-prepared-xacts">
+ <title><structname>pg_prepared_xacts</structname></title>
+
+ <indexterm zone="view-pg-prepared-xacts">
+ <primary>pg_prepared_xacts</primary>
+ </indexterm>
+
+ <para>
+ The view <structname>pg_prepared_xacts</structname> displays
+ information about transactions that are currently prepared for two-phase
+ commit (see <xref linkend="sql-prepare-transaction"
+ endterm="sql-prepare-transaction-title"> for details).
+ </para>
+
+ <para>
+ <structname>pg_prepared_xacts</structname> contains one row per prepared
+ transaction. An entry is removed when the transaction is committed or
+ rolled back.
+ </para>
+
+ <table>
+ <title><structname>pg_prepared_xacts</> Columns</title>
+
+ <tgroup cols=4>
+ <thead>
+ <row>
+ <entry>Name</entry>
+ <entry>Type</entry>
+ <entry>References</entry>
+ <entry>Description</entry>
+ </row>
+ </thead>
+ <tbody>
+ <row>
+ <entry><structfield>transaction</structfield></entry>
+ <entry><type>xid</type></entry>
+ <entry></entry>
+ <entry>
+ Numeric transaction identifier of the prepared transaction
+ </entry>
+ </row>
+ <row>
+ <entry><structfield>gid</structfield></entry>
+ <entry><type>text</type></entry>
+ <entry></entry>
+ <entry>
+ Global transaction identifier that was assigned to the transaction
+ </entry>
+ </row>
+ <row>
+ <entry><structfield>owner</structfield></entry>
+ <entry><type>name</type></entry>
+ <entry><literal><link linkend="catalog-pg-shadow"><structname>pg_shadow</structname></link>.usename</literal></entry>
+ <entry>
+ Name of the user that executed the transaction
+ </entry>
+ </row>
+ <row>
+ <entry><structfield>database</structfield></entry>
+ <entry><type>name</type></entry>
+ <entry><literal><link linkend="catalog-pg-database"><structname>pg_database</structname></link>.datname</literal></entry>
+ <entry>
+ Name of the database in which the transaction was executed
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>
+ When the <structname>pg_prepared_xacts</structname> view is accessed, the
+ internal transaction manager data structures are momentarily locked, and
+ a copy is made for the view to display. This ensures that the
+ view produces a consistent set of results, while not blocking
+ normal operations longer than necessary. Nonetheless
+ there could be some impact on database performance if this view is
+ read often.
+ </para>
+
+ </sect1>
+
<sect1 id="view-pg-rules">
<title><structname>pg_rules</structname></title>
diff --git a/doc/src/sgml/ref/allfiles.sgml b/doc/src/sgml/ref/allfiles.sgml
index 6326f96f72f..33e9e68b9d5 100644
--- a/doc/src/sgml/ref/allfiles.sgml
+++ b/doc/src/sgml/ref/allfiles.sgml
@@ -1,5 +1,5 @@
<!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/allfiles.sgml,v 1.62 2004/08/21 16:16:04 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/allfiles.sgml,v 1.63 2005/06/17 22:32:42 tgl Exp $
PostgreSQL documentation
Complete list of usable sgml source files in this directory.
-->
@@ -30,6 +30,7 @@ Complete list of usable sgml source files in this directory.
<!entity cluster system "cluster.sgml">
<!entity commentOn system "comment.sgml">
<!entity commit system "commit.sgml">
+<!entity commitPrepared system "commit_prepared.sgml">
<!entity copyTable system "copy.sgml">
<!entity createAggregate system "create_aggregate.sgml">
<!entity createCast system "create_cast.sgml">
@@ -88,11 +89,13 @@ Complete list of usable sgml source files in this directory.
<!entity move system "move.sgml">
<!entity notify system "notify.sgml">
<!entity prepare system "prepare.sgml">
+<!entity prepareTransaction system "prepare_transaction.sgml">
<!entity reindex system "reindex.sgml">
<!entity releaseSavepoint system "release_savepoint.sgml">
<!entity reset system "reset.sgml">
<!entity revoke system "revoke.sgml">
<!entity rollback system "rollback.sgml">
+<!entity rollbackPrepared system "rollback_prepared.sgml">
<!entity rollbackTo system "rollback_to.sgml">
<!entity savepoint system "savepoint.sgml">
<!entity select system "select.sgml">
diff --git a/doc/src/sgml/ref/commit_prepared.sgml b/doc/src/sgml/ref/commit_prepared.sgml
new file mode 100644
index 00000000000..b18175815b2
--- /dev/null
+++ b/doc/src/sgml/ref/commit_prepared.sgml
@@ -0,0 +1,111 @@
+<!--
+$PostgreSQL: pgsql/doc/src/sgml/ref/commit_prepared.sgml,v 1.1 2005/06/17 22:32:42 tgl Exp $
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-COMMIT-PREPARED">
+ <refmeta>
+ <refentrytitle id="sql-commit-prepared-title">COMMIT PREPARED</refentrytitle>
+ <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>COMMIT PREPARED</refname>
+ <refpurpose>commit a transaction that was earlier prepared for two-phase commit</refpurpose>
+ </refnamediv>
+
+ <indexterm zone="sql-commit-prepared">
+ <primary>COMMIT PREPARED</primary>
+ </indexterm>
+
+ <refsynopsisdiv>
+<synopsis>
+COMMIT PREPARED <replaceable class="PARAMETER">transaction_id</replaceable>
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>
+ <command>COMMIT PREPARED</command> commits a transaction that is in
+ prepared state.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Parameters</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><replaceable class="PARAMETER">transaction_id</replaceable></term>
+ <listitem>
+ <para>
+ The transaction identifier of the transaction that is to be
+ committed.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Notes</title>
+
+ <para>
+ To commit a prepared transaction, you must be either the same user that
+ executed the transaction originally, or a superuser. But you do not
+ have to be in the same session that executed the transaction.
+ </para>
+
+ <para>
+ This command cannot be executed inside a transaction block. The prepared
+ transaction is committed immediately.
+ </para>
+
+ <para>
+ All currently available prepared transactions are listed in the
+ <structname>pg_prepared_xacts</> system view.
+ </para>
+ </refsect1>
+
+ <refsect1 id="sql-commit-prepared-examples">
+ <title id="sql-commit-prepared-examples-title">Examples</title>
+ <para>
+ Commit the transaction identified by the transaction
+ identifier <literal>foobar</>:
+
+<programlisting>
+COMMIT PREPARED 'foobar';
+</programlisting>
+ </para>
+
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+
+ <simplelist type="inline">
+ <member><xref linkend="sql-prepare-transaction" endterm="sql-prepare-transaction-title"></member>
+ <member><xref linkend="sql-rollback-prepared" endterm="sql-rollback-prepared-title"></member>
+ </simplelist>
+ </refsect1>
+
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:nil
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:1
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:"../reference.ced"
+sgml-exposed-tags:nil
+sgml-local-catalogs:"/usr/lib/sgml/catalog"
+sgml-local-ecat-files:nil
+End:
+-->
diff --git a/doc/src/sgml/ref/prepare_transaction.sgml b/doc/src/sgml/ref/prepare_transaction.sgml
new file mode 100644
index 00000000000..773689ae06d
--- /dev/null
+++ b/doc/src/sgml/ref/prepare_transaction.sgml
@@ -0,0 +1,160 @@
+<!--
+$PostgreSQL: pgsql/doc/src/sgml/ref/prepare_transaction.sgml,v 1.1 2005/06/17 22:32:42 tgl Exp $
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-PREPARE-TRANSACTION">
+ <refmeta>
+ <refentrytitle id="sql-prepare-transaction-title">PREPARE TRANSACTION</refentrytitle>
+ <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>PREPARE TRANSACTION</refname>
+ <refpurpose>prepare the current transaction for two-phase commit</refpurpose>
+ </refnamediv>
+
+ <indexterm zone="sql-prepare-transaction">
+ <primary>PREPARE TRANSACTION</primary>
+ </indexterm>
+
+ <refsynopsisdiv>
+<synopsis>
+PREPARE TRANSACTION <replaceable class="PARAMETER">transaction_id</replaceable>
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>
+ <command>PREPARE TRANSACTION</command> prepares the current transaction
+ for two-phase commit. After this command, the transaction is no longer
+ associated with the current session; instead, its state is fully stored on
+ disk, and there is a very high probability that it can be committed
+ successfully, even if a database crash occurs before the commit is
+ requested.
+ </para>
+
+ <para>
+ Once prepared, a transaction can later be committed or rolled
+ back with <command>COMMIT PREPARED</command> or
+ <command>ROLLBACK PREPARED</command>, respectively. Those commands
+ can be issued from any session, not only the one that executed the
+ original transaction.
+ </para>
+
+ <para>
+ From the point of view of the issuing session, <command>PREPARE
+ TRANSACTION</command> is not unlike a <command>ROLLBACK</> command:
+ after executing it, there is no active current transaction, and the
+ effects of the prepared transaction are no longer visible. (The effects
+ will become visible again if the transaction is committed.)
+ </para>
+
+ <para>
+ If the <command>PREPARE TRANSACTION</command> command fails for any
+ reason, it becomes a <command>ROLLBACK</>: the current transaction
+ is canceled.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Parameters</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><replaceable class="PARAMETER">transaction_id</replaceable></term>
+ <listitem>
+ <para>
+ An arbitrary identifier that later identifies this transaction for
+ <command>COMMIT PREPARED</> or <command>ROLLBACK PREPARED</>.
+ The identifier must be written as a string literal, and must be
+ less than 200 bytes long. It must not be the same as the identifier
+ used for any currently prepared transaction.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Notes</title>
+
+ <para>
+ This command must be used inside a transaction block. Use
+ <command>BEGIN</command> to start one.
+ </para>
+
+ <para>
+ It is not currently allowed to <command>PREPARE</> a transaction that
+ has executed any operations involving temporary tables nor
+ created any cursors <literal>WITH HOLD</>. Those features are too tightly
+ tied to the current session to be useful in a transaction to be prepared.
+ </para>
+
+ <para>
+ If the transaction modified any run-time parameters with <command>SET</>,
+ those effects persist after <command>PREPARE TRANSACTION</>, and will not
+ be affected by any later <command>COMMIT PREPARED</command> or
+ <command>ROLLBACK PREPARED</command>. Thus, in this one respect
+ <command>PREPARE TRANSACTION</> acts more like <command>COMMIT</> than
+ <command>ROLLBACK</>.
+ </para>
+
+ <para>
+ All currently available prepared transactions are listed in the
+ <structname>pg_prepared_xacts</> system view.
+ </para>
+
+ <para>
+ From a performance standpoint, it is unwise to leave transactions in
+ the prepared state for a long time: this will for instance interfere with
+ the ability of <command>VACUUM</> to reclaim storage. Keep in mind also
+ that the transaction continues to hold whatever locks it held.
+ The intended
+ usage of the feature is that a prepared transaction will normally be
+ committed or rolled back as soon as an external transaction manager
+ has verified that other databases are also prepared to commit.
+ </para>
+ </refsect1>
+
+ <refsect1 id="sql-prepare-transaction-examples">
+ <title id="sql-prepare-transaction-examples-title">Examples</title>
+ <para>
+ Prepare the current transaction for two-phase commit, using
+ <literal>foobar</> as the transaction identifier:
+
+<programlisting>
+PREPARE TRANSACTION 'foobar';
+</programlisting>
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+
+ <simplelist type="inline">
+ <member><xref linkend="sql-commit-prepared" endterm="sql-commit-prepared-title"></member>
+ <member><xref linkend="sql-rollback-prepared" endterm="sql-rollback-prepared-title"></member>
+ </simplelist>
+ </refsect1>
+
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:nil
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:1
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:"../reference.ced"
+sgml-exposed-tags:nil
+sgml-local-catalogs:"/usr/lib/sgml/catalog"
+sgml-local-ecat-files:nil
+End:
+-->
diff --git a/doc/src/sgml/ref/rollback_prepared.sgml b/doc/src/sgml/ref/rollback_prepared.sgml
new file mode 100644
index 00000000000..51df9226321
--- /dev/null
+++ b/doc/src/sgml/ref/rollback_prepared.sgml
@@ -0,0 +1,111 @@
+<!--
+$PostgreSQL: pgsql/doc/src/sgml/ref/rollback_prepared.sgml,v 1.1 2005/06/17 22:32:42 tgl Exp $
+PostgreSQL documentation
+-->
+
+<refentry id="SQL-ROLLBACK-PREPARED">
+ <refmeta>
+ <refentrytitle id="sql-rollback-prepared-title">ROLLBACK PREPARED</refentrytitle>
+ <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+
+ <refnamediv>
+ <refname>ROLLBACK PREPARED</refname>
+ <refpurpose>cancel a transaction that was earlier prepared for two-phase commit</refpurpose>
+ </refnamediv>
+
+ <indexterm zone="sql-rollback-prepared">
+ <primary>ROLLBACK PREPARED</primary>
+ </indexterm>
+
+ <refsynopsisdiv>
+<synopsis>
+ROLLBACK PREPARED <replaceable class="PARAMETER">transaction_id</replaceable>
+</synopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>
+ <command>ROLLBACK PREPARED</command> rolls back a transaction that is in
+ prepared state.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Parameters</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><replaceable class="PARAMETER">transaction_id</replaceable></term>
+ <listitem>
+ <para>
+ The transaction identifier of the transaction that is to be
+ rolled back.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Notes</title>
+
+ <para>
+ To roll back a prepared transaction, you must be either the same user that
+ executed the transaction originally, or a superuser. But you do not
+ have to be in the same session that executed the transaction.
+ </para>
+
+ <para>
+ This command cannot be executed inside a transaction block. The prepared
+ transaction is rolled back immediately.
+ </para>
+
+ <para>
+ All currently available prepared transactions are listed in the
+ <structname>pg_prepared_xacts</> system view.
+ </para>
+ </refsect1>
+
+ <refsect1 id="sql-rollback-prepared-examples">
+ <title id="sql-rollback-prepared-examples-title">Examples</title>
+ <para>
+ Roll back the transaction identified by the transaction
+ identifier <literal>foobar</>:
+
+<programlisting>
+ROLLBACK PREPARED 'foobar';
+</programlisting>
+ </para>
+
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+
+ <simplelist type="inline">
+ <member><xref linkend="sql-prepare-transaction" endterm="sql-prepare-transaction-title"></member>
+ <member><xref linkend="sql-commit-prepared" endterm="sql-commit-prepared-title"></member>
+ </simplelist>
+ </refsect1>
+
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:nil
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:1
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:"../reference.ced"
+sgml-exposed-tags:nil
+sgml-local-catalogs:"/usr/lib/sgml/catalog"
+sgml-local-ecat-files:nil
+End:
+-->
diff --git a/doc/src/sgml/reference.sgml b/doc/src/sgml/reference.sgml
index 01dc578b9d3..4edec85c122 100644
--- a/doc/src/sgml/reference.sgml
+++ b/doc/src/sgml/reference.sgml
@@ -1,5 +1,5 @@
<!-- reference.sgml
-$PostgreSQL: pgsql/doc/src/sgml/reference.sgml,v 1.52 2004/08/21 16:16:03 tgl Exp $
+$PostgreSQL: pgsql/doc/src/sgml/reference.sgml,v 1.53 2005/06/17 22:32:42 tgl Exp $
PostgreSQL Reference Manual
-->
@@ -62,6 +62,7 @@ PostgreSQL Reference Manual
&cluster;
&commentOn;
&commit;
+ &commitPrepared;
&copyTable;
&createAggregate;
&createCast;
@@ -120,11 +121,13 @@ PostgreSQL Reference Manual
&move;
&notify;
&prepare;
+ &prepareTransaction;
&reindex;
&releaseSavepoint;
&reset;
&revoke;
&rollback;
+ &rollbackPrepared;
&rollbackTo;
&savepoint;
&select;
diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml
index dfb86511c3e..1a2a9935cc3 100644
--- a/doc/src/sgml/runtime.sgml
+++ b/doc/src/sgml/runtime.sgml
@@ -1,5 +1,5 @@
<!--
-$PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.328 2005/06/17 13:12:01 momjian Exp $
+$PostgreSQL: pgsql/doc/src/sgml/runtime.sgml,v 1.329 2005/06/17 22:32:42 tgl Exp $
-->
<chapter Id="runtime">
@@ -956,7 +956,7 @@ SET ENABLE_SEQSCAN TO OFF;
<para>
Sets the location of the Kerberos server key file. See
<xref linkend="kerberos-auth"> for details. This parameter
- can only be set at server start.
+ can only be set at server start.
</para>
</listitem>
</varlistentry>
@@ -1113,6 +1113,33 @@ SET ENABLE_SEQSCAN TO OFF;
</listitem>
</varlistentry>
+ <varlistentry id="guc-max-prepared-transactions" xreflabel="max_prepared_transactions">
+ <term><varname>max_prepared_transactions</varname> (<type>integer</type>)</term>
+ <indexterm>
+ <primary><varname>max_prepared_transactions</> configuration parameter</primary>
+ </indexterm>
+ <listitem>
+ <para>
+ Sets the maximum number of transactions that can be in the
+ <quote>prepared</> state simultaneously (see <xref
+ linkend="sql-prepare-transaction"
+ endterm="sql-prepare-transaction-title">).
+ Setting this parameter to zero disables the prepared-transaction
+ feature.
+ The default is 50.
+ This option can only be set at server start.
+ </para>
+
+ <para>
+ Increasing this parameter may cause <productname>PostgreSQL</>
+ to request more <systemitem class="osname">System V</> shared
+ memory than your operating system's default configuration
+ allows. See <xref linkend="sysvipc"> for information on how to
+ adjust those parameters, if necessary.
+ </para>
+ </listitem>
+ </varlistentry>
+
<varlistentry id="guc-work-mem" xreflabel="work_mem">
<term><varname>work_mem</varname> (<type>integer</type>)</term>
<indexterm>
diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile
index 295ecf9e14f..c6ecef17246 100644
--- a/src/backend/access/transam/Makefile
+++ b/src/backend/access/transam/Makefile
@@ -4,7 +4,7 @@
# Makefile for access/transam
#
# IDENTIFICATION
-# $PostgreSQL: pgsql/src/backend/access/transam/Makefile,v 1.20 2005/04/28 21:47:10 tgl Exp $
+# $PostgreSQL: pgsql/src/backend/access/transam/Makefile,v 1.21 2005/06/17 22:32:42 tgl Exp $
#
#-------------------------------------------------------------------------
@@ -12,7 +12,7 @@ subdir = src/backend/access/transam
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = clog.o transam.o varsup.o xact.o xlog.o xlogutils.o rmgr.o slru.o subtrans.o multixact.o
+OBJS = clog.o transam.o varsup.o xact.o xlog.o xlogutils.o rmgr.o slru.o subtrans.o multixact.o twophase.o twophase_rmgr.o
all: SUBSYS.o
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index 0b774363888..cea778d6a1c 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -22,7 +22,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.8 2005/05/19 21:35:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/subtrans.c,v 1.9 2005/06/17 22:32:42 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -222,22 +222,33 @@ ZeroSUBTRANSPage(int pageno)
/*
* This must be called ONCE during postmaster or standalone-backend startup,
* after StartupXLOG has initialized ShmemVariableCache->nextXid.
+ *
+ * oldestActiveXID is the oldest XID of any prepared transaction, or nextXid
+ * if there are none.
*/
void
-StartupSUBTRANS(void)
+StartupSUBTRANS(TransactionId oldestActiveXID)
{
int startPage;
+ int endPage;
/*
* Since we don't expect pg_subtrans to be valid across crashes, we
- * initialize the currently-active page to zeroes during startup.
+ * initialize the currently-active page(s) to zeroes during startup.
* Whenever we advance into a new page, ExtendSUBTRANS will likewise
* zero the new page without regard to whatever was previously on
* disk.
*/
LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
- startPage = TransactionIdToPage(ShmemVariableCache->nextXid);
+ startPage = TransactionIdToPage(oldestActiveXID);
+ endPage = TransactionIdToPage(ShmemVariableCache->nextXid);
+
+ while (startPage != endPage)
+ {
+ (void) ZeroSUBTRANSPage(startPage);
+ startPage++;
+ }
(void) ZeroSUBTRANSPage(startPage);
LWLockRelease(SubtransControlLock);
diff --git a/src/backend/access/transam/transam.c b/src/backend/access/transam/transam.c
index f88c25a37db..5fa6f82daf4 100644
--- a/src/backend/access/transam/transam.c
+++ b/src/backend/access/transam/transam.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/transam/transam.c,v 1.64 2005/02/20 21:46:48 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/transam.c,v 1.65 2005/06/17 22:32:42 tgl Exp $
*
* NOTES
* This file contains the high level access-method interface to the
@@ -173,6 +173,14 @@ TransactionIdDidCommit(TransactionId transactionId)
* recursively. However, if it's older than TransactionXmin, we can't
* look at pg_subtrans; instead assume that the parent crashed without
* cleaning up its children.
+ *
+ * Originally we Assert'ed that the result of SubTransGetParent was
+ * not zero. However with the introduction of prepared transactions,
+ * there can be a window just after database startup where we do not
+ * have complete knowledge in pg_subtrans of the transactions after
+ * TransactionXmin. StartupSUBTRANS() has ensured that any missing
+ * information will be zeroed. Since this case should not happen under
+ * normal conditions, it seems reasonable to emit a WARNING for it.
*/
if (xidstatus == TRANSACTION_STATUS_SUB_COMMITTED)
{
@@ -181,7 +189,12 @@ TransactionIdDidCommit(TransactionId transactionId)
if (TransactionIdPrecedes(transactionId, TransactionXmin))
return false;
parentXid = SubTransGetParent(transactionId);
- Assert(TransactionIdIsValid(parentXid));
+ if (!TransactionIdIsValid(parentXid))
+ {
+ elog(WARNING, "no pg_subtrans entry for subcommitted XID %u",
+ transactionId);
+ return false;
+ }
return TransactionIdDidCommit(parentXid);
}
@@ -224,7 +237,13 @@ TransactionIdDidAbort(TransactionId transactionId)
if (TransactionIdPrecedes(transactionId, TransactionXmin))
return true;
parentXid = SubTransGetParent(transactionId);
- Assert(TransactionIdIsValid(parentXid));
+ if (!TransactionIdIsValid(parentXid))
+ {
+ /* see notes in TransactionIdDidCommit */
+ elog(WARNING, "no pg_subtrans entry for subcommitted XID %u",
+ transactionId);
+ return true;
+ }
return TransactionIdDidAbort(parentXid);
}
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
new file mode 100644
index 00000000000..01cc50a6a46
--- /dev/null
+++ b/src/backend/access/transam/twophase.c
@@ -0,0 +1,1659 @@
+/*-------------------------------------------------------------------------
+ *
+ * twophase.c
+ * Two-phase commit support functions.
+ *
+ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.1 2005/06/17 22:32:42 tgl Exp $
+ *
+ * NOTES
+ * Each global transaction is associated with a global transaction
+ * identifier (GID). The client assigns a GID to a postgres
+ * transaction with the PREPARE TRANSACTION command.
+ *
+ * We keep all active global transactions in a shared memory array.
+ * When the PREPARE TRANSACTION command is issued, the GID is
+ * reserved for the transaction in the array. This is done before
+ * a WAL entry is made, because the reservation checks for duplicate
+ * GIDs and aborts the transaction if there already is a global
+ * transaction in prepared state with the same GID.
+ *
+ * A global transaction (gxact) also has a dummy PGPROC that is entered
+ * into the ProcArray array; this is what keeps the XID considered
+ * running by TransactionIdIsInProgress. It is also convenient as a
+ * PGPROC to hook the gxact's locks to.
+ *
+ * In order to survive crashes and shutdowns, all prepared
+ * transactions must be stored in permanent storage. This includes
+ * locking information, pending notifications etc. All that state
+ * information is written to the per-transaction state file in
+ * the pg_twophase directory.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "access/heapam.h"
+#include "access/subtrans.h"
+#include "access/twophase.h"
+#include "access/twophase_rmgr.h"
+#include "access/xact.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/fd.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "storage/smgr.h"
+#include "utils/builtins.h"
+#include "pgstat.h"
+
+
+/*
+ * Directory where Two-phase commit files reside within PGDATA
+ */
+#define TWOPHASE_DIR "pg_twophase"
+
+/* GUC variable, can't be changed after startup */
+int max_prepared_xacts = 50;
+
+/*
+ * This struct describes one global transaction that is in prepared state
+ * or attempting to become prepared.
+ *
+ * The first component of the struct is a dummy PGPROC that is inserted
+ * into the global ProcArray so that the transaction appears to still be
+ * running and holding locks. It must be first because we cast pointers
+ * to PGPROC and pointers to GlobalTransactionData back and forth.
+ *
+ * The lifecycle of a global transaction is:
+ *
+ * 1. After checking that the requested GID is not in use, set up an
+ * entry in the TwoPhaseState->prepXacts array with the correct XID and GID,
+ * with locking_xid = my own XID and valid = false.
+ *
+ * 2. After successfully completing prepare, set valid = true and enter the
+ * contained PGPROC into the global ProcArray.
+ *
+ * 3. To begin COMMIT PREPARED or ROLLBACK PREPARED, check that the entry
+ * is valid and its locking_xid is no longer active, then store my current
+ * XID into locking_xid. This prevents concurrent attempts to commit or
+ * rollback the same prepared xact.
+ *
+ * 4. On completion of COMMIT PREPARED or ROLLBACK PREPARED, remove the entry
+ * from the ProcArray and the TwoPhaseState->prepXacts array and return it to
+ * the freelist.
+ *
+ * Note that if the preparing transaction fails between steps 1 and 2, the
+ * entry will remain in prepXacts until recycled. We can detect recyclable
+ * entries by checking for valid = false and locking_xid no longer active.
+ *
+ * typedef struct GlobalTransactionData *GlobalTransaction appears in
+ * twophase.h
+ */
+#define GIDSIZE 200
+
+typedef struct GlobalTransactionData
+{
+ PGPROC proc; /* dummy proc */
+ AclId owner; /* ID of user that executed the xact */
+ TransactionId locking_xid; /* top-level XID of backend working on xact */
+ bool valid; /* TRUE if fully prepared */
+ char gid[GIDSIZE]; /* The GID assigned to the prepared xact */
+} GlobalTransactionData;
+
+/*
+ * Two Phase Commit shared state. Access to this struct is protected
+ * by TwoPhaseStateLock.
+ */
+typedef struct TwoPhaseStateData
+{
+ /* Head of linked list of free GlobalTransactionData structs */
+ SHMEM_OFFSET freeGXacts;
+
+ /* Number of valid prepXacts entries. */
+ int numPrepXacts;
+
+ /*
+ * There are max_prepared_xacts items in this array, but C wants a
+ * fixed-size array.
+ */
+ GlobalTransaction prepXacts[1]; /* VARIABLE LENGTH ARRAY */
+} TwoPhaseStateData; /* VARIABLE LENGTH STRUCT */
+
+static TwoPhaseStateData *TwoPhaseState;
+
+
+static void RecordTransactionCommitPrepared(TransactionId xid,
+ int nchildren,
+ TransactionId *children,
+ int nrels,
+ RelFileNode *rels);
+static void RecordTransactionAbortPrepared(TransactionId xid,
+ int nchildren,
+ TransactionId *children,
+ int nrels,
+ RelFileNode *rels);
+static void ProcessRecords(char *bufptr, TransactionId xid,
+ const TwoPhaseCallback callbacks[]);
+
+
+/*
+ * Initialization of shared memory
+ */
+int
+TwoPhaseShmemSize(void)
+{
+ /* Need the fixed struct, the array of pointers, and the GTD structs */
+ return MAXALIGN(offsetof(TwoPhaseStateData, prepXacts) +
+ sizeof(GlobalTransaction) * max_prepared_xacts) +
+ sizeof(GlobalTransactionData) * max_prepared_xacts;
+}
+
+void
+TwoPhaseShmemInit(void)
+{
+ bool found;
+
+ TwoPhaseState = ShmemInitStruct("Prepared Transaction Table",
+ TwoPhaseShmemSize(),
+ &found);
+ if (!IsUnderPostmaster)
+ {
+ GlobalTransaction gxacts;
+ int i;
+
+ Assert(!found);
+ TwoPhaseState->freeGXacts = INVALID_OFFSET;
+ TwoPhaseState->numPrepXacts = 0;
+
+ /*
+ * Initialize the linked list of free GlobalTransactionData structs
+ */
+ gxacts = (GlobalTransaction)
+ ((char *) TwoPhaseState +
+ MAXALIGN(offsetof(TwoPhaseStateData, prepXacts) +
+ sizeof(GlobalTransaction) * max_prepared_xacts));
+ for (i = 0; i < max_prepared_xacts; i++)
+ {
+ gxacts[i].proc.links.next = TwoPhaseState->freeGXacts;
+ TwoPhaseState->freeGXacts = MAKE_OFFSET(&gxacts[i]);
+ }
+ }
+ else
+ Assert(found);
+}
+
+
+/*
+ * MarkAsPreparing
+ * Reserve the GID for the given transaction.
+ *
+ * Internally, this creates a gxact struct and puts it into the active array.
+ * NOTE: this is also used when reloading a gxact after a crash; so avoid
+ * assuming that we can use very much backend context.
+ */
+GlobalTransaction
+MarkAsPreparing(TransactionId xid, Oid databaseid, char *gid, AclId owner)
+{
+ GlobalTransaction gxact;
+ int i;
+
+ if (strlen(gid) >= GIDSIZE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("global transaction identifier \"%s\" is too long",
+ gid)));
+
+ LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
+
+ /*
+ * First, find and recycle any gxacts that failed during prepare.
+ * We do this partly to ensure we don't mistakenly say their GIDs
+ * are still reserved, and partly so we don't fail on out-of-slots
+ * unnecessarily.
+ */
+ for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
+ {
+ gxact = TwoPhaseState->prepXacts[i];
+ if (!gxact->valid && !TransactionIdIsActive(gxact->locking_xid))
+ {
+ /* It's dead Jim ... remove from the active array */
+ TwoPhaseState->numPrepXacts--;
+ TwoPhaseState->prepXacts[i] = TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts];
+ /* and put it back in the freelist */
+ gxact->proc.links.next = TwoPhaseState->freeGXacts;
+ TwoPhaseState->freeGXacts = MAKE_OFFSET(gxact);
+ /* Back up index count too, so we don't miss scanning one */
+ i--;
+ }
+ }
+
+ /* Check for conflicting GID */
+ for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
+ {
+ gxact = TwoPhaseState->prepXacts[i];
+ if (strcmp(gxact->gid, gid) == 0)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DUPLICATE_OBJECT),
+ errmsg("global transaction identifier \"%s\" is already in use",
+ gid)));
+ }
+ }
+
+ /* Get a free gxact from the freelist */
+ if (TwoPhaseState->freeGXacts == INVALID_OFFSET)
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("maximum number of prepared transactions reached"),
+ errhint("Increase max_prepared_transactions (currently %d).",
+ max_prepared_xacts)));
+ gxact = (GlobalTransaction) MAKE_PTR(TwoPhaseState->freeGXacts);
+ TwoPhaseState->freeGXacts = gxact->proc.links.next;
+
+ /* Initialize it */
+ MemSet(&gxact->proc, 0, sizeof(PGPROC));
+ SHMQueueElemInit(&(gxact->proc.links));
+ gxact->proc.waitStatus = STATUS_OK;
+ gxact->proc.xid = xid;
+ gxact->proc.xmin = InvalidTransactionId;
+ gxact->proc.pid = 0;
+ gxact->proc.databaseId = databaseid;
+ gxact->proc.lwWaiting = false;
+ gxact->proc.lwExclusive = false;
+ gxact->proc.lwWaitLink = NULL;
+ gxact->proc.waitLock = NULL;
+ gxact->proc.waitProcLock = NULL;
+ SHMQueueInit(&(gxact->proc.procLocks));
+ /* subxid data must be filled later by GXactLoadSubxactData */
+ gxact->proc.subxids.overflowed = false;
+ gxact->proc.subxids.nxids = 0;
+
+ gxact->owner = owner;
+ gxact->locking_xid = xid;
+ gxact->valid = false;
+ strcpy(gxact->gid, gid);
+
+ /* And insert it into the active array */
+ Assert(TwoPhaseState->numPrepXacts < max_prepared_xacts);
+ TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts++] = gxact;
+
+ LWLockRelease(TwoPhaseStateLock);
+
+ return gxact;
+}
+
+/*
+ * GXactLoadSubxactData
+ *
+ * If the transaction being persisted had any subtransactions, this must
+ * be called before MarkAsPrepared() to load information into the dummy
+ * PGPROC.
+ */
+static void
+GXactLoadSubxactData(GlobalTransaction gxact, int nsubxacts,
+ TransactionId *children)
+{
+ /* We need no extra lock since the GXACT isn't valid yet */
+ if (nsubxacts > PGPROC_MAX_CACHED_SUBXIDS)
+ {
+ gxact->proc.subxids.overflowed = true;
+ nsubxacts = PGPROC_MAX_CACHED_SUBXIDS;
+ }
+ if (nsubxacts > 0)
+ {
+ memcpy(gxact->proc.subxids.xids, children,
+ nsubxacts * sizeof(TransactionId));
+ gxact->proc.subxids.nxids = nsubxacts;
+ }
+}
+
+/*
+ * MarkAsPrepared
+ * Mark the GXACT as fully valid, and enter it into the global ProcArray.
+ */
+void
+MarkAsPrepared(GlobalTransaction gxact)
+{
+ /* Lock here may be overkill, but I'm not convinced of that ... */
+ LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
+ Assert(!gxact->valid);
+ gxact->valid = true;
+ LWLockRelease(TwoPhaseStateLock);
+
+ /*
+ * Put it into the global ProcArray so TransactionIdInProgress considers
+ * the XID as still running.
+ */
+ ProcArrayAdd(&gxact->proc);
+}
+
+/*
+ * LockGXact
+ * Locate the prepared transaction and mark it busy for COMMIT or PREPARE.
+ */
+static GlobalTransaction
+LockGXact(char *gid, AclId user)
+{
+ int i;
+
+ LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
+
+ for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
+ {
+ GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
+
+ /* Ignore not-yet-valid GIDs */
+ if (!gxact->valid)
+ continue;
+ if (strcmp(gxact->gid, gid) != 0)
+ continue;
+
+ /* Found it, but has someone else got it locked? */
+ if (TransactionIdIsValid(gxact->locking_xid))
+ {
+ if (TransactionIdIsActive(gxact->locking_xid))
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("prepared transaction with gid \"%s\" is busy",
+ gid)));
+ gxact->locking_xid = InvalidTransactionId;
+ }
+
+ if (user != gxact->owner && !superuser_arg(user))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("permission denied to finish prepared transaction"),
+ errhint("Must be superuser or the user that prepared the transaction.")));
+
+ /* OK for me to lock it */
+ gxact->locking_xid = GetTopTransactionId();
+
+ LWLockRelease(TwoPhaseStateLock);
+
+ return gxact;
+ }
+
+ LWLockRelease(TwoPhaseStateLock);
+
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("prepared transaction with gid \"%s\" does not exist",
+ gid)));
+
+ /* NOTREACHED */
+ return NULL;
+}
+
+/*
+ * RemoveGXact
+ * Remove the prepared transaction from the shared memory array.
+ *
+ * NB: caller should have already removed it from ProcArray
+ */
+static void
+RemoveGXact(GlobalTransaction gxact)
+{
+ int i;
+
+ LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
+
+ for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
+ {
+ if (gxact == TwoPhaseState->prepXacts[i])
+ {
+ /* remove from the active array */
+ TwoPhaseState->numPrepXacts--;
+ TwoPhaseState->prepXacts[i] = TwoPhaseState->prepXacts[TwoPhaseState->numPrepXacts];
+
+ /* and put it back in the freelist */
+ gxact->proc.links.next = TwoPhaseState->freeGXacts;
+ TwoPhaseState->freeGXacts = MAKE_OFFSET(gxact);
+
+ LWLockRelease(TwoPhaseStateLock);
+
+ return;
+ }
+ }
+
+ LWLockRelease(TwoPhaseStateLock);
+
+ elog(ERROR, "failed to find %p in GlobalTransaction array", gxact);
+}
+
+/*
+ * Returns an array of all prepared transactions for the user-level
+ * function pg_prepared_xact.
+ *
+ * The returned array and all its elements are copies of internal data
+ * structures, to minimize the time we need to hold the TwoPhaseStateLock.
+ *
+ * WARNING -- we return even those transactions that are not fully prepared
+ * yet. The caller should filter them out if he doesn't want them.
+ *
+ * The returned array is palloc'd.
+ */
+static int
+GetPreparedTransactionList(GlobalTransaction *gxacts)
+{
+ GlobalTransaction array;
+ int num;
+ int i;
+
+ LWLockAcquire(TwoPhaseStateLock, LW_SHARED);
+
+ if (TwoPhaseState->numPrepXacts == 0)
+ {
+ LWLockRelease(TwoPhaseStateLock);
+
+ *gxacts = NULL;
+ return 0;
+ }
+
+ num = TwoPhaseState->numPrepXacts;
+ array = (GlobalTransaction) palloc(sizeof(GlobalTransactionData) * num);
+ *gxacts = array;
+ for (i = 0; i < num; i++)
+ memcpy(array + i, TwoPhaseState->prepXacts[i],
+ sizeof(GlobalTransactionData));
+
+ LWLockRelease(TwoPhaseStateLock);
+
+ return num;
+}
+
+
+/* Working status for pg_prepared_xact */
+typedef struct
+{
+ GlobalTransaction array;
+ int ngxacts;
+ int currIdx;
+} Working_State;
+
+/*
+ * pg_prepared_xact
+ * Produce a view with one row per prepared transaction.
+ *
+ * This function is here so we don't have to export the
+ * GlobalTransactionData struct definition.
+ */
+Datum
+pg_prepared_xact(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ Working_State *status;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ TupleDesc tupdesc;
+ MemoryContext oldcontext;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * Switch to memory context appropriate for multiple function
+ * calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* build tupdesc for result tuples */
+ /* this had better match pg_prepared_xacts view in system_views.sql */
+ tupdesc = CreateTemplateTupleDesc(4, false);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "transaction",
+ XIDOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "gid",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "ownerid",
+ INT4OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 4, "dbid",
+ OIDOID, -1, 0);
+
+ funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+ /*
+ * Collect all the 2PC status information that we will format and
+ * send out as a result set.
+ */
+ status = (Working_State *) palloc(sizeof(Working_State));
+ funcctx->user_fctx = (void *) status;
+
+ status->ngxacts = GetPreparedTransactionList(&status->array);
+ status->currIdx = 0;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ status = (Working_State *) funcctx->user_fctx;
+
+ while (status->array != NULL && status->currIdx < status->ngxacts)
+ {
+ GlobalTransaction gxact = &status->array[status->currIdx++];
+ Datum values[4];
+ bool nulls[4];
+ HeapTuple tuple;
+ Datum result;
+
+ if (!gxact->valid)
+ continue;
+
+ /*
+ * Form tuple with appropriate data.
+ */
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ values[0] = TransactionIdGetDatum(gxact->proc.xid);
+ values[1] = DirectFunctionCall1(textin, CStringGetDatum(gxact->gid));
+ values[2] = Int32GetDatum(gxact->owner);
+ values[3] = ObjectIdGetDatum(gxact->proc.databaseId);
+
+ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+ result = HeapTupleGetDatum(tuple);
+ SRF_RETURN_NEXT(funcctx, result);
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * TwoPhaseGetDummyProc
+ * Get the PGPROC that represents a prepared transaction specified by XID
+ */
+PGPROC *
+TwoPhaseGetDummyProc(TransactionId xid)
+{
+ PGPROC *result = NULL;
+ int i;
+
+ static TransactionId cached_xid = InvalidTransactionId;
+ static PGPROC *cached_proc = NULL;
+
+ /*
+ * During a recovery, COMMIT PREPARED, or ABORT PREPARED, we'll be called
+ * repeatedly for the same XID. We can save work with a simple cache.
+ */
+ if (xid == cached_xid)
+ return cached_proc;
+
+ LWLockAcquire(TwoPhaseStateLock, LW_SHARED);
+
+ for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
+ {
+ GlobalTransaction gxact = TwoPhaseState->prepXacts[i];
+
+ if (gxact->proc.xid == xid)
+ {
+ result = &gxact->proc;
+ break;
+ }
+ }
+
+ LWLockRelease(TwoPhaseStateLock);
+
+ if (result == NULL) /* should not happen */
+ elog(ERROR, "failed to find dummy PGPROC for xid %u", xid);
+
+ cached_xid = xid;
+ cached_proc = result;
+
+ return result;
+}
+
+/************************************************************************/
+/* State file support */
+/************************************************************************/
+
+#define TwoPhaseFilePath(path, xid) \
+ snprintf(path, MAXPGPATH, "%s/%s/%08X", DataDir, TWOPHASE_DIR, xid)
+
+/*
+ * 2PC state file format:
+ *
+ * 1. TwoPhaseFileHeader
+ * 2. TransactionId[] (subtransactions)
+ * 3. RelFileNode[] (files to be deleted at commit)
+ * 4. RelFileNode[] (files to be deleted at abort)
+ * 5. TwoPhaseRecordOnDisk
+ * 6. ...
+ * 7. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID)
+ * 8. CRC32
+ *
+ * Each segment except the final CRC32 is MAXALIGN'd.
+ */
+
+/*
+ * Header for a 2PC state file
+ */
+#define TWOPHASE_MAGIC 0x57F94530 /* format identifier */
+
+typedef struct TwoPhaseFileHeader
+{
+ uint32 magic; /* format identifier */
+ uint32 total_len; /* actual file length */
+ TransactionId xid; /* original transaction XID */
+ Oid database; /* OID of database it was in */
+ AclId owner; /* user running the transaction */
+ int32 nsubxacts; /* number of following subxact XIDs */
+ int32 ncommitrels; /* number of delete-on-commit rels */
+ int32 nabortrels; /* number of delete-on-abort rels */
+ char gid[GIDSIZE]; /* GID for transaction */
+} TwoPhaseFileHeader;
+
+/*
+ * Header for each record in a state file
+ *
+ * NOTE: len counts only the rmgr data, not the TwoPhaseRecordOnDisk header.
+ * The rmgr data will be stored starting on a MAXALIGN boundary.
+ */
+typedef struct TwoPhaseRecordOnDisk
+{
+ uint32 len; /* length of rmgr data */
+ TwoPhaseRmgrId rmid; /* resource manager for this record */
+ uint16 info; /* flag bits for use by rmgr */
+} TwoPhaseRecordOnDisk;
+
+/*
+ * During prepare, the state file is assembled in memory before writing it
+ * to WAL and the actual state file. We use a chain of XLogRecData blocks
+ * so that we will be able to pass the state file contents directly to
+ * XLogInsert.
+ */
+static struct xllist
+{
+ XLogRecData *head; /* first data block in the chain */
+ XLogRecData *tail; /* last block in chain */
+ uint32 bytes_free; /* free bytes left in tail block */
+ uint32 total_len; /* total data bytes in chain */
+} records;
+
+
+/*
+ * Append a block of data to records data structure.
+ *
+ * NB: each block is padded to a MAXALIGN multiple. This must be
+ * accounted for when the file is later read!
+ *
+ * The data is copied, so the caller is free to modify it afterwards.
+ */
+static void
+save_state_data(const void *data, uint32 len)
+{
+ uint32 padlen = MAXALIGN(len);
+
+ if (padlen > records.bytes_free)
+ {
+ records.tail->next = palloc0(sizeof(XLogRecData));
+ records.tail = records.tail->next;
+ records.tail->buffer = InvalidBuffer;
+ records.tail->len = 0;
+ records.tail->next = NULL;
+
+ records.bytes_free = Max(padlen, 512);
+ records.tail->data = palloc(records.bytes_free);
+ }
+
+ memcpy(((char *) records.tail->data) + records.tail->len, data, len);
+ records.tail->len += padlen;
+ records.bytes_free -= padlen;
+ records.total_len += padlen;
+}
+
+/*
+ * Start preparing a state file.
+ *
+ * Initializes data structure and inserts the 2PC file header record.
+ */
+void
+StartPrepare(GlobalTransaction gxact)
+{
+ TransactionId xid = gxact->proc.xid;
+ TwoPhaseFileHeader hdr;
+ TransactionId *children;
+ RelFileNode *commitrels;
+ RelFileNode *abortrels;
+
+ /* Initialize linked list */
+ records.head = palloc0(sizeof(XLogRecData));
+ records.head->buffer = InvalidBuffer;
+ records.head->len = 0;
+ records.head->next = NULL;
+
+ records.bytes_free = Max(sizeof(TwoPhaseFileHeader), 512);
+ records.head->data = palloc(records.bytes_free);
+
+ records.tail = records.head;
+
+ records.total_len = 0;
+
+ /* Create header */
+ hdr.magic = TWOPHASE_MAGIC;
+ hdr.total_len = 0; /* EndPrepare will fill this in */
+ hdr.xid = xid;
+ hdr.database = MyDatabaseId;
+ hdr.owner = GetUserId();
+ hdr.nsubxacts = xactGetCommittedChildren(&children);
+ hdr.ncommitrels = smgrGetPendingDeletes(true, &commitrels);
+ hdr.nabortrels = smgrGetPendingDeletes(false, &abortrels);
+ StrNCpy(hdr.gid, gxact->gid, GIDSIZE);
+
+ save_state_data(&hdr, sizeof(TwoPhaseFileHeader));
+
+ /* Add the additional info about subxacts and deletable files */
+ if (hdr.nsubxacts > 0)
+ {
+ save_state_data(children, hdr.nsubxacts * sizeof(TransactionId));
+ /* While we have the child-xact data, stuff it in the gxact too */
+ GXactLoadSubxactData(gxact, hdr.nsubxacts, children);
+ pfree(children);
+ }
+ if (hdr.ncommitrels > 0)
+ {
+ save_state_data(commitrels, hdr.ncommitrels * sizeof(RelFileNode));
+ pfree(commitrels);
+ }
+ if (hdr.nabortrels > 0)
+ {
+ save_state_data(abortrels, hdr.nabortrels * sizeof(RelFileNode));
+ pfree(abortrels);
+ }
+}
+
+/*
+ * Finish preparing state file.
+ *
+ * Calculates CRC and writes state file to WAL and in pg_twophase directory.
+ */
+void
+EndPrepare(GlobalTransaction gxact)
+{
+ TransactionId xid = gxact->proc.xid;
+ TwoPhaseFileHeader *hdr;
+ char path[MAXPGPATH];
+ XLogRecData *record;
+ XLogRecPtr recptr;
+ pg_crc32 statefile_crc;
+ pg_crc32 bogus_crc;
+ int fd;
+
+ /* Add the end sentinel to the list of 2PC records */
+ RegisterTwoPhaseRecord(TWOPHASE_RM_END_ID, 0,
+ NULL, 0);
+
+ /* Go back and fill in total_len in the file header record */
+ hdr = (TwoPhaseFileHeader *) records.head->data;
+ Assert(hdr->magic == TWOPHASE_MAGIC);
+ hdr->total_len = records.total_len + sizeof(pg_crc32);
+
+ /*
+ * Create the 2PC state file.
+ *
+ * Note: because we use BasicOpenFile(), we are responsible for ensuring
+ * the FD gets closed in any error exit path. Once we get into the
+ * critical section, though, it doesn't matter since any failure causes
+ * PANIC anyway.
+ */
+ TwoPhaseFilePath(path, xid);
+
+ fd = BasicOpenFile(path,
+ O_CREAT | O_EXCL | O_WRONLY | PG_BINARY,
+ S_IRUSR | S_IWUSR);
+ if (fd < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create twophase state file \"%s\": %m",
+ path)));
+
+ /* Write data to file, and calculate CRC as we pass over it */
+ INIT_CRC32(statefile_crc);
+
+ for (record = records.head; record != NULL; record = record->next)
+ {
+ COMP_CRC32(statefile_crc, record->data, record->len);
+ if ((write(fd, record->data, record->len)) != record->len)
+ {
+ close(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write twophase state file: %m")));
+ }
+ }
+
+ FIN_CRC32(statefile_crc);
+
+ /*
+ * Write a deliberately bogus CRC to the state file, and flush it to disk.
+ * This is to minimize the odds of failure within the critical section
+ * below --- in particular, running out of disk space.
+ *
+ * On most filesystems, write() rather than fsync() detects out-of-space,
+ * so the fsync might be considered optional. Using it means there
+ * are three fsyncs not two associated with preparing a transaction; is
+ * the risk of an error from fsync high enough to justify that?
+ */
+ bogus_crc = ~ statefile_crc;
+
+ if ((write(fd, &bogus_crc, sizeof(pg_crc32))) != sizeof(pg_crc32))
+ {
+ close(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write twophase state file: %m")));
+ }
+
+ if (pg_fsync(fd) != 0)
+ {
+ close(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not fsync twophase state file: %m")));
+ }
+
+ /* Back up to prepare for rewriting the CRC */
+ if (lseek(fd, -((off_t) sizeof(pg_crc32)), SEEK_CUR) < 0)
+ {
+ close(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not seek twophase state file: %m")));
+ }
+
+ /*
+ * The state file isn't valid yet, because we haven't written the correct
+ * CRC yet. Before we do that, insert entry in WAL and flush it to disk.
+ *
+ * Between the time we have written the WAL entry and the time we
+ * flush the correct state file CRC to disk, we have an inconsistency:
+ * the xact is prepared according to WAL but not according to our on-disk
+ * state. We use a critical section to force a PANIC if we are unable to
+ * complete the flush --- then, WAL replay should repair the
+ * inconsistency.
+ *
+ * We have to lock out checkpoint start here, too; otherwise a checkpoint
+ * starting immediately after the WAL record is inserted could complete
+ * before we've finished flushing, meaning that the WAL record would not
+ * get replayed if a crash follows.
+ */
+ START_CRIT_SECTION();
+
+ LWLockAcquire(CheckpointStartLock, LW_SHARED);
+
+ recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE, records.head);
+ XLogFlush(recptr);
+
+ /* If we crash now, we have prepared: WAL replay will fix things */
+
+ /* write correct CRC, flush, and close file */
+ if ((write(fd, &statefile_crc, sizeof(pg_crc32))) != sizeof(pg_crc32))
+ {
+ close(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write twophase state file: %m")));
+ }
+
+ if (pg_fsync(fd) != 0)
+ {
+ close(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not fsync twophase state file: %m")));
+ }
+
+ if (close(fd) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not close twophase state file: %m")));
+
+ LWLockRelease(CheckpointStartLock);
+
+ END_CRIT_SECTION();
+
+ records.tail = records.head = NULL;
+}
+
+/*
+ * Register a 2PC record to be written to state file.
+ */
+void
+RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info,
+ const void *data, uint32 len)
+{
+ TwoPhaseRecordOnDisk record;
+
+ record.rmid = rmid;
+ record.info = info;
+ record.len = len;
+ save_state_data(&record, sizeof(TwoPhaseRecordOnDisk));
+ if (len > 0)
+ save_state_data(data, len);
+}
+
+
+/*
+ * Read and validate the state file for xid.
+ *
+ * If it looks OK (has a valid magic number and CRC), return the palloc'd
+ * contents of the file. Otherwise return NULL.
+ */
+static char *
+ReadTwoPhaseFile(TransactionId xid)
+{
+ char path[MAXPGPATH];
+ char *buf;
+ TwoPhaseFileHeader *hdr;
+ int fd;
+ struct stat stat;
+ uint32 crc_offset;
+ pg_crc32 calc_crc, file_crc;
+
+ TwoPhaseFilePath(path, xid);
+
+ fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
+ if (fd < 0)
+ {
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("could not open twophase state file \"%s\": %m",
+ path)));
+ return NULL;
+ }
+
+ /*
+ * Check file length. We can determine a lower bound pretty easily.
+ * We set an upper bound mainly to avoid palloc() failure on a corrupt
+ * file.
+ */
+ if (fstat(fd, &stat))
+ {
+ close(fd);
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("could not stat twophase state file \"%s\": %m",
+ path)));
+ return NULL;
+ }
+
+ if (stat.st_size < (MAXALIGN(sizeof(TwoPhaseFileHeader)) +
+ MAXALIGN(sizeof(TwoPhaseRecordOnDisk)) +
+ sizeof(pg_crc32)) ||
+ stat.st_size > 10000000)
+ {
+ close(fd);
+ return NULL;
+ }
+
+ crc_offset = stat.st_size - sizeof(pg_crc32);
+ if (crc_offset != MAXALIGN(crc_offset))
+ {
+ close(fd);
+ return NULL;
+ }
+
+ /*
+ * OK, slurp in the file.
+ */
+ buf = (char *) palloc(stat.st_size);
+
+ if (read(fd, buf, stat.st_size) != stat.st_size)
+ {
+ close(fd);
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("could not read twophase state file \"%s\": %m",
+ path)));
+ pfree(buf);
+ return NULL;
+ }
+
+ close(fd);
+
+ hdr = (TwoPhaseFileHeader *) buf;
+ if (hdr->magic != TWOPHASE_MAGIC || hdr->total_len != stat.st_size)
+ {
+ pfree(buf);
+ return NULL;
+ }
+
+ INIT_CRC32(calc_crc);
+ COMP_CRC32(calc_crc, buf, crc_offset);
+ FIN_CRC32(calc_crc);
+
+ file_crc = *((pg_crc32 *) (buf + crc_offset));
+
+ if (!EQ_CRC32(calc_crc, file_crc))
+ {
+ pfree(buf);
+ return NULL;
+ }
+
+ return buf;
+}
+
+
+/*
+ * FinishPreparedTransaction: execute COMMIT PREPARED or ROLLBACK PREPARED
+ */
+void
+FinishPreparedTransaction(char *gid, bool isCommit)
+{
+ GlobalTransaction gxact;
+ TransactionId xid;
+ char *buf;
+ char *bufptr;
+ TwoPhaseFileHeader *hdr;
+ TransactionId *children;
+ RelFileNode *commitrels;
+ RelFileNode *abortrels;
+ int i;
+
+ /*
+ * Validate the GID, and lock the GXACT to ensure that two backends
+ * do not try to commit the same GID at once.
+ */
+ gxact = LockGXact(gid, GetUserId());
+ xid = gxact->proc.xid;
+
+ /*
+ * Read and validate the state file
+ */
+ buf = ReadTwoPhaseFile(xid);
+ if (buf == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("twophase state file for transaction %u is corrupt",
+ xid)));
+
+ /*
+ * Disassemble the header area
+ */
+ hdr = (TwoPhaseFileHeader *) buf;
+ Assert(TransactionIdEquals(hdr->xid, xid));
+ bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
+ children = (TransactionId *) bufptr;
+ bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
+ commitrels = (RelFileNode *) bufptr;
+ bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
+ abortrels = (RelFileNode *) bufptr;
+ bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
+
+ /*
+ * The order of operations here is critical: make the XLOG entry for
+ * commit or abort, then mark the transaction committed or aborted in
+ * pg_clog, then remove its PGPROC from the global ProcArray (which
+ * means TransactionIdIsInProgress will stop saying the prepared xact
+ * is in progress), then run the post-commit or post-abort callbacks.
+ * The callbacks will release the locks the transaction held.
+ */
+ if (isCommit)
+ RecordTransactionCommitPrepared(xid,
+ hdr->nsubxacts, children,
+ hdr->ncommitrels, commitrels);
+ else
+ RecordTransactionAbortPrepared(xid,
+ hdr->nsubxacts, children,
+ hdr->nabortrels, abortrels);
+
+ ProcArrayRemove(&gxact->proc);
+
+ /*
+ * In case we fail while running the callbacks, mark the gxact invalid
+ * so no one else will try to commit/rollback, and so it can be recycled
+ * properly later. It is still locked by our XID so it won't go away yet.
+ */
+ gxact->valid = false;
+
+ if (isCommit)
+ ProcessRecords(bufptr, xid, twophase_postcommit_callbacks);
+ else
+ ProcessRecords(bufptr, xid, twophase_postabort_callbacks);
+
+ /*
+ * We also have to remove any files that were supposed to be dropped.
+ * NB: this code knows that we couldn't be dropping any temp rels ...
+ */
+ if (isCommit)
+ {
+ for (i = 0; i < hdr->ncommitrels; i++)
+ smgrdounlink(smgropen(commitrels[i]), false, false);
+ }
+ else
+ {
+ for (i = 0; i < hdr->nabortrels; i++)
+ smgrdounlink(smgropen(abortrels[i]), false, false);
+ }
+
+ pgstat_count_xact_commit();
+
+ /*
+ * And now we can clean up our mess.
+ */
+ RemoveTwoPhaseFile(xid, true);
+
+ RemoveGXact(gxact);
+
+ pfree(buf);
+}
+
+/*
+ * Scan a 2PC state file (already read into memory by ReadTwoPhaseFile)
+ * and call the indicated callbacks for each 2PC record.
+ */
+static void
+ProcessRecords(char *bufptr, TransactionId xid,
+ const TwoPhaseCallback callbacks[])
+{
+ for (;;)
+ {
+ TwoPhaseRecordOnDisk *record = (TwoPhaseRecordOnDisk *) bufptr;
+
+ Assert(record->rmid <= TWOPHASE_RM_MAX_ID);
+ if (record->rmid == TWOPHASE_RM_END_ID)
+ break;
+
+ bufptr += MAXALIGN(sizeof(TwoPhaseRecordOnDisk));
+
+ if (callbacks[record->rmid] != NULL)
+ callbacks[record->rmid](xid, record->info,
+ (void *) bufptr, record->len);
+
+ bufptr += MAXALIGN(record->len);
+ }
+}
+
+/*
+ * Remove the 2PC file for the specified XID.
+ *
+ * If giveWarning is false, do not complain about file-not-present;
+ * this is an expected case during WAL replay.
+ */
+void
+RemoveTwoPhaseFile(TransactionId xid, bool giveWarning)
+{
+ char path[MAXPGPATH];
+
+ TwoPhaseFilePath(path, xid);
+ if (unlink(path))
+ if (errno != ENOENT || giveWarning)
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("could not remove two-phase state file \"%s\": %m",
+ path)));
+}
+
+/*
+ * Recreates a state file. This is used in WAL replay.
+ *
+ * Note: content and len don't include CRC.
+ */
+void
+RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
+{
+ char path[MAXPGPATH];
+ pg_crc32 statefile_crc;
+ int fd;
+
+ /* Recompute CRC */
+ INIT_CRC32(statefile_crc);
+ COMP_CRC32(statefile_crc, content, len);
+ FIN_CRC32(statefile_crc);
+
+ TwoPhaseFilePath(path, xid);
+
+ fd = BasicOpenFile(path,
+ O_CREAT | O_TRUNC | O_WRONLY | PG_BINARY,
+ S_IRUSR | S_IWUSR);
+ if (fd < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not recreate twophase state file \"%s\": %m",
+ path)));
+
+ /* Write content and CRC */
+ if (write(fd, content, len) != len)
+ {
+ close(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write twophase state file: %m")));
+ }
+ if (write(fd, &statefile_crc, sizeof(pg_crc32)) != sizeof(pg_crc32))
+ {
+ close(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write twophase state file: %m")));
+ }
+
+ /* Sync and close the file */
+ if (pg_fsync(fd) != 0)
+ {
+ close(fd);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not fsync twophase state file: %m")));
+ }
+
+ if (close(fd) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not close twophase state file: %m")));
+}
+
+/*
+ * PrescanPreparedTransactions
+ *
+ * Scan the pg_twophase directory and determine the range of valid XIDs
+ * present. This is run during database startup, after we have completed
+ * reading WAL. ShmemVariableCache->nextXid has been set to one more than
+ * the highest XID for which evidence exists in WAL.
+ *
+ * We throw away any prepared xacts with main XID beyond nextXid --- if any
+ * are present, it suggests that the DBA has done a PITR recovery to an
+ * earlier point in time without cleaning out pg_twophase. We dare not
+ * try to recover such prepared xacts since they likely depend on database
+ * state that doesn't exist now.
+ *
+ * However, we will advance nextXid beyond any subxact XIDs belonging to
+ * valid prepared xacts. We need to do this since subxact commit doesn't
+ * write a WAL entry, and so there might be no evidence in WAL of those
+ * subxact XIDs.
+ *
+ * Our other responsibility is to determine and return the oldest valid XID
+ * among the prepared xacts (if none, return ShmemVariableCache->nextXid).
+ * This is needed to synchronize pg_subtrans startup properly.
+ */
+TransactionId
+PrescanPreparedTransactions(void)
+{
+ TransactionId origNextXid = ShmemVariableCache->nextXid;
+ TransactionId result = origNextXid;
+ char dir[MAXPGPATH];
+ DIR *cldir;
+ struct dirent *clde;
+
+ snprintf(dir, MAXPGPATH, "%s/%s", DataDir, TWOPHASE_DIR);
+
+ cldir = AllocateDir(dir);
+ if (cldir == NULL)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open directory \"%s\": %m", dir)));
+
+ errno = 0;
+ while ((clde = readdir(cldir)) != NULL)
+ {
+ if (strlen(clde->d_name) == 8 &&
+ strspn(clde->d_name, "0123456789ABCDEF") == 8)
+ {
+ TransactionId xid;
+ char *buf;
+ TwoPhaseFileHeader *hdr;
+ TransactionId *subxids;
+ int i;
+
+ xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
+
+ /* Reject XID if too new */
+ if (TransactionIdFollowsOrEquals(xid, origNextXid))
+ {
+ ereport(WARNING,
+ (errmsg("removing future twophase state file \"%s\"",
+ clde->d_name)));
+ RemoveTwoPhaseFile(xid, true);
+ errno = 0;
+ continue;
+ }
+
+ /*
+ * Note: we can't check if already processed because clog
+ * subsystem isn't up yet.
+ */
+
+ /* Read and validate file */
+ buf = ReadTwoPhaseFile(xid);
+ if (buf == NULL)
+ {
+ ereport(WARNING,
+ (errmsg("removing corrupt twophase state file \"%s\"",
+ clde->d_name)));
+ RemoveTwoPhaseFile(xid, true);
+ errno = 0;
+ continue;
+ }
+
+ /* Deconstruct header */
+ hdr = (TwoPhaseFileHeader *) buf;
+ if (!TransactionIdEquals(hdr->xid, xid))
+ {
+ ereport(WARNING,
+ (errmsg("removing corrupt twophase state file \"%s\"",
+ clde->d_name)));
+ RemoveTwoPhaseFile(xid, true);
+ pfree(buf);
+ errno = 0;
+ continue;
+ }
+
+ /*
+ * OK, we think this file is valid. Incorporate xid into the
+ * running-minimum result.
+ */
+ if (TransactionIdPrecedes(xid, result))
+ result = xid;
+
+ /*
+ * Examine subtransaction XIDs ... they should all follow main
+ * XID, and they may force us to advance nextXid.
+ */
+ subxids = (TransactionId *)
+ (buf + MAXALIGN(sizeof(TwoPhaseFileHeader)));
+ for (i = 0; i < hdr->nsubxacts; i++)
+ {
+ TransactionId subxid = subxids[i];
+
+ Assert(TransactionIdFollows(subxid, xid));
+ if (TransactionIdFollowsOrEquals(subxid,
+ ShmemVariableCache->nextXid))
+ {
+ ShmemVariableCache->nextXid = subxid;
+ TransactionIdAdvance(ShmemVariableCache->nextXid);
+ }
+ }
+
+ pfree(buf);
+ }
+ errno = 0;
+ }
+#ifdef WIN32
+
+ /*
+ * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but
+ * not in released version
+ */
+ if (GetLastError() == ERROR_NO_MORE_FILES)
+ errno = 0;
+#endif
+ if (errno)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read directory \"%s\": %m", dir)));
+
+ FreeDir(cldir);
+
+ return result;
+}
+
+/*
+ * RecoverPreparedTransactions
+ *
+ * Scan the pg_twophase directory and reload shared-memory state for each
+ * prepared transaction (reacquire locks, etc). This is run during database
+ * startup.
+ */
+void
+RecoverPreparedTransactions(void)
+{
+ char dir[MAXPGPATH];
+ DIR *cldir;
+ struct dirent *clde;
+
+ snprintf(dir, MAXPGPATH, "%s/%s", DataDir, TWOPHASE_DIR);
+
+ cldir = AllocateDir(dir);
+ if (cldir == NULL)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open directory \"%s\": %m", dir)));
+
+ errno = 0;
+ while ((clde = readdir(cldir)) != NULL)
+ {
+ if (strlen(clde->d_name) == 8 &&
+ strspn(clde->d_name, "0123456789ABCDEF") == 8)
+ {
+ TransactionId xid;
+ char *buf;
+ char *bufptr;
+ TwoPhaseFileHeader *hdr;
+ TransactionId *subxids;
+ GlobalTransaction gxact;
+ int i;
+
+ xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
+
+ /* Already processed? */
+ if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
+ {
+ ereport(WARNING,
+ (errmsg("removing stale twophase state file \"%s\"",
+ clde->d_name)));
+ RemoveTwoPhaseFile(xid, true);
+ errno = 0;
+ continue;
+ }
+
+ /* Read and validate file */
+ buf = ReadTwoPhaseFile(xid);
+ if (buf == NULL)
+ {
+ ereport(WARNING,
+ (errmsg("removing corrupt twophase state file \"%s\"",
+ clde->d_name)));
+ RemoveTwoPhaseFile(xid, true);
+ errno = 0;
+ continue;
+ }
+
+ ereport(LOG,
+ (errmsg("recovering prepared transaction %u", xid)));
+
+ /* Deconstruct header */
+ hdr = (TwoPhaseFileHeader *) buf;
+ Assert(TransactionIdEquals(hdr->xid, xid));
+ bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
+ subxids = (TransactionId *) bufptr;
+ bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
+ bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
+ bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
+
+ /*
+ * Reconstruct subtrans state for the transaction --- needed
+ * because pg_subtrans is not preserved over a restart
+ */
+ for (i = 0; i < hdr->nsubxacts; i++)
+ SubTransSetParent(subxids[i], xid);
+
+ /*
+ * Recreate its GXACT and dummy PGPROC
+ */
+ gxact = MarkAsPreparing(xid, hdr->database, hdr->gid, hdr->owner);
+ GXactLoadSubxactData(gxact, hdr->nsubxacts, subxids);
+ MarkAsPrepared(gxact);
+
+ /*
+ * Recover other state (notably locks) using resource managers
+ */
+ ProcessRecords(bufptr, xid, twophase_recover_callbacks);
+
+ pfree(buf);
+ }
+ errno = 0;
+ }
+#ifdef WIN32
+
+ /*
+ * This fix is in mingw cvs (runtime/mingwex/dirent.c rev 1.4), but
+ * not in released version
+ */
+ if (GetLastError() == ERROR_NO_MORE_FILES)
+ errno = 0;
+#endif
+ if (errno)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read directory \"%s\": %m", dir)));
+
+ FreeDir(cldir);
+}
+
+/*
+ * RecordTransactionCommitPrepared
+ *
+ * This is basically the same as RecordTransactionCommit: in particular,
+ * we must take the CheckpointStartLock to avoid a race condition.
+ *
+ * We know the transaction made at least one XLOG entry (its PREPARE),
+ * so it is never possible to optimize out the commit record.
+ */
+static void
+RecordTransactionCommitPrepared(TransactionId xid,
+ int nchildren,
+ TransactionId *children,
+ int nrels,
+ RelFileNode *rels)
+{
+ XLogRecData rdata[3];
+ int lastrdata = 0;
+ xl_xact_commit_prepared xlrec;
+ XLogRecPtr recptr;
+
+ START_CRIT_SECTION();
+
+ /* See notes in RecordTransactionCommit */
+ LWLockAcquire(CheckpointStartLock, LW_SHARED);
+
+ /* Emit the XLOG commit record */
+ xlrec.xid = xid;
+ xlrec.crec.xtime = time(NULL);
+ xlrec.crec.nrels = nrels;
+ xlrec.crec.nsubxacts = nchildren;
+ rdata[0].data = (char *) (&xlrec);
+ rdata[0].len = MinSizeOfXactCommitPrepared;
+ rdata[0].buffer = InvalidBuffer;
+ /* dump rels to delete */
+ if (nrels > 0)
+ {
+ rdata[0].next = &(rdata[1]);
+ rdata[1].data = (char *) rels;
+ rdata[1].len = nrels * sizeof(RelFileNode);
+ rdata[1].buffer = InvalidBuffer;
+ lastrdata = 1;
+ }
+ /* dump committed child Xids */
+ if (nchildren > 0)
+ {
+ rdata[lastrdata].next = &(rdata[2]);
+ rdata[2].data = (char *) children;
+ rdata[2].len = nchildren * sizeof(TransactionId);
+ rdata[2].buffer = InvalidBuffer;
+ lastrdata = 2;
+ }
+ rdata[lastrdata].next = NULL;
+
+ recptr = XLogInsert(RM_XACT_ID,
+ XLOG_XACT_COMMIT_PREPARED | XLOG_NO_TRAN,
+ rdata);
+
+ /* we don't currently try to sleep before flush here ... */
+
+ /* Flush XLOG to disk */
+ XLogFlush(recptr);
+
+ /* Mark the transaction committed in pg_clog */
+ TransactionIdCommit(xid);
+ /* to avoid race conditions, the parent must commit first */
+ TransactionIdCommitTree(nchildren, children);
+
+ /* Checkpoint is allowed again */
+ LWLockRelease(CheckpointStartLock);
+
+ END_CRIT_SECTION();
+}
+
+/*
+ * RecordTransactionAbortPrepared
+ *
+ * This is basically the same as RecordTransactionAbort.
+ *
+ * We know the transaction made at least one XLOG entry (its PREPARE),
+ * so it is never possible to optimize out the abort record.
+ */
+static void
+RecordTransactionAbortPrepared(TransactionId xid,
+ int nchildren,
+ TransactionId *children,
+ int nrels,
+ RelFileNode *rels)
+{
+ XLogRecData rdata[3];
+ int lastrdata = 0;
+ xl_xact_abort_prepared xlrec;
+ XLogRecPtr recptr;
+
+ /*
+ * Catch the scenario where we aborted partway through
+ * RecordTransactionCommitPrepared ...
+ */
+ if (TransactionIdDidCommit(xid))
+ elog(PANIC, "cannot abort transaction %u, it was already committed",
+ xid);
+
+ START_CRIT_SECTION();
+
+ /* Emit the XLOG abort record */
+ xlrec.xid = xid;
+ xlrec.arec.xtime = time(NULL);
+ xlrec.arec.nrels = nrels;
+ xlrec.arec.nsubxacts = nchildren;
+ rdata[0].data = (char *) (&xlrec);
+ rdata[0].len = MinSizeOfXactAbortPrepared;
+ rdata[0].buffer = InvalidBuffer;
+ /* dump rels to delete */
+ if (nrels > 0)
+ {
+ rdata[0].next = &(rdata[1]);
+ rdata[1].data = (char *) rels;
+ rdata[1].len = nrels * sizeof(RelFileNode);
+ rdata[1].buffer = InvalidBuffer;
+ lastrdata = 1;
+ }
+ /* dump committed child Xids */
+ if (nchildren > 0)
+ {
+ rdata[lastrdata].next = &(rdata[2]);
+ rdata[2].data = (char *) children;
+ rdata[2].len = nchildren * sizeof(TransactionId);
+ rdata[2].buffer = InvalidBuffer;
+ lastrdata = 2;
+ }
+ rdata[lastrdata].next = NULL;
+
+ recptr = XLogInsert(RM_XACT_ID,
+ XLOG_XACT_ABORT_PREPARED | XLOG_NO_TRAN,
+ rdata);
+
+ /* Always flush, since we're about to remove the 2PC state file */
+ XLogFlush(recptr);
+
+ /*
+ * Mark the transaction aborted in clog. This is not absolutely
+ * necessary but we may as well do it while we are here.
+ */
+ TransactionIdAbort(xid);
+ TransactionIdAbortTree(nchildren, children);
+
+ END_CRIT_SECTION();
+}
+
diff --git a/src/backend/access/transam/twophase_rmgr.c b/src/backend/access/transam/twophase_rmgr.c
new file mode 100644
index 00000000000..e78f8b2fbb3
--- /dev/null
+++ b/src/backend/access/transam/twophase_rmgr.c
@@ -0,0 +1,49 @@
+/*-------------------------------------------------------------------------
+ *
+ * twophase_rmgr.c
+ * Two-phase-commit resource managers tables
+ *
+ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/access/transam/twophase_rmgr.c,v 1.1 2005/06/17 22:32:42 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/twophase_rmgr.h"
+#include "commands/async.h"
+#include "storage/lock.h"
+#include "utils/flatfiles.h"
+#include "utils/inval.h"
+
+
+const TwoPhaseCallback twophase_recover_callbacks[TWOPHASE_RM_MAX_ID + 1] =
+{
+ NULL, /* END ID */
+ lock_twophase_recover, /* Lock */
+ NULL, /* Inval */
+ NULL, /* flat file update */
+ NULL /* notify/listen */
+};
+
+const TwoPhaseCallback twophase_postcommit_callbacks[TWOPHASE_RM_MAX_ID + 1] =
+{
+ NULL, /* END ID */
+ lock_twophase_postcommit, /* Lock */
+ inval_twophase_postcommit, /* Inval */
+ flatfile_twophase_postcommit, /* flat file update */
+ notify_twophase_postcommit /* notify/listen */
+};
+
+const TwoPhaseCallback twophase_postabort_callbacks[TWOPHASE_RM_MAX_ID + 1] =
+{
+ NULL, /* END ID */
+ lock_twophase_postabort, /* Lock */
+ NULL, /* Inval */
+ NULL, /* flat file update */
+ NULL /* notify/listen */
+};
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 040a4ab0b79..74163b7f576 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.204 2005/06/06 20:22:57 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.205 2005/06/17 22:32:42 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -22,6 +22,7 @@
#include "access/multixact.h"
#include "access/subtrans.h"
+#include "access/twophase.h"
#include "access/xact.h"
#include "catalog/heap.h"
#include "catalog/index.h"
@@ -68,7 +69,8 @@ typedef enum TransState
TRANS_START,
TRANS_INPROGRESS,
TRANS_COMMIT,
- TRANS_ABORT
+ TRANS_ABORT,
+ TRANS_PREPARE
} TransState;
/*
@@ -90,6 +92,7 @@ typedef enum TBlockState
TBLOCK_ABORT, /* failed xact, awaiting ROLLBACK */
TBLOCK_ABORT_END, /* failed xact, ROLLBACK received */
TBLOCK_ABORT_PENDING, /* live xact, ROLLBACK received */
+ TBLOCK_PREPARE, /* live xact, PREPARE received */
/* subtransaction states */
TBLOCK_SUBBEGIN, /* starting a subtransaction */
@@ -172,6 +175,12 @@ static CommandId currentCommandId;
static AbsoluteTime xactStartTime; /* integer part */
static int xactStartTimeUsec; /* microsecond part */
+/*
+ * GID to be used for preparing the current transaction. This is also
+ * global to a whole transaction, so we don't keep it in the state stack.
+ */
+static char *prepareGID;
+
/*
* List of add-on start- and end-of-xact callbacks
@@ -267,10 +276,12 @@ IsTransactionState(void)
return true;
case TRANS_ABORT:
return true;
+ case TRANS_PREPARE:
+ return true;
}
/*
- * Shouldn't get here, but lint is not happy with this...
+ * Shouldn't get here, but lint is not happy without this...
*/
return false;
}
@@ -660,12 +671,12 @@ void
RecordTransactionCommit(void)
{
int nrels;
- RelFileNode *rptr;
+ RelFileNode *rels;
int nchildren;
TransactionId *children;
/* Get data needed for commit record */
- nrels = smgrGetPendingDeletes(true, &rptr);
+ nrels = smgrGetPendingDeletes(true, &rels);
nchildren = xactGetCommittedChildren(&children);
/*
@@ -726,7 +737,7 @@ RecordTransactionCommit(void)
if (nrels > 0)
{
rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) rptr;
+ rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
@@ -809,12 +820,9 @@ RecordTransactionCommit(void)
MyXactMadeXLogEntry = false;
MyXactMadeTempRelUpdate = false;
- /* Show myself as out of the transaction in PGPROC array */
- MyProc->logRec.xrecoff = 0;
-
/* And clean up local data */
- if (rptr)
- pfree(rptr);
+ if (rels)
+ pfree(rels);
if (children)
pfree(children);
}
@@ -970,12 +978,12 @@ static void
RecordTransactionAbort(void)
{
int nrels;
- RelFileNode *rptr;
+ RelFileNode *rels;
int nchildren;
TransactionId *children;
/* Get data needed for abort record */
- nrels = smgrGetPendingDeletes(false, &rptr);
+ nrels = smgrGetPendingDeletes(false, &rels);
nchildren = xactGetCommittedChildren(&children);
/*
@@ -1026,7 +1034,7 @@ RecordTransactionAbort(void)
if (nrels > 0)
{
rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) rptr;
+ rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
@@ -1069,12 +1077,9 @@ RecordTransactionAbort(void)
MyXactMadeXLogEntry = false;
MyXactMadeTempRelUpdate = false;
- /* Show myself as out of the transaction in PGPROC array */
- MyProc->logRec.xrecoff = 0;
-
/* And clean up local data */
- if (rptr)
- pfree(rptr);
+ if (rels)
+ pfree(rels);
if (children)
pfree(children);
}
@@ -1166,13 +1171,13 @@ static void
RecordSubTransactionAbort(void)
{
int nrels;
- RelFileNode *rptr;
+ RelFileNode *rels;
TransactionId xid = GetCurrentTransactionId();
int nchildren;
TransactionId *children;
/* Get data needed for abort record */
- nrels = smgrGetPendingDeletes(false, &rptr);
+ nrels = smgrGetPendingDeletes(false, &rels);
nchildren = xactGetCommittedChildren(&children);
/*
@@ -1212,7 +1217,7 @@ RecordSubTransactionAbort(void)
if (nrels > 0)
{
rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) rptr;
+ rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
@@ -1256,8 +1261,8 @@ RecordSubTransactionAbort(void)
XidCacheRemoveRunningXids(xid, nchildren, children);
/* And clean up local data */
- if (rptr)
- pfree(rptr);
+ if (rels)
+ pfree(rels);
if (children)
pfree(children);
}
@@ -1419,8 +1424,11 @@ StartTransaction(void)
ShowTransactionState("StartTransaction");
}
+
/*
* CommitTransaction
+ *
+ * NB: if you change this routine, better look at PrepareTransaction too!
*/
static void
CommitTransaction(void)
@@ -1510,6 +1518,8 @@ CommitTransaction(void)
* xid 0 as running as well, or it will be able to see two tuple versions
* - one deleted by xid 1 and one inserted by xid 0. See notes in
* GetSnapshotData.
+ *
+ * Note: MyProc may be null during bootstrap.
*----------
*/
if (MyProc != NULL)
@@ -1608,6 +1618,225 @@ CommitTransaction(void)
RESUME_INTERRUPTS();
}
+
+/*
+ * PrepareTransaction
+ *
+ * NB: if you change this routine, better look at CommitTransaction too!
+ */
+static void
+PrepareTransaction(void)
+{
+ TransactionState s = CurrentTransactionState;
+ TransactionId xid = GetCurrentTransactionId();
+ GlobalTransaction gxact;
+
+ ShowTransactionState("PrepareTransaction");
+
+ /*
+ * check the current transaction state
+ */
+ if (s->state != TRANS_INPROGRESS)
+ elog(WARNING, "PrepareTransaction while in %s state",
+ TransStateAsString(s->state));
+ Assert(s->parent == NULL);
+
+ /*
+ * Do pre-commit processing (most of this stuff requires database
+ * access, and in fact could still cause an error...)
+ *
+ * It is possible for PrepareHoldablePortals to invoke functions that
+ * queue deferred triggers, and it's also possible that triggers create
+ * holdable cursors. So we have to loop until there's nothing left to
+ * do.
+ */
+ for (;;)
+ {
+ /*
+ * Fire all currently pending deferred triggers.
+ */
+ AfterTriggerFireDeferred();
+
+ /*
+ * Convert any open holdable cursors into static portals. If there
+ * weren't any, we are done ... otherwise loop back to check if they
+ * queued deferred triggers. Lather, rinse, repeat.
+ */
+ if (!PrepareHoldablePortals())
+ break;
+ }
+
+ /* Now we can shut down the deferred-trigger manager */
+ AfterTriggerEndXact(true);
+
+ /* Close any open regular cursors */
+ AtCommit_Portals();
+
+ /*
+ * Let ON COMMIT management do its thing (must happen after closing
+ * cursors, to avoid dangling-reference problems)
+ */
+ PreCommit_on_commit_actions();
+
+ /* close large objects before lower-level cleanup */
+ AtEOXact_LargeObject(true);
+
+ /* NOTIFY and flatfiles will be handled below */
+
+ /* Prevent cancel/die interrupt while cleaning up */
+ HOLD_INTERRUPTS();
+
+ /*
+ * set the current transaction state information appropriately during
+ * the processing
+ */
+ s->state = TRANS_PREPARE;
+
+ /* Tell bufmgr and smgr to prepare for commit */
+ BufmgrCommit();
+
+ /*
+ * Reserve the GID for this transaction. This could fail if the
+ * requested GID is invalid or already in use.
+ */
+ gxact = MarkAsPreparing(xid, MyDatabaseId, prepareGID, GetUserId());
+ prepareGID = NULL;
+
+ /*
+ * Collect data for the 2PC state file. Note that in general, no actual
+ * state change should happen in the called modules during this step,
+ * since it's still possible to fail before commit, and in that case we
+ * want transaction abort to be able to clean up. (In particular, the
+ * AtPrepare routines may error out if they find cases they cannot
+ * handle.) State cleanup should happen in the PostPrepare routines
+ * below. However, some modules can go ahead and clear state here
+ * because they wouldn't do anything with it during abort anyway.
+ *
+ * Note: because the 2PC state file records will be replayed in the same
+ * order they are made, the order of these calls has to match the order
+ * in which we want things to happen during COMMIT PREPARED or
+ * ROLLBACK PREPARED; in particular, pay attention to whether things
+ * should happen before or after releasing the transaction's locks.
+ */
+ StartPrepare(gxact);
+
+ AtPrepare_Notify();
+ AtPrepare_UpdateFlatFiles();
+ AtPrepare_Inval();
+ AtPrepare_Locks();
+
+ /*
+ * Here is where we really truly prepare.
+ *
+ * We have to record transaction prepares even if we didn't
+ * make any updates, because the transaction manager might
+ * get confused if we lose a global transaction.
+ */
+ EndPrepare(gxact);
+
+ /*
+ * Mark the prepared transaction as valid. As soon as we mark ourselves
+ * not running in MyProc below, others can commit/rollback the xact.
+ *
+ * NB: a side effect of this is to make a dummy ProcArray entry for the
+ * prepared XID. This must happen before we clear the XID from MyProc,
+ * else there is a window where the XID is not running according to
+ * TransactionIdInProgress, and onlookers would be entitled to assume
+ * the xact crashed. Instead we have a window where the same XID
+ * appears twice in ProcArray, which is OK.
+ */
+ MarkAsPrepared(gxact);
+
+ /*
+ * Now we clean up backend-internal state and release internal
+ * resources.
+ */
+
+ /* Break the chain of back-links in the XLOG records I output */
+ MyLastRecPtr.xrecoff = 0;
+ MyXactMadeXLogEntry = false;
+ MyXactMadeTempRelUpdate = false;
+
+ /*
+ * Let others know about no transaction in progress by me. This has
+ * to be done *after* the prepared transaction has been marked valid,
+ * else someone may think it is unlocked and recyclable.
+ */
+
+ /* Lock ProcArrayLock because that's what GetSnapshotData uses. */
+ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ MyProc->xid = InvalidTransactionId;
+ MyProc->xmin = InvalidTransactionId;
+
+ /* Clear the subtransaction-XID cache too while holding the lock */
+ MyProc->subxids.nxids = 0;
+ MyProc->subxids.overflowed = false;
+
+ LWLockRelease(ProcArrayLock);
+
+ /*
+ * This is all post-transaction cleanup. Note that if an error is raised
+ * here, it's too late to abort the transaction. This should be just
+ * noncritical resource releasing. See notes in CommitTransaction.
+ */
+
+ CallXactCallbacks(XACT_EVENT_PREPARE);
+
+ ResourceOwnerRelease(TopTransactionResourceOwner,
+ RESOURCE_RELEASE_BEFORE_LOCKS,
+ true, true);
+
+ /* Check we've released all buffer pins */
+ AtEOXact_Buffers(true);
+
+ /* notify and flatfiles don't need a postprepare call */
+
+ PostPrepare_Inval();
+
+ PostPrepare_smgr();
+
+ AtEOXact_MultiXact();
+
+ PostPrepare_Locks(xid);
+
+ ResourceOwnerRelease(TopTransactionResourceOwner,
+ RESOURCE_RELEASE_LOCKS,
+ true, true);
+ ResourceOwnerRelease(TopTransactionResourceOwner,
+ RESOURCE_RELEASE_AFTER_LOCKS,
+ true, true);
+
+ /* PREPARE acts the same as COMMIT as far as GUC is concerned */
+ AtEOXact_GUC(true, false);
+ AtEOXact_SPI(true);
+ AtEOXact_on_commit_actions(true);
+ AtEOXact_Namespace(true);
+ /* smgrcommit already done */
+ AtEOXact_Files();
+
+ CurrentResourceOwner = NULL;
+ ResourceOwnerDelete(TopTransactionResourceOwner);
+ s->curTransactionOwner = NULL;
+ CurTransactionResourceOwner = NULL;
+ TopTransactionResourceOwner = NULL;
+
+ AtCommit_Memory();
+
+ s->transactionId = InvalidTransactionId;
+ s->subTransactionId = InvalidSubTransactionId;
+ s->nestingLevel = 0;
+ s->childXids = NIL;
+
+ /*
+ * done with 1st phase commit processing, set current transaction
+ * state back to default
+ */
+ s->state = TRANS_DEFAULT;
+
+ RESUME_INTERRUPTS();
+}
+
+
/*
* AbortTransaction
*/
@@ -1640,7 +1869,7 @@ AbortTransaction(void)
/*
* check the current transaction state
*/
- if (s->state != TRANS_INPROGRESS)
+ if (s->state != TRANS_INPROGRESS && s->state != TRANS_PREPARE)
elog(WARNING, "AbortTransaction while in %s state",
TransStateAsString(s->state));
Assert(s->parent == NULL);
@@ -1833,6 +2062,7 @@ StartTransactionCommand(void)
case TBLOCK_SUBABORT_PENDING:
case TBLOCK_SUBRESTART:
case TBLOCK_SUBABORT_RESTART:
+ case TBLOCK_PREPARE:
elog(ERROR, "StartTransactionCommand: unexpected state %s",
BlockStateAsString(s->blockState));
break;
@@ -1935,6 +2165,15 @@ CommitTransactionCommand(void)
break;
/*
+ * We are completing a "PREPARE TRANSACTION" command. Do it and
+ * return to the idle state.
+ */
+ case TBLOCK_PREPARE:
+ PrepareTransaction();
+ s->blockState = TBLOCK_DEFAULT;
+ break;
+
+ /*
* We were just issued a SAVEPOINT inside a transaction block.
* Start a subtransaction. (DefineSavepoint already did
* PushTransaction, so as to have someplace to put the
@@ -1964,6 +2203,12 @@ CommitTransactionCommand(void)
CommitTransaction();
s->blockState = TBLOCK_DEFAULT;
}
+ else if (s->blockState == TBLOCK_PREPARE)
+ {
+ Assert(s->parent == NULL);
+ PrepareTransaction();
+ s->blockState = TBLOCK_DEFAULT;
+ }
else
{
Assert(s->blockState == TBLOCK_INPROGRESS ||
@@ -2156,6 +2401,17 @@ AbortCurrentTransaction(void)
break;
/*
+ * Here, we failed while trying to PREPARE. Clean up the
+ * transaction and return to idle state (we do not want to
+ * stay in the transaction).
+ */
+ case TBLOCK_PREPARE:
+ AbortTransaction();
+ CleanupTransaction();
+ s->blockState = TBLOCK_DEFAULT;
+ break;
+
+ /*
* We got an error inside a subtransaction. Abort just the
* subtransaction, and go to the persistent SUBABORT state
* until we get ROLLBACK.
@@ -2487,6 +2743,7 @@ BeginTransactionBlock(void)
case TBLOCK_SUBABORT_PENDING:
case TBLOCK_SUBRESTART:
case TBLOCK_SUBABORT_RESTART:
+ case TBLOCK_PREPARE:
elog(FATAL, "BeginTransactionBlock: unexpected state %s",
BlockStateAsString(s->blockState));
break;
@@ -2494,6 +2751,57 @@ BeginTransactionBlock(void)
}
/*
+ * PrepareTransactionBlock
+ * This executes a PREPARE command.
+ *
+ * Since PREPARE may actually do a ROLLBACK, the result indicates what
+ * happened: TRUE for PREPARE, FALSE for ROLLBACK.
+ *
+ * Note that we don't actually do anything here except change blockState.
+ * The real work will be done in the upcoming PrepareTransaction().
+ * We do it this way because it's not convenient to change memory context,
+ * resource owner, etc while executing inside a Portal.
+ */
+bool
+PrepareTransactionBlock(char *gid)
+{
+ TransactionState s;
+ bool result;
+
+ /* Set up to commit the current transaction */
+ result = EndTransactionBlock();
+
+ /* If successful, change outer tblock state to PREPARE */
+ if (result)
+ {
+ s = CurrentTransactionState;
+
+ while (s->parent != NULL)
+ s = s->parent;
+
+ if (s->blockState == TBLOCK_END)
+ {
+ /* Save GID where PrepareTransaction can find it again */
+ prepareGID = MemoryContextStrdup(TopTransactionContext, gid);
+
+ s->blockState = TBLOCK_PREPARE;
+ }
+ else
+ {
+ /*
+ * ignore case where we are not in a transaction;
+ * EndTransactionBlock already issued a warning.
+ */
+ Assert(s->blockState == TBLOCK_STARTED);
+ /* Don't send back a PREPARE result tag... */
+ result = false;
+ }
+ }
+
+ return result;
+}
+
+/*
* EndTransactionBlock
* This executes a COMMIT command.
*
@@ -2603,6 +2911,7 @@ EndTransactionBlock(void)
case TBLOCK_SUBABORT_PENDING:
case TBLOCK_SUBRESTART:
case TBLOCK_SUBABORT_RESTART:
+ case TBLOCK_PREPARE:
elog(FATAL, "EndTransactionBlock: unexpected state %s",
BlockStateAsString(s->blockState));
break;
@@ -2694,6 +3003,7 @@ UserAbortTransactionBlock(void)
case TBLOCK_SUBABORT_PENDING:
case TBLOCK_SUBRESTART:
case TBLOCK_SUBABORT_RESTART:
+ case TBLOCK_PREPARE:
elog(FATAL, "UserAbortTransactionBlock: unexpected state %s",
BlockStateAsString(s->blockState));
break;
@@ -2740,6 +3050,7 @@ DefineSavepoint(char *name)
case TBLOCK_SUBABORT_PENDING:
case TBLOCK_SUBRESTART:
case TBLOCK_SUBABORT_RESTART:
+ case TBLOCK_PREPARE:
elog(FATAL, "DefineSavepoint: unexpected state %s",
BlockStateAsString(s->blockState));
break;
@@ -2795,6 +3106,7 @@ ReleaseSavepoint(List *options)
case TBLOCK_SUBABORT_PENDING:
case TBLOCK_SUBRESTART:
case TBLOCK_SUBABORT_RESTART:
+ case TBLOCK_PREPARE:
elog(FATAL, "ReleaseSavepoint: unexpected state %s",
BlockStateAsString(s->blockState));
break;
@@ -2892,6 +3204,7 @@ RollbackToSavepoint(List *options)
case TBLOCK_SUBABORT_PENDING:
case TBLOCK_SUBRESTART:
case TBLOCK_SUBABORT_RESTART:
+ case TBLOCK_PREPARE:
elog(FATAL, "RollbackToSavepoint: unexpected state %s",
BlockStateAsString(s->blockState));
break;
@@ -2999,6 +3312,7 @@ BeginInternalSubTransaction(char *name)
case TBLOCK_SUBABORT_PENDING:
case TBLOCK_SUBRESTART:
case TBLOCK_SUBABORT_RESTART:
+ case TBLOCK_PREPARE:
elog(FATAL, "BeginInternalSubTransaction: unexpected state %s",
BlockStateAsString(s->blockState));
break;
@@ -3064,6 +3378,7 @@ RollbackAndReleaseCurrentSubTransaction(void)
case TBLOCK_SUBABORT_PENDING:
case TBLOCK_SUBRESTART:
case TBLOCK_SUBABORT_RESTART:
+ case TBLOCK_PREPARE:
elog(FATAL, "RollbackAndReleaseCurrentSubTransaction: unexpected state %s",
BlockStateAsString(s->blockState));
break;
@@ -3111,6 +3426,7 @@ AbortOutOfAnyTransaction(void)
case TBLOCK_INPROGRESS:
case TBLOCK_END:
case TBLOCK_ABORT_PENDING:
+ case TBLOCK_PREPARE:
/* In a transaction, so clean up */
AbortTransaction();
CleanupTransaction();
@@ -3202,6 +3518,7 @@ TransactionBlockStatusCode(void)
case TBLOCK_SUBINPROGRESS:
case TBLOCK_END:
case TBLOCK_SUBEND:
+ case TBLOCK_PREPARE:
return 'T'; /* in transaction */
case TBLOCK_ABORT:
case TBLOCK_SUBABORT:
@@ -3684,6 +4001,8 @@ BlockStateAsString(TBlockState blockState)
return "ABORT END";
case TBLOCK_ABORT_PENDING:
return "ABORT PEND";
+ case TBLOCK_PREPARE:
+ return "PREPARE";
case TBLOCK_SUBBEGIN:
return "SUB BEGIN";
case TBLOCK_SUBINPROGRESS:
@@ -3717,12 +4036,14 @@ TransStateAsString(TransState state)
return "DEFAULT";
case TRANS_START:
return "START";
+ case TRANS_INPROGRESS:
+ return "INPROGR";
case TRANS_COMMIT:
return "COMMIT";
case TRANS_ABORT:
return "ABORT";
- case TRANS_INPROGRESS:
- return "INPROGR";
+ case TRANS_PREPARE:
+ return "PREPARE";
}
return "UNRECOGNIZED";
}
@@ -3767,6 +4088,76 @@ xactGetCommittedChildren(TransactionId **ptr)
* XLOG support routines
*/
+static void
+xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid)
+{
+ TransactionId *sub_xids;
+ TransactionId max_xid;
+ int i;
+
+ TransactionIdCommit(xid);
+
+ /* Mark committed subtransactions as committed */
+ sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
+ TransactionIdCommitTree(xlrec->nsubxacts, sub_xids);
+
+ /* Make sure nextXid is beyond any XID mentioned in the record */
+ max_xid = xid;
+ for (i = 0; i < xlrec->nsubxacts; i++)
+ {
+ if (TransactionIdPrecedes(max_xid, sub_xids[i]))
+ max_xid = sub_xids[i];
+ }
+ if (TransactionIdFollowsOrEquals(max_xid,
+ ShmemVariableCache->nextXid))
+ {
+ ShmemVariableCache->nextXid = max_xid;
+ TransactionIdAdvance(ShmemVariableCache->nextXid);
+ }
+
+ /* Make sure files supposed to be dropped are dropped */
+ for (i = 0; i < xlrec->nrels; i++)
+ {
+ XLogCloseRelation(xlrec->xnodes[i]);
+ smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
+ }
+}
+
+static void
+xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
+{
+ TransactionId *sub_xids;
+ TransactionId max_xid;
+ int i;
+
+ TransactionIdAbort(xid);
+
+ /* Mark subtransactions as aborted */
+ sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
+ TransactionIdAbortTree(xlrec->nsubxacts, sub_xids);
+
+ /* Make sure nextXid is beyond any XID mentioned in the record */
+ max_xid = xid;
+ for (i = 0; i < xlrec->nsubxacts; i++)
+ {
+ if (TransactionIdPrecedes(max_xid, sub_xids[i]))
+ max_xid = sub_xids[i];
+ }
+ if (TransactionIdFollowsOrEquals(max_xid,
+ ShmemVariableCache->nextXid))
+ {
+ ShmemVariableCache->nextXid = max_xid;
+ TransactionIdAdvance(ShmemVariableCache->nextXid);
+ }
+
+ /* Make sure files supposed to be dropped are dropped */
+ for (i = 0; i < xlrec->nrels; i++)
+ {
+ XLogCloseRelation(xlrec->xnodes[i]);
+ smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
+ }
+}
+
void
xact_redo(XLogRecPtr lsn, XLogRecord *record)
{
@@ -3775,138 +4166,137 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record)
if (info == XLOG_XACT_COMMIT)
{
xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
- TransactionId *sub_xids;
- TransactionId max_xid;
- int i;
- TransactionIdCommit(record->xl_xid);
+ xact_redo_commit(xlrec, record->xl_xid);
+ }
+ else if (info == XLOG_XACT_ABORT)
+ {
+ xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
- /* Mark committed subtransactions as committed */
- sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
- TransactionIdCommitTree(xlrec->nsubxacts, sub_xids);
+ xact_redo_abort(xlrec, record->xl_xid);
+ }
+ else if (info == XLOG_XACT_PREPARE)
+ {
+ /* the record contents are exactly the 2PC file */
+ RecreateTwoPhaseFile(record->xl_xid,
+ XLogRecGetData(record), record->xl_len);
+ }
+ else if (info == XLOG_XACT_COMMIT_PREPARED)
+ {
+ xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) XLogRecGetData(record);
- /* Make sure nextXid is beyond any XID mentioned in the record */
- max_xid = record->xl_xid;
- for (i = 0; i < xlrec->nsubxacts; i++)
- {
- if (TransactionIdPrecedes(max_xid, sub_xids[i]))
- max_xid = sub_xids[i];
- }
- if (TransactionIdFollowsOrEquals(max_xid,
- ShmemVariableCache->nextXid))
- {
- ShmemVariableCache->nextXid = max_xid;
- TransactionIdAdvance(ShmemVariableCache->nextXid);
- }
+ xact_redo_commit(&xlrec->crec, xlrec->xid);
+ RemoveTwoPhaseFile(xlrec->xid, false);
+ }
+ else if (info == XLOG_XACT_ABORT_PREPARED)
+ {
+ xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) XLogRecGetData(record);
- /* Make sure files supposed to be dropped are dropped */
+ xact_redo_abort(&xlrec->arec, xlrec->xid);
+ RemoveTwoPhaseFile(xlrec->xid, false);
+ }
+ else
+ elog(PANIC, "xact_redo: unknown op code %u", info);
+}
+
+static void
+xact_desc_commit(char *buf, xl_xact_commit *xlrec)
+{
+ struct tm *tm = localtime(&xlrec->xtime);
+ int i;
+
+ sprintf(buf + strlen(buf), "%04u-%02u-%02u %02u:%02u:%02u",
+ tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
+ tm->tm_hour, tm->tm_min, tm->tm_sec);
+ if (xlrec->nrels > 0)
+ {
+ sprintf(buf + strlen(buf), "; rels:");
for (i = 0; i < xlrec->nrels; i++)
{
- XLogCloseRelation(xlrec->xnodes[i]);
- smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
+ RelFileNode rnode = xlrec->xnodes[i];
+
+ sprintf(buf + strlen(buf), " %u/%u/%u",
+ rnode.spcNode, rnode.dbNode, rnode.relNode);
}
}
- else if (info == XLOG_XACT_ABORT)
+ if (xlrec->nsubxacts > 0)
{
- xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
- TransactionId *sub_xids;
- TransactionId max_xid;
- int i;
-
- TransactionIdAbort(record->xl_xid);
-
- /* Mark subtransactions as aborted */
- sub_xids = (TransactionId *) &(xlrec->xnodes[xlrec->nrels]);
- TransactionIdAbortTree(xlrec->nsubxacts, sub_xids);
+ TransactionId *xacts = (TransactionId *)
+ &xlrec->xnodes[xlrec->nrels];
- /* Make sure nextXid is beyond any XID mentioned in the record */
- max_xid = record->xl_xid;
+ sprintf(buf + strlen(buf), "; subxacts:");
for (i = 0; i < xlrec->nsubxacts; i++)
- {
- if (TransactionIdPrecedes(max_xid, sub_xids[i]))
- max_xid = sub_xids[i];
- }
- if (TransactionIdFollowsOrEquals(max_xid,
- ShmemVariableCache->nextXid))
- {
- ShmemVariableCache->nextXid = max_xid;
- TransactionIdAdvance(ShmemVariableCache->nextXid);
- }
+ sprintf(buf + strlen(buf), " %u", xacts[i]);
+ }
+}
+
+static void
+xact_desc_abort(char *buf, xl_xact_abort *xlrec)
+{
+ struct tm *tm = localtime(&xlrec->xtime);
+ int i;
- /* Make sure files supposed to be dropped are dropped */
+ sprintf(buf + strlen(buf), "%04u-%02u-%02u %02u:%02u:%02u",
+ tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
+ tm->tm_hour, tm->tm_min, tm->tm_sec);
+ if (xlrec->nrels > 0)
+ {
+ sprintf(buf + strlen(buf), "; rels:");
for (i = 0; i < xlrec->nrels; i++)
{
- XLogCloseRelation(xlrec->xnodes[i]);
- smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
+ RelFileNode rnode = xlrec->xnodes[i];
+
+ sprintf(buf + strlen(buf), " %u/%u/%u",
+ rnode.spcNode, rnode.dbNode, rnode.relNode);
}
}
- else
- elog(PANIC, "xact_redo: unknown op code %u", info);
+ if (xlrec->nsubxacts > 0)
+ {
+ TransactionId *xacts = (TransactionId *)
+ &xlrec->xnodes[xlrec->nrels];
+
+ sprintf(buf + strlen(buf), "; subxacts:");
+ for (i = 0; i < xlrec->nsubxacts; i++)
+ sprintf(buf + strlen(buf), " %u", xacts[i]);
+ }
}
void
xact_desc(char *buf, uint8 xl_info, char *rec)
{
uint8 info = xl_info & ~XLR_INFO_MASK;
- int i;
if (info == XLOG_XACT_COMMIT)
{
xl_xact_commit *xlrec = (xl_xact_commit *) rec;
- struct tm *tm = localtime(&xlrec->xtime);
-
- sprintf(buf + strlen(buf), "commit: %04u-%02u-%02u %02u:%02u:%02u",
- tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
- tm->tm_hour, tm->tm_min, tm->tm_sec);
- if (xlrec->nrels > 0)
- {
- sprintf(buf + strlen(buf), "; rels:");
- for (i = 0; i < xlrec->nrels; i++)
- {
- RelFileNode rnode = xlrec->xnodes[i];
- sprintf(buf + strlen(buf), " %u/%u/%u",
- rnode.spcNode, rnode.dbNode, rnode.relNode);
- }
- }
- if (xlrec->nsubxacts > 0)
- {
- TransactionId *xacts = (TransactionId *)
- &xlrec->xnodes[xlrec->nrels];
-
- sprintf(buf + strlen(buf), "; subxacts:");
- for (i = 0; i < xlrec->nsubxacts; i++)
- sprintf(buf + strlen(buf), " %u", xacts[i]);
- }
+ strcat(buf, "commit: ");
+ xact_desc_commit(buf, xlrec);
}
else if (info == XLOG_XACT_ABORT)
{
xl_xact_abort *xlrec = (xl_xact_abort *) rec;
- struct tm *tm = localtime(&xlrec->xtime);
- sprintf(buf + strlen(buf), "abort: %04u-%02u-%02u %02u:%02u:%02u",
- tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
- tm->tm_hour, tm->tm_min, tm->tm_sec);
- if (xlrec->nrels > 0)
- {
- sprintf(buf + strlen(buf), "; rels:");
- for (i = 0; i < xlrec->nrels; i++)
- {
- RelFileNode rnode = xlrec->xnodes[i];
+ strcat(buf, "abort: ");
+ xact_desc_abort(buf, xlrec);
+ }
+ else if (info == XLOG_XACT_PREPARE)
+ {
+ strcat(buf, "prepare");
+ }
+ else if (info == XLOG_XACT_COMMIT_PREPARED)
+ {
+ xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) rec;
- sprintf(buf + strlen(buf), " %u/%u/%u",
- rnode.spcNode, rnode.dbNode, rnode.relNode);
- }
- }
- if (xlrec->nsubxacts > 0)
- {
- TransactionId *xacts = (TransactionId *)
- &xlrec->xnodes[xlrec->nrels];
+ sprintf(buf + strlen(buf), "commit %u: ", xlrec->xid);
+ xact_desc_commit(buf, &xlrec->crec);
+ }
+ else if (info == XLOG_XACT_ABORT_PREPARED)
+ {
+ xl_xact_abort_prepared *xlrec = (xl_xact_abort_prepared *) rec;
- sprintf(buf + strlen(buf), "; subxacts:");
- for (i = 0; i < xlrec->nsubxacts; i++)
- sprintf(buf + strlen(buf), " %u", xacts[i]);
- }
+ sprintf(buf + strlen(buf), "abort %u: ", xlrec->xid);
+ xact_desc_abort(buf, &xlrec->arec);
}
else
strcat(buf, "UNKNOWN");
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index c5469d174f2..15b82ee9be8 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.200 2005/06/15 01:36:08 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.201 2005/06/17 22:32:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -25,6 +25,7 @@
#include "access/clog.h"
#include "access/multixact.h"
#include "access/subtrans.h"
+#include "access/twophase.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "access/xlog_internal.h"
@@ -814,18 +815,6 @@ begin:;
/* Compute record's XLOG location */
INSERT_RECPTR(RecPtr, Insert, curridx);
- /* If first XLOG record of transaction, save it in PGPROC array */
- if (MyLastRecPtr.xrecoff == 0 && !no_tran)
- {
- /*
- * We do not acquire ProcArrayLock here because of possible deadlock.
- * Anyone who wants to inspect other procs' logRec must acquire
- * WALInsertLock, instead. A better solution would be a per-PROC
- * spinlock, but no time for that before 7.2 --- tgl 12/19/01.
- */
- MyProc->logRec = RecPtr;
- }
-
#ifdef WAL_DEBUG
if (XLOG_DEBUG)
{
@@ -3827,6 +3816,7 @@ BootStrapXLOG(void)
BootStrapCLOG();
BootStrapSUBTRANS();
BootStrapMultiXact();
+
free(buffer);
}
@@ -4268,6 +4258,7 @@ StartupXLOG(void)
uint32 endLogSeg;
XLogRecord *record;
uint32 freespace;
+ TransactionId oldestActiveXID;
CritSectionCount++;
@@ -4678,33 +4669,8 @@ StartupXLOG(void)
XLogCtl->Write.curridx = NextBufIdx(0);
}
-#ifdef NOT_USED
- /* UNDO */
- if (InRecovery)
- {
- RecPtr = ReadRecPtr;
- if (XLByteLT(checkPoint.undo, RecPtr))
- {
- ereport(LOG,
- (errmsg("undo starts at %X/%X",
- RecPtr.xlogid, RecPtr.xrecoff)));
- do
- {
- record = ReadRecord(&RecPtr, PANIC);
- if (TransactionIdIsValid(record->xl_xid) &&
- !TransactionIdDidCommit(record->xl_xid))
- RmgrTable[record->xl_rmid].rm_undo(EndRecPtr, record);
- RecPtr = record->xl_prev;
- } while (XLByteLE(checkPoint.undo, RecPtr));
- ereport(LOG,
- (errmsg("undo done at %X/%X",
- ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
- }
- else
- ereport(LOG,
- (errmsg("undo is not required")));
- }
-#endif
+ /* Pre-scan prepared transactions to find out the range of XIDs present */
+ oldestActiveXID = PrescanPreparedTransactions();
if (InRecovery)
{
@@ -4767,9 +4733,12 @@ StartupXLOG(void)
/* Start up the commit log and related stuff, too */
StartupCLOG();
- StartupSUBTRANS();
+ StartupSUBTRANS(oldestActiveXID);
StartupMultiXact();
+ /* Reload shared-memory state for prepared transactions */
+ RecoverPreparedTransactions();
+
ereport(LOG,
(errmsg("database system is ready")));
CritSectionCount--;
@@ -5096,31 +5065,6 @@ CreateCheckPoint(bool shutdown, bool force)
}
/*
- * Get UNDO record ptr - this is oldest of PGPROC->logRec values. We
- * do this while holding insert lock to ensure that we won't miss any
- * about-to-commit transactions (UNDO must include all xacts that have
- * commits after REDO point).
- *
- * XXX temporarily ifdef'd out to avoid three-way deadlock condition:
- * GetUndoRecPtr needs to grab ProcArrayLock to ensure that it is looking
- * at a stable set of proc records, but grabbing ProcArrayLock while
- * holding WALInsertLock is no good. GetNewTransactionId may cause a
- * WAL record to be written while holding XidGenLock, and
- * GetSnapshotData needs to get XidGenLock while holding ProcArrayLock,
- * so there's a risk of deadlock. Need to find a better solution. See
- * pgsql-hackers discussion of 17-Dec-01.
- *
- * XXX actually, the whole UNDO code is dead code and unlikely to ever be
- * revived, so the lack of a good solution here is not troubling.
- */
-#ifdef NOT_USED
- checkPoint.undo = GetUndoRecPtr();
-
- if (shutdown && checkPoint.undo.xrecoff != 0)
- elog(PANIC, "active transaction while database system is shutting down");
-#endif
-
- /*
* Now we can release insert lock and checkpoint start lock, allowing
* other xacts to proceed even while we are flushing disk buffers.
*/
@@ -5195,22 +5139,8 @@ CreateCheckPoint(bool shutdown, bool force)
/*
* Select point at which we can truncate the log, which we base on the
* prior checkpoint's earliest info.
- *
- * With UNDO support: oldest item is redo or undo, whichever is older;
- * but watch out for case that undo = 0.
- *
- * Without UNDO support: just use the redo pointer. This allows xlog
- * space to be freed much faster when there are long-running
- * transactions.
*/
-#ifdef NOT_USED
- if (ControlFile->checkPointCopy.undo.xrecoff != 0 &&
- XLByteLT(ControlFile->checkPointCopy.undo,
- ControlFile->checkPointCopy.redo))
- XLByteToSeg(ControlFile->checkPointCopy.undo, _logId, _logSeg);
- else
-#endif
- XLByteToSeg(ControlFile->checkPointCopy.redo, _logId, _logSeg);
+ XLByteToSeg(ControlFile->checkPointCopy.redo, _logId, _logSeg);
/*
* Update the control file.
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index d66bb780b4f..2e9a3ecf7c7 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -3,7 +3,7 @@
*
* Copyright (c) 1996-2005, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.13 2005/05/17 21:46:09 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/system_views.sql,v 1.14 2005/06/17 22:32:43 tgl Exp $
*/
CREATE VIEW pg_user AS
@@ -102,6 +102,39 @@ CREATE VIEW pg_stats AS
REVOKE ALL on pg_statistic FROM public;
+CREATE VIEW pg_locks AS
+ SELECT *
+ FROM pg_lock_status() AS L
+ (locktype text, database oid, relation oid, page int4, tuple int2,
+ transaction xid, classid oid, objid oid, objsubid int2,
+ pid int4, mode text, granted boolean);
+
+CREATE VIEW pg_prepared_xacts AS
+ SELECT P.transaction, P.gid, U.usename AS owner, D.datname AS database
+ FROM pg_prepared_xact() AS P
+ (transaction xid, gid text, ownerid int4, dbid oid)
+ LEFT JOIN pg_database D ON P.dbid = D.oid
+ LEFT JOIN pg_shadow U ON P.ownerid = U.usesysid;
+
+CREATE VIEW pg_settings AS
+ SELECT *
+ FROM pg_show_all_settings() AS A
+ (name text, setting text, category text, short_desc text, extra_desc text,
+ context text, vartype text, source text, min_val text, max_val text);
+
+CREATE RULE pg_settings_u AS
+ ON UPDATE TO pg_settings
+ WHERE new.name = old.name DO
+ SELECT set_config(old.name, new.setting, 'f');
+
+CREATE RULE pg_settings_n AS
+ ON UPDATE TO pg_settings
+ DO INSTEAD NOTHING;
+
+GRANT SELECT, UPDATE ON pg_settings TO PUBLIC;
+
+-- Statistics views
+
CREATE VIEW pg_stat_all_tables AS
SELECT
C.oid AS relid,
@@ -258,27 +291,3 @@ CREATE VIEW pg_stat_database AS
pg_stat_get_db_blocks_hit(D.oid) AS blks_read,
pg_stat_get_db_blocks_hit(D.oid) AS blks_hit
FROM pg_database D;
-
-CREATE VIEW pg_locks AS
- SELECT *
- FROM pg_lock_status() AS L
- (locktype text, database oid, relation oid, page int4, tuple int2,
- transaction xid, classid oid, objid oid, objsubid int2,
- pid int4, mode text, granted boolean);
-
-CREATE VIEW pg_settings AS
- SELECT *
- FROM pg_show_all_settings() AS A
- (name text, setting text, category text, short_desc text, extra_desc text,
- context text, vartype text, source text, min_val text, max_val text);
-
-CREATE RULE pg_settings_u AS
- ON UPDATE TO pg_settings
- WHERE new.name = old.name DO
- SELECT set_config(old.name, new.setting, 'f');
-
-CREATE RULE pg_settings_n AS
- ON UPDATE TO pg_settings
- DO INSTEAD NOTHING;
-
-GRANT SELECT, UPDATE ON pg_settings TO PUBLIC;
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index 9914c724b71..142b02dfaf8 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/async.c,v 1.122 2005/05/06 17:24:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/async.c,v 1.123 2005/06/17 22:32:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -78,6 +78,7 @@
#include <netinet/in.h>
#include "access/heapam.h"
+#include "access/twophase_rmgr.h"
#include "catalog/pg_listener.h"
#include "commands/async.h"
#include "libpq/libpq.h"
@@ -407,6 +408,36 @@ Async_UnlistenOnExit(int code, Datum arg)
CommitTransactionCommand();
}
+
+/*
+ *--------------------------------------------------------------
+ * AtPrepare_Notify
+ *
+ * This is called at the prepare phase of a two-phase
+ * transaction. Save the state for possible commit later.
+ *--------------------------------------------------------------
+ */
+void
+AtPrepare_Notify(void)
+{
+ ListCell *p;
+
+ foreach(p, pendingNotifies)
+ {
+ const char *relname = (const char *) lfirst(p);
+
+ RegisterTwoPhaseRecord(TWOPHASE_RM_NOTIFY_ID, 0,
+ relname, strlen(relname) + 1);
+ }
+
+ /*
+ * We can clear the state immediately, rather than needing a separate
+ * PostPrepare call, because if the transaction fails we'd just
+ * discard the state anyway.
+ */
+ ClearPendingNotifies();
+}
+
/*
*--------------------------------------------------------------
* AtCommit_Notify
@@ -1016,8 +1047,9 @@ AsyncExistsPendingNotify(const char *relname)
foreach(p, pendingNotifies)
{
- /* Use NAMEDATALEN for relname comparison. DZ - 26-08-1996 */
- if (strncmp((const char *) lfirst(p), relname, NAMEDATALEN) == 0)
+ const char *prelname = (const char *) lfirst(p);
+
+ if (strcmp(prelname, relname) == 0)
return true;
}
@@ -1037,3 +1069,22 @@ ClearPendingNotifies(void)
*/
pendingNotifies = NIL;
}
+
+/*
+ * 2PC processing routine for COMMIT PREPARED case.
+ *
+ * (We don't have to do anything for ROLLBACK PREPARED.)
+ */
+void
+notify_twophase_postcommit(TransactionId xid, uint16 info,
+ void *recdata, uint32 len)
+{
+ /*
+ * Set up to issue the NOTIFY at the end of my own
+ * current transaction. (XXX this has some issues if my own
+ * transaction later rolls back, or if there is any significant
+ * delay before I commit. OK for now because we disallow
+ * COMMIT PREPARED inside a transaction block.)
+ */
+ Async_Notify((char *) recdata);
+}
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index 25a7056500b..2e0d8dbc1a2 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.306 2005/06/09 04:18:58 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.307 2005/06/17 22:32:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -2085,6 +2085,7 @@ _copyTransactionStmt(TransactionStmt *from)
COPY_SCALAR_FIELD(kind);
COPY_NODE_FIELD(options);
+ COPY_STRING_FIELD(gid);
return newnode;
}
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index e625ca7f32c..a991cf5eed3 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -18,7 +18,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.243 2005/06/09 04:18:58 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.244 2005/06/17 22:32:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1053,6 +1053,7 @@ _equalTransactionStmt(TransactionStmt *a, TransactionStmt *b)
{
COMPARE_SCALAR_FIELD(kind);
COMPARE_NODE_FIELD(options);
+ COMPARE_STRING_FIELD(gid);
return true;
}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 6ee53327865..d12fa9fa052 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.494 2005/06/15 19:44:05 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.495 2005/06/17 22:32:44 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@@ -387,7 +387,7 @@ static void doNegateFloat(Value *v);
ORDER OUT_P OUTER_P OVERLAPS OVERLAY OWNER
PARTIAL PASSWORD PLACING POSITION
- PRECISION PRESERVE PREPARE PRIMARY
+ PRECISION PRESERVE PREPARE PREPARED PRIMARY
PRIOR PRIVILEGES PROCEDURAL PROCEDURE
QUOTE
@@ -4121,6 +4121,27 @@ TransactionStmt:
(Node *)makeString($4)));
$$ = (Node *)n;
}
+ | PREPARE TRANSACTION Sconst
+ {
+ TransactionStmt *n = makeNode(TransactionStmt);
+ n->kind = TRANS_STMT_PREPARE;
+ n->gid = $3;
+ $$ = (Node *)n;
+ }
+ | COMMIT PREPARED Sconst
+ {
+ TransactionStmt *n = makeNode(TransactionStmt);
+ n->kind = TRANS_STMT_COMMIT_PREPARED;
+ n->gid = $3;
+ $$ = (Node *)n;
+ }
+ | ROLLBACK PREPARED Sconst
+ {
+ TransactionStmt *n = makeNode(TransactionStmt);
+ n->kind = TRANS_STMT_ROLLBACK_PREPARED;
+ n->gid = $3;
+ $$ = (Node *)n;
+ }
;
opt_transaction: WORK {}
@@ -6334,19 +6355,18 @@ a_expr: c_expr { $$ = $1; }
{
$$ = (Node *) makeSimpleA_Expr(AEXPR_OF, "!=", $1, (Node *) $6);
}
- | a_expr BETWEEN opt_asymmetric b_expr AND b_expr %prec BETWEEN
+ | a_expr BETWEEN opt_asymmetric b_expr AND b_expr %prec BETWEEN
{
$$ = (Node *) makeA_Expr(AEXPR_AND, NIL,
(Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $4),
(Node *) makeSimpleA_Expr(AEXPR_OP, "<=", $1, $6));
}
- | a_expr NOT BETWEEN opt_asymmetric b_expr AND b_expr %prec BETWEEN
+ | a_expr NOT BETWEEN opt_asymmetric b_expr AND b_expr %prec BETWEEN
{
$$ = (Node *) makeA_Expr(AEXPR_OR, NIL,
(Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $5),
(Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $7));
}
-
| a_expr BETWEEN SYMMETRIC b_expr AND b_expr %prec BETWEEN
{
$$ = (Node *) makeA_Expr(AEXPR_OR, NIL,
@@ -6367,8 +6387,6 @@ a_expr: c_expr { $$ = $1; }
(Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $7),
(Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $5)));
}
-
-
| a_expr IN_P in_expr
{
/* in_expr returns a SubLink or a list of a_exprs */
@@ -6467,11 +6485,6 @@ a_expr: c_expr { $$ = $1; }
}
;
-opt_asymmetric: ASYMMETRIC {}
- | /*EMPTY*/ {}
- ;
-
-
/*
* Restricted expressions
*
@@ -7401,6 +7414,10 @@ opt_indirection:
| opt_indirection indirection_el { $$ = lappend($1, $2); }
;
+opt_asymmetric: ASYMMETRIC
+ | /*EMPTY*/
+ ;
+
/*****************************************************************************
*
@@ -7855,6 +7872,7 @@ unreserved_keyword:
| PARTIAL
| PASSWORD
| PREPARE
+ | PREPARED
| PRESERVE
| PRIOR
| PRIVILEGES
diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c
index 9296067dfae..1e0c63a9ce2 100644
--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.156 2005/06/14 23:47:39 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.157 2005/06/17 22:32:44 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -243,6 +243,7 @@ static const ScanKeyword ScanKeywords[] = {
{"position", POSITION},
{"precision", PRECISION},
{"prepare", PREPARE},
+ {"prepared", PREPARED},
{"preserve", PRESERVE},
{"primary", PRIMARY},
{"prior", PRIOR},
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 03057dbcb21..6ca13944f8a 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.453 2005/06/14 21:04:39 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.454 2005/06/17 22:32:44 tgl Exp $
*
* NOTES
*
@@ -252,7 +252,7 @@ static void reg_reply(DNSServiceRegistrationReplyErrorType errorCode,
static void pmdaemonize(void);
static Port *ConnCreate(int serverFd);
static void ConnFree(Port *port);
-static void reset_shared(unsigned short port);
+static void reset_shared(int port);
static void SIGHUP_handler(SIGNAL_ARGS);
static void pmdie(SIGNAL_ARGS);
static void reaper(SIGNAL_ARGS);
@@ -1783,7 +1783,7 @@ ClosePostmasterPorts(bool am_syslogger)
* reset_shared -- reset shared memory and semaphores
*/
static void
-reset_shared(unsigned short port)
+reset_shared(int port)
{
/*
* Create or re-create shared memory and semaphores.
@@ -1793,7 +1793,7 @@ reset_shared(unsigned short port)
* used to determine IPC keys. This helps ensure that we will clean
* up dead IPC objects if the postmaster crashes and is restarted.
*/
- CreateSharedMemoryAndSemaphores(false, MaxBackends, port);
+ CreateSharedMemoryAndSemaphores(false, port);
}
@@ -3182,7 +3182,7 @@ SubPostmasterMain(int argc, char *argv[])
/* BackendRun will close sockets */
/* Attach process to shared data structures */
- CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
+ CreateSharedMemoryAndSemaphores(false, 0);
#ifdef USE_SSL
/*
@@ -3203,7 +3203,7 @@ SubPostmasterMain(int argc, char *argv[])
ClosePostmasterPorts(false);
/* Attach process to shared data structures */
- CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
+ CreateSharedMemoryAndSemaphores(false, 0);
BootstrapMain(argc - 2, argv + 2);
proc_exit(0);
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 22333a1f558..0761a8fdf51 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.76 2005/05/19 21:35:46 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/ipc/ipci.c,v 1.77 2005/06/17 22:32:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -17,6 +17,7 @@
#include "access/clog.h"
#include "access/multixact.h"
#include "access/subtrans.h"
+#include "access/twophase.h"
#include "access/xlog.h"
#include "miscadmin.h"
#include "postmaster/bgwriter.h"
@@ -54,9 +55,7 @@
* memory. This is true for a standalone backend, false for a postmaster.
*/
void
-CreateSharedMemoryAndSemaphores(bool makePrivate,
- int maxBackends,
- int port)
+CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
{
PGShmemHeader *seghdr = NULL;
@@ -72,15 +71,16 @@ CreateSharedMemoryAndSemaphores(bool makePrivate,
*/
size = hash_estimate_size(SHMEM_INDEX_SIZE, sizeof(ShmemIndexEnt));
size += BufferShmemSize();
- size += LockShmemSize(maxBackends);
- size += ProcGlobalShmemSize(maxBackends);
+ size += LockShmemSize();
+ size += ProcGlobalShmemSize();
size += XLOGShmemSize();
size += CLOGShmemSize();
size += SUBTRANSShmemSize();
+ size += TwoPhaseShmemSize();
size += MultiXactShmemSize();
size += LWLockShmemSize();
- size += ProcArrayShmemSize(maxBackends);
- size += SInvalShmemSize(maxBackends);
+ size += ProcArrayShmemSize();
+ size += SInvalShmemSize(MaxBackends);
size += FreeSpaceShmemSize();
size += BgWriterShmemSize();
#ifdef EXEC_BACKEND
@@ -100,7 +100,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate,
/*
* Create semaphores
*/
- numSemas = ProcGlobalSemas(maxBackends);
+ numSemas = ProcGlobalSemas();
numSemas += SpinlockSemas();
PGReserveSemaphores(numSemas, port);
}
@@ -144,6 +144,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate,
XLOGShmemInit();
CLOGShmemInit();
SUBTRANSShmemInit();
+ TwoPhaseShmemInit();
MultiXactShmemInit();
InitBufferPool();
@@ -151,18 +152,18 @@ CreateSharedMemoryAndSemaphores(bool makePrivate,
* Set up lock manager
*/
InitLocks();
- InitLockTable(maxBackends);
+ InitLockTable();
/*
* Set up process table
*/
- InitProcGlobal(maxBackends);
- CreateSharedProcArray(maxBackends);
+ InitProcGlobal();
+ CreateSharedProcArray();
/*
* Set up shared-inval messaging
*/
- CreateSharedInvalidationState(maxBackends);
+ CreateSharedInvalidationState(MaxBackends);
/*
* Set up free-space map
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 23b198b1497..7c2766e6285 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -11,6 +11,11 @@
* Because of various subtle race conditions it is critical that a backend
* hold the correct locks while setting or clearing its MyProc->xid field.
* See notes in GetSnapshotData.
+ *
+ * The process array now also includes PGPROC structures representing
+ * prepared transactions. The xid and subxids fields of these are valid,
+ * as is the procLocks list. They can be distinguished from regular backend
+ * PGPROCs at need by checking for pid == 0.
*
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
@@ -18,13 +23,14 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.2 2005/05/19 23:57:11 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/ipc/procarray.c,v 1.3 2005/06/17 22:32:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/subtrans.h"
+#include "access/twophase.h"
#include "miscadmin.h"
#include "storage/proc.h"
#include "storage/procarray.h"
@@ -76,25 +82,23 @@ static void DisplayXidCache(void);
* Report shared-memory space needed by CreateSharedProcArray.
*/
int
-ProcArrayShmemSize(int maxBackends)
+ProcArrayShmemSize(void)
{
- /* sizeof(ProcArrayStruct) includes the first array element */
- return MAXALIGN(sizeof(ProcArrayStruct) +
- (maxBackends - 1) * sizeof(PGPROC *));
+ return MAXALIGN(offsetof(ProcArrayStruct, procs) +
+ (MaxBackends + max_prepared_xacts) * sizeof(PGPROC *));
}
/*
* Initialize the shared PGPROC array during postmaster startup.
*/
void
-CreateSharedProcArray(int maxBackends)
+CreateSharedProcArray(void)
{
bool found;
/* Create or attach to the ProcArray shared structure */
procArray = (ProcArrayStruct *)
- ShmemInitStruct("Proc Array", ProcArrayShmemSize(maxBackends),
- &found);
+ ShmemInitStruct("Proc Array", ProcArrayShmemSize(), &found);
if (!found)
{
@@ -102,18 +106,15 @@ CreateSharedProcArray(int maxBackends)
* We're the first - initialize.
*/
procArray->numProcs = 0;
- procArray->maxProcs = maxBackends;
+ procArray->maxProcs = MaxBackends + max_prepared_xacts;
}
}
/*
- * Add my own PGPROC (found in the global MyProc) to the shared array.
- *
- * This must be called during backend startup, after fully initializing
- * the contents of MyProc.
+ * Add the specified PGPROC to the shared array.
*/
void
-ProcArrayAddMyself(void)
+ProcArrayAdd(PGPROC *proc)
{
ProcArrayStruct *arrayP = procArray;
@@ -132,32 +133,32 @@ ProcArrayAddMyself(void)
errmsg("sorry, too many clients already")));
}
- arrayP->procs[arrayP->numProcs] = MyProc;
+ arrayP->procs[arrayP->numProcs] = proc;
arrayP->numProcs++;
LWLockRelease(ProcArrayLock);
}
/*
- * Remove my own PGPROC (found in the global MyProc) from the shared array.
- *
- * This must be called during backend shutdown.
+ * Remove the specified PGPROC from the shared array.
*/
void
-ProcArrayRemoveMyself(void)
+ProcArrayRemove(PGPROC *proc)
{
ProcArrayStruct *arrayP = procArray;
int index;
#ifdef XIDCACHE_DEBUG
- DisplayXidCache();
+ /* dump stats at backend shutdown, but not prepared-xact end */
+ if (proc->pid != 0)
+ DisplayXidCache();
#endif
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
for (index = 0; index < arrayP->numProcs; index++)
{
- if (arrayP->procs[index] == MyProc)
+ if (arrayP->procs[index] == proc)
{
arrayP->procs[index] = arrayP->procs[arrayP->numProcs - 1];
arrayP->numProcs--;
@@ -169,7 +170,7 @@ ProcArrayRemoveMyself(void)
/* Ooops */
LWLockRelease(ProcArrayLock);
- elog(LOG, "failed to find my own proc %p in ProcArray", MyProc);
+ elog(LOG, "failed to find proc %p in ProcArray", proc);
}
@@ -330,6 +331,55 @@ result_known:
}
/*
+ * TransactionIdIsActive -- is xid the top-level XID of an active backend?
+ *
+ * This differs from TransactionIdIsInProgress in that it ignores prepared
+ * transactions. Also, we ignore subtransactions since that's not needed
+ * for current uses.
+ */
+bool
+TransactionIdIsActive(TransactionId xid)
+{
+ bool result = false;
+ ProcArrayStruct *arrayP = procArray;
+ int i;
+
+ /*
+ * Don't bother checking a transaction older than RecentXmin; it
+ * could not possibly still be running.
+ */
+ if (TransactionIdPrecedes(xid, RecentXmin))
+ return false;
+
+ LWLockAcquire(ProcArrayLock, LW_SHARED);
+
+ for (i = 0; i < arrayP->numProcs; i++)
+ {
+ PGPROC *proc = arrayP->procs[i];
+
+ /* Fetch xid just once - see GetNewTransactionId */
+ TransactionId pxid = proc->xid;
+
+ if (!TransactionIdIsValid(pxid))
+ continue;
+
+ if (proc->pid == 0)
+ continue; /* ignore prepared transactions */
+
+ if (TransactionIdEquals(pxid, xid))
+ {
+ result = true;
+ break;
+ }
+ }
+
+ LWLockRelease(ProcArrayLock);
+
+ return result;
+}
+
+
+/*
* GetOldestXmin -- returns oldest transaction that was running
* when any current transaction was started.
*
@@ -441,12 +491,12 @@ GetSnapshotData(Snapshot snapshot, bool serializable)
TransactionIdIsValid(MyProc->xmin));
/*
- * Allocating space for MaxBackends xids is usually overkill;
+ * Allocating space for maxProcs xids is usually overkill;
* numProcs would be sufficient. But it seems better to do the
* malloc while not holding the lock, so we can't look at numProcs.
*
* This does open a possibility for avoiding repeated malloc/free: since
- * MaxBackends does not change at runtime, we can simply reuse the
+ * maxProcs does not change at runtime, we can simply reuse the
* previous xip array if any. (This relies on the fact that all
* callers pass static SnapshotData structs.)
*/
@@ -456,7 +506,7 @@ GetSnapshotData(Snapshot snapshot, bool serializable)
* First call for this snapshot
*/
snapshot->xip = (TransactionId *)
- malloc(MaxBackends * sizeof(TransactionId));
+ malloc(arrayP->maxProcs * sizeof(TransactionId));
if (snapshot->xip == NULL)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
@@ -602,14 +652,21 @@ DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself)
/*
* BackendPidGetProc -- get a backend's PGPROC given its PID
+ *
+ * Returns NULL if not found. Note that it is up to the caller to be
+ * sure that the question remains meaningful for long enough for the
+ * answer to be used ...
*/
-struct PGPROC *
+PGPROC *
BackendPidGetProc(int pid)
{
PGPROC *result = NULL;
ProcArrayStruct *arrayP = procArray;
int index;
+ if (pid == 0) /* never match dummy PGPROCs */
+ return NULL;
+
LWLockAcquire(ProcArrayLock, LW_SHARED);
for (index = 0; index < arrayP->numProcs; index++)
@@ -642,10 +699,8 @@ IsBackendPid(int pid)
* active transactions. This is used as a heuristic to decide if
* a pre-XLOG-flush delay is worthwhile during commit.
*
- * An active transaction is something that has written at least one XLOG
- * record; read-only transactions don't count. Also, do not count backends
- * that are blocked waiting for locks, since they are not going to get to
- * run until someone else commits.
+ * Do not count backends that are blocked waiting for locks, since they are
+ * not going to get to run until someone else commits.
*/
int
CountActiveBackends(void)
@@ -656,7 +711,7 @@ CountActiveBackends(void)
/*
* Note: for speed, we don't acquire ProcArrayLock. This is a little bit
- * bogus, but since we are only testing xrecoff for zero or nonzero,
+ * bogus, but since we are only testing fields for zero or nonzero,
* it should be OK. The result is only used for heuristic purposes
* anyway...
*/
@@ -666,7 +721,9 @@ CountActiveBackends(void)
if (proc == MyProc)
continue; /* do not count myself */
- if (proc->logRec.xrecoff == 0)
+ if (proc->pid == 0)
+ continue; /* do not count prepared xacts */
+ if (proc->xid == InvalidTransactionId)
continue; /* do not count if not in a transaction */
if (proc->waitLock != NULL)
continue; /* do not count if blocked on a lock */
@@ -676,25 +733,6 @@ CountActiveBackends(void)
return count;
}
-/*
- * CountEmptyBackendSlots - count empty slots in backend process table
- *
- * Acquiring the lock here is almost certainly overkill, but just in
- * case fetching an int is not atomic on your machine ...
- */
-int
-CountEmptyBackendSlots(void)
-{
- int count;
-
- LWLockAcquire(ProcArrayLock, LW_SHARED);
-
- count = procArray->maxProcs - procArray->numProcs;
-
- LWLockRelease(ProcArrayLock);
-
- return count;
-}
#define XidCacheRemove(i) \
do { \
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index 351ef27bee7..91afd4ed9a5 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.76 2005/06/14 22:15:32 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.77 2005/06/17 22:32:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -77,7 +77,7 @@ static LOCKMETHODID LockTableId = INVALID_LOCKMETHOD;
* Create the lock table described by LockConflicts
*/
void
-InitLockTable(int maxBackends)
+InitLockTable(void)
{
LOCKMETHODID LongTermTableId;
@@ -91,8 +91,7 @@ InitLockTable(int maxBackends)
/* number of lock modes is lengthof()-1 because of dummy zero */
LockTableId = LockMethodTableInit("LockTable",
LockConflicts,
- lengthof(LockConflicts) - 1,
- maxBackends);
+ lengthof(LockConflicts) - 1);
if (!LockMethodIsValid(LockTableId))
elog(ERROR, "could not initialize lock table");
Assert(LockTableId == DEFAULT_LOCKMETHOD);
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 08e579486e0..22d389488d8 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.155 2005/06/14 22:15:32 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/lmgr/lock.c,v 1.156 2005/06/17 22:32:45 tgl Exp $
*
* NOTES
* Outside modules can create a lock table and acquire/release
@@ -33,6 +33,8 @@
#include <signal.h>
#include <unistd.h>
+#include "access/twophase.h"
+#include "access/twophase_rmgr.h"
#include "access/xact.h"
#include "miscadmin.h"
#include "storage/proc.h"
@@ -44,7 +46,15 @@
/* This configuration variable is used to set the lock table size */
int max_locks_per_xact; /* set by guc.c */
-#define NLOCKENTS(maxBackends) (max_locks_per_xact * (maxBackends))
+#define NLOCKENTS() (max_locks_per_xact * (MaxBackends + max_prepared_xacts))
+
+
+/* Record that's written to 2PC state file when a lock is persisted */
+typedef struct TwoPhaseLockRecord
+{
+ LOCKTAG locktag;
+ LOCKMODE lockmode;
+} TwoPhaseLockRecord;
/*
@@ -168,8 +178,7 @@ static void CleanUpLock(LOCKMETHODID lockmethodid, LOCK *lock,
/*
- * InitLocks -- Init the lock module. Create a private data
- * structure for constructing conflict masks.
+ * InitLocks -- Init the lock module. Nothing to do here at present.
*/
void
InitLocks(void)
@@ -222,8 +231,7 @@ LockMethodInit(LockMethod lockMethodTable,
LOCKMETHODID
LockMethodTableInit(const char *tabName,
const LOCKMASK *conflictsP,
- int numModes,
- int maxBackends)
+ int numModes)
{
LockMethod newLockMethod;
LOCKMETHODID lockmethodid;
@@ -239,7 +247,7 @@ LockMethodTableInit(const char *tabName,
numModes, MAX_LOCKMODES - 1);
/* Compute init/max size to request for lock hashtables */
- max_table_size = NLOCKENTS(maxBackends);
+ max_table_size = NLOCKENTS();
init_table_size = max_table_size / 2;
/* Allocate a string for the shmem index table lookups. */
@@ -1418,10 +1426,10 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
while (proclock)
{
bool wakeupNeeded = false;
- PROCLOCK *nextHolder;
+ PROCLOCK *nextplock;
/* Get link first, since we may unlink/delete this proclock */
- nextHolder = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
+ nextplock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
offsetof(PROCLOCK, procLink));
Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
@@ -1474,7 +1482,7 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded);
next_item:
- proclock = nextHolder;
+ proclock = nextplock;
}
LWLockRelease(masterLock);
@@ -1606,13 +1614,261 @@ LockReassignCurrentOwner(void)
/*
+ * AtPrepare_Locks
+ * Do the preparatory work for a PREPARE: make 2PC state file records
+ * for all locks currently held.
+ *
+ * User locks are non-transactional and are therefore ignored.
+ *
+ * There are some special cases that we error out on: we can't be holding
+ * any session locks (should be OK since only VACUUM uses those) and we
+ * can't be holding any locks on temporary objects (since that would mess
+ * up the current backend if it tries to exit before the prepared xact is
+ * committed).
+ */
+void
+AtPrepare_Locks(void)
+{
+ LOCKMETHODID lockmethodid = DEFAULT_LOCKMETHOD;
+ HASH_SEQ_STATUS status;
+ LOCALLOCK *locallock;
+
+ /*
+ * We don't need to touch shared memory for this --- all the necessary
+ * state information is in the locallock table.
+ */
+ hash_seq_init(&status, LockMethodLocalHash[lockmethodid]);
+
+ while ((locallock = (LOCALLOCK *) hash_seq_search(&status)) != NULL)
+ {
+ TwoPhaseLockRecord record;
+ LOCALLOCKOWNER *lockOwners = locallock->lockOwners;
+ int i;
+
+ /* Ignore items that are not of the lockmethod to be processed */
+ if (LOCALLOCK_LOCKMETHOD(*locallock) != lockmethodid)
+ continue;
+
+ /* Ignore it if we don't actually hold the lock */
+ if (locallock->nLocks <= 0)
+ continue;
+
+ /* Scan to verify there are no session locks */
+ for (i = locallock->numLockOwners - 1; i >= 0; i--)
+ {
+ /* elog not ereport since this should not happen */
+ if (lockOwners[i].owner == NULL)
+ elog(ERROR, "cannot PREPARE when session locks exist");
+ }
+
+ /* Can't handle it if the lock is on a temporary object */
+ if (locallock->isTempObject)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot PREPARE a transaction that has operated on temporary tables")));
+
+ /*
+ * Create a 2PC record.
+ */
+ memcpy(&(record.locktag), &(locallock->tag.lock), sizeof(LOCKTAG));
+ record.lockmode = locallock->tag.mode;
+
+ RegisterTwoPhaseRecord(TWOPHASE_RM_LOCK_ID, 0,
+ &record, sizeof(TwoPhaseLockRecord));
+ }
+}
+
+/*
+ * PostPrepare_Locks
+ * Clean up after successful PREPARE
+ *
+ * Here, we want to transfer ownership of our locks to a dummy PGPROC
+ * that's now associated with the prepared transaction, and we want to
+ * clean out the corresponding entries in the LOCALLOCK table.
+ *
+ * Note: by removing the LOCALLOCK entries, we are leaving dangling
+ * pointers in the transaction's resource owner. This is OK at the
+ * moment since resowner.c doesn't try to free locks retail at a toplevel
+ * transaction commit or abort. We could alternatively zero out nLocks
+ * and leave the LOCALLOCK entries to be garbage-collected by LockReleaseAll,
+ * but that probably costs more cycles.
+ */
+void
+PostPrepare_Locks(TransactionId xid)
+{
+ PGPROC *newproc = TwoPhaseGetDummyProc(xid);
+ LOCKMETHODID lockmethodid = DEFAULT_LOCKMETHOD;
+ HASH_SEQ_STATUS status;
+ SHM_QUEUE *procLocks = &(MyProc->procLocks);
+ LWLockId masterLock;
+ LockMethod lockMethodTable;
+ int numLockModes;
+ LOCALLOCK *locallock;
+ PROCLOCK *proclock;
+ PROCLOCKTAG proclocktag;
+ bool found;
+ LOCK *lock;
+
+ /* This is a critical section: any error means big trouble */
+ START_CRIT_SECTION();
+
+ lockMethodTable = LockMethods[lockmethodid];
+ if (!lockMethodTable)
+ elog(ERROR, "unrecognized lock method: %d", lockmethodid);
+
+ numLockModes = lockMethodTable->numLockModes;
+ masterLock = lockMethodTable->masterLock;
+
+ /*
+ * First we run through the locallock table and get rid of unwanted
+ * entries, then we scan the process's proclocks and transfer them
+ * to the target proc.
+ *
+ * We do this separately because we may have multiple locallock
+ * entries pointing to the same proclock, and we daren't end up with
+ * any dangling pointers.
+ */
+ hash_seq_init(&status, LockMethodLocalHash[lockmethodid]);
+
+ while ((locallock = (LOCALLOCK *) hash_seq_search(&status)) != NULL)
+ {
+ if (locallock->proclock == NULL || locallock->lock == NULL)
+ {
+ /*
+ * We must've run out of shared memory while trying to set up
+ * this lock. Just forget the local entry.
+ */
+ Assert(locallock->nLocks == 0);
+ RemoveLocalLock(locallock);
+ continue;
+ }
+
+ /* Ignore items that are not of the lockmethod to be removed */
+ if (LOCALLOCK_LOCKMETHOD(*locallock) != lockmethodid)
+ continue;
+
+ /* We already checked there are no session locks */
+
+ /* Mark the proclock to show we need to release this lockmode */
+ if (locallock->nLocks > 0)
+ locallock->proclock->releaseMask |= LOCKBIT_ON(locallock->tag.mode);
+
+ /* And remove the locallock hashtable entry */
+ RemoveLocalLock(locallock);
+ }
+
+ LWLockAcquire(masterLock, LW_EXCLUSIVE);
+
+ proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
+ offsetof(PROCLOCK, procLink));
+
+ while (proclock)
+ {
+ PROCLOCK *nextplock;
+ LOCKMASK holdMask;
+ PROCLOCK *newproclock;
+
+ /* Get link first, since we may unlink/delete this proclock */
+ nextplock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->procLink,
+ offsetof(PROCLOCK, procLink));
+
+ Assert(proclock->tag.proc == MAKE_OFFSET(MyProc));
+
+ lock = (LOCK *) MAKE_PTR(proclock->tag.lock);
+
+ /* Ignore items that are not of the lockmethod to be removed */
+ if (LOCK_LOCKMETHOD(*lock) != lockmethodid)
+ goto next_item;
+
+ PROCLOCK_PRINT("PostPrepare_Locks", proclock);
+ LOCK_PRINT("PostPrepare_Locks", lock, 0);
+ Assert(lock->nRequested >= 0);
+ Assert(lock->nGranted >= 0);
+ Assert(lock->nGranted <= lock->nRequested);
+ Assert((proclock->holdMask & ~lock->grantMask) == 0);
+
+ /*
+ * Since there were no session locks, we should be releasing all locks
+ */
+ if (proclock->releaseMask != proclock->holdMask)
+ elog(PANIC, "we seem to have dropped a bit somewhere");
+
+ holdMask = proclock->holdMask;
+
+ /*
+ * We cannot simply modify proclock->tag.proc to reassign ownership
+ * of the lock, because that's part of the hash key and the proclock
+ * would then be in the wrong hash chain. So, unlink and delete the
+ * old proclock; create a new one with the right contents; and link
+ * it into place. We do it in this order to be certain we won't
+ * run out of shared memory (the way dynahash.c works, the deleted
+ * object is certain to be available for reallocation).
+ */
+ SHMQueueDelete(&proclock->lockLink);
+ SHMQueueDelete(&proclock->procLink);
+ if (!hash_search(LockMethodProcLockHash[lockmethodid],
+ (void *) &(proclock->tag),
+ HASH_REMOVE, NULL))
+ elog(PANIC, "proclock table corrupted");
+
+ /*
+ * Create the hash key for the new proclock table.
+ */
+ MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG));
+ proclocktag.lock = MAKE_OFFSET(lock);
+ proclocktag.proc = MAKE_OFFSET(newproc);
+
+ newproclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[lockmethodid],
+ (void *) &proclocktag,
+ HASH_ENTER_NULL, &found);
+ if (!newproclock)
+ ereport(PANIC, /* should not happen */
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of shared memory"),
+ errdetail("Not enough memory for reassigning the prepared transaction's locks.")));
+
+ /*
+ * If new, initialize the new entry
+ */
+ if (!found)
+ {
+ newproclock->holdMask = 0;
+ newproclock->releaseMask = 0;
+ /* Add new proclock to appropriate lists */
+ SHMQueueInsertBefore(&lock->procLocks, &newproclock->lockLink);
+ SHMQueueInsertBefore(&newproc->procLocks, &newproclock->procLink);
+ PROCLOCK_PRINT("PostPrepare_Locks: new", newproclock);
+ }
+ else
+ {
+ PROCLOCK_PRINT("PostPrepare_Locks: found", newproclock);
+ Assert((newproclock->holdMask & ~lock->grantMask) == 0);
+ }
+
+ /*
+ * Pass over the identified lock ownership.
+ */
+ Assert((newproclock->holdMask & holdMask) == 0);
+ newproclock->holdMask |= holdMask;
+
+next_item:
+ proclock = nextplock;
+ }
+
+ LWLockRelease(masterLock);
+
+ END_CRIT_SECTION();
+}
+
+
+/*
* Estimate shared-memory space used for lock tables
*/
int
-LockShmemSize(int maxBackends)
+LockShmemSize(void)
{
int size = 0;
- long max_table_size = NLOCKENTS(maxBackends);
+ long max_table_size = NLOCKENTS();
/* lock method headers */
size += MAX_LOCK_METHODS * MAXALIGN(sizeof(LockMethodData));
@@ -1704,21 +1960,19 @@ GetLockmodeName(LOCKMODE mode)
#ifdef LOCK_DEBUG
/*
- * Dump all locks in the MyProc->procLocks list.
+ * Dump all locks in the given proc's procLocks list.
*
* Must have already acquired the masterLock.
*/
void
-DumpLocks(void)
+DumpLocks(PGPROC *proc)
{
- PGPROC *proc;
SHM_QUEUE *procLocks;
PROCLOCK *proclock;
LOCK *lock;
int lockmethodid = DEFAULT_LOCKMETHOD;
LockMethod lockMethodTable;
- proc = MyProc;
if (proc == NULL)
return;
@@ -1793,3 +2047,254 @@ DumpAllLocks(void)
}
#endif /* LOCK_DEBUG */
+
+/*
+ * LOCK 2PC resource manager's routines
+ */
+
+/*
+ * Re-acquire a lock belonging to a transaction that was prepared.
+ *
+ * Because this function is run at db startup, re-acquiring the locks should
+ * never conflict with running transactions because there are none. We
+ * assume that the lock state represented by the stored 2PC files is legal.
+ */
+void
+lock_twophase_recover(TransactionId xid, uint16 info,
+ void *recdata, uint32 len)
+{
+ TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata;
+ PGPROC *proc = TwoPhaseGetDummyProc(xid);
+ LOCKTAG *locktag;
+ LOCKMODE lockmode;
+ LOCKMETHODID lockmethodid;
+ LOCK *lock;
+ PROCLOCK *proclock;
+ PROCLOCKTAG proclocktag;
+ bool found;
+ LWLockId masterLock;
+ LockMethod lockMethodTable;
+
+ Assert(len == sizeof(TwoPhaseLockRecord));
+ locktag = &rec->locktag;
+ lockmode = rec->lockmode;
+ lockmethodid = locktag->locktag_lockmethodid;
+
+ Assert(lockmethodid < NumLockMethods);
+ lockMethodTable = LockMethods[lockmethodid];
+ if (!lockMethodTable)
+ elog(ERROR, "unrecognized lock method: %d", lockmethodid);
+
+ masterLock = lockMethodTable->masterLock;
+
+ LWLockAcquire(masterLock, LW_EXCLUSIVE);
+
+ /*
+ * Find or create a lock with this tag.
+ */
+ lock = (LOCK *) hash_search(LockMethodLockHash[lockmethodid],
+ (void *) locktag,
+ HASH_ENTER_NULL, &found);
+ if (!lock)
+ {
+ LWLockRelease(masterLock);
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of shared memory"),
+ errhint("You may need to increase max_locks_per_transaction.")));
+ }
+
+ /*
+ * if it's a new lock object, initialize it
+ */
+ if (!found)
+ {
+ lock->grantMask = 0;
+ lock->waitMask = 0;
+ SHMQueueInit(&(lock->procLocks));
+ ProcQueueInit(&(lock->waitProcs));
+ lock->nRequested = 0;
+ lock->nGranted = 0;
+ MemSet(lock->requested, 0, sizeof(int) * MAX_LOCKMODES);
+ MemSet(lock->granted, 0, sizeof(int) * MAX_LOCKMODES);
+ LOCK_PRINT("lock_twophase_recover: new", lock, lockmode);
+ }
+ else
+ {
+ LOCK_PRINT("lock_twophase_recover: found", lock, lockmode);
+ Assert((lock->nRequested >= 0) && (lock->requested[lockmode] >= 0));
+ Assert((lock->nGranted >= 0) && (lock->granted[lockmode] >= 0));
+ Assert(lock->nGranted <= lock->nRequested);
+ }
+
+ /*
+ * Create the hash key for the proclock table.
+ */
+ MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG)); /* must clear padding */
+ proclocktag.lock = MAKE_OFFSET(lock);
+ proclocktag.proc = MAKE_OFFSET(proc);
+
+ /*
+ * Find or create a proclock entry with this tag
+ */
+ proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[lockmethodid],
+ (void *) &proclocktag,
+ HASH_ENTER_NULL, &found);
+ if (!proclock)
+ {
+ /* Ooops, not enough shmem for the proclock */
+ if (lock->nRequested == 0)
+ {
+ /*
+ * There are no other requestors of this lock, so garbage-collect
+ * the lock object. We *must* do this to avoid a permanent leak
+ * of shared memory, because there won't be anything to cause
+ * anyone to release the lock object later.
+ */
+ Assert(SHMQueueEmpty(&(lock->procLocks)));
+ if (!hash_search(LockMethodLockHash[lockmethodid],
+ (void *) &(lock->tag),
+ HASH_REMOVE, NULL))
+ elog(PANIC, "lock table corrupted");
+ }
+ LWLockRelease(masterLock);
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of shared memory"),
+ errhint("You may need to increase max_locks_per_transaction.")));
+ }
+
+ /*
+ * If new, initialize the new entry
+ */
+ if (!found)
+ {
+ proclock->holdMask = 0;
+ proclock->releaseMask = 0;
+ /* Add proclock to appropriate lists */
+ SHMQueueInsertBefore(&lock->procLocks, &proclock->lockLink);
+ SHMQueueInsertBefore(&proc->procLocks, &proclock->procLink);
+ PROCLOCK_PRINT("lock_twophase_recover: new", proclock);
+ }
+ else
+ {
+ PROCLOCK_PRINT("lock_twophase_recover: found", proclock);
+ Assert((proclock->holdMask & ~lock->grantMask) == 0);
+ }
+
+ /*
+ * lock->nRequested and lock->requested[] count the total number of
+ * requests, whether granted or waiting, so increment those
+ * immediately.
+ */
+ lock->nRequested++;
+ lock->requested[lockmode]++;
+ Assert((lock->nRequested > 0) && (lock->requested[lockmode] > 0));
+
+ /*
+ * We shouldn't already hold the desired lock.
+ */
+ if (proclock->holdMask & LOCKBIT_ON(lockmode))
+ elog(ERROR, "lock %s on object %u/%u/%u is already held",
+ lock_mode_names[lockmode],
+ lock->tag.locktag_field1, lock->tag.locktag_field2,
+ lock->tag.locktag_field3);
+
+ /*
+ * We ignore any possible conflicts and just grant ourselves the lock.
+ */
+ GrantLock(lock, proclock, lockmode);
+
+ LWLockRelease(masterLock);
+}
+
+/*
+ * 2PC processing routine for COMMIT PREPARED case.
+ *
+ * Find and release the lock indicated by the 2PC record.
+ */
+void
+lock_twophase_postcommit(TransactionId xid, uint16 info,
+ void *recdata, uint32 len)
+{
+ TwoPhaseLockRecord *rec = (TwoPhaseLockRecord *) recdata;
+ PGPROC *proc = TwoPhaseGetDummyProc(xid);
+ LOCKTAG *locktag;
+ LOCKMODE lockmode;
+ LOCKMETHODID lockmethodid;
+ PROCLOCKTAG proclocktag;
+ LOCK *lock;
+ PROCLOCK *proclock;
+ LWLockId masterLock;
+ LockMethod lockMethodTable;
+ bool wakeupNeeded;
+
+ Assert(len == sizeof(TwoPhaseLockRecord));
+ locktag = &rec->locktag;
+ lockmode = rec->lockmode;
+ lockmethodid = locktag->locktag_lockmethodid;
+
+ Assert(lockmethodid < NumLockMethods);
+ lockMethodTable = LockMethods[lockmethodid];
+ if (!lockMethodTable)
+ elog(ERROR, "unrecognized lock method: %d", lockmethodid);
+
+ masterLock = lockMethodTable->masterLock;
+
+ LWLockAcquire(masterLock, LW_EXCLUSIVE);
+
+ /*
+ * Re-find the lock object (it had better be there).
+ */
+ lock = (LOCK *) hash_search(LockMethodLockHash[lockmethodid],
+ (void *) locktag,
+ HASH_FIND, NULL);
+ if (!lock)
+ elog(PANIC, "failed to re-find shared lock object");
+
+ /*
+ * Re-find the proclock object (ditto).
+ */
+ MemSet(&proclocktag, 0, sizeof(PROCLOCKTAG)); /* must clear padding */
+ proclocktag.lock = MAKE_OFFSET(lock);
+ proclocktag.proc = MAKE_OFFSET(proc);
+ proclock = (PROCLOCK *) hash_search(LockMethodProcLockHash[lockmethodid],
+ (void *) &proclocktag,
+ HASH_FIND, NULL);
+ if (!proclock)
+ elog(PANIC, "failed to re-find shared proclock object");
+
+ /*
+ * Double-check that we are actually holding a lock of the type we
+ * want to release.
+ */
+ if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
+ {
+ PROCLOCK_PRINT("lock_twophase_postcommit: WRONGTYPE", proclock);
+ LWLockRelease(masterLock);
+ elog(WARNING, "you don't own a lock of type %s",
+ lock_mode_names[lockmode]);
+ return;
+ }
+
+ /*
+ * Do the releasing. CleanUpLock will waken any now-wakable waiters.
+ */
+ wakeupNeeded = UnGrantLock(lock, lockmode, proclock, lockMethodTable);
+
+ CleanUpLock(lockmethodid, lock, proclock, wakeupNeeded);
+
+ LWLockRelease(masterLock);
+}
+
+/*
+ * 2PC processing routine for ROLLBACK PREPARED case.
+ *
+ * This is actually just the same as the COMMIT case.
+ */
+void
+lock_twophase_postabort(TransactionId xid, uint16 info,
+ void *recdata, uint32 len)
+{
+ lock_twophase_postcommit(xid, info, recdata, len);
+}
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index b62283cffb4..227c3694788 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.159 2005/06/14 22:15:32 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/lmgr/proc.c,v 1.160 2005/06/17 22:32:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -92,13 +92,13 @@ static bool CheckStatementTimeout(void);
* Report shared-memory space needed by InitProcGlobal.
*/
int
-ProcGlobalShmemSize(int maxBackends)
+ProcGlobalShmemSize(void)
{
int size = 0;
size += MAXALIGN(sizeof(PROC_HDR)); /* ProcGlobal */
size += MAXALIGN(NUM_DUMMY_PROCS * sizeof(PGPROC)); /* DummyProcs */
- size += MAXALIGN(maxBackends * sizeof(PGPROC)); /* MyProcs */
+ size += MAXALIGN(MaxBackends * sizeof(PGPROC)); /* MyProcs */
size += MAXALIGN(sizeof(slock_t)); /* ProcStructLock */
return size;
@@ -108,10 +108,10 @@ ProcGlobalShmemSize(int maxBackends)
* Report number of semaphores needed by InitProcGlobal.
*/
int
-ProcGlobalSemas(int maxBackends)
+ProcGlobalSemas(void)
{
/* We need a sema per backend, plus one for each dummy process. */
- return maxBackends + NUM_DUMMY_PROCS;
+ return MaxBackends + NUM_DUMMY_PROCS;
}
/*
@@ -134,7 +134,7 @@ ProcGlobalSemas(int maxBackends)
* postmaster, not in backends.
*/
void
-InitProcGlobal(int maxBackends)
+InitProcGlobal(void)
{
bool foundProcGlobal,
foundDummy;
@@ -170,13 +170,13 @@ InitProcGlobal(int maxBackends)
* Pre-create the PGPROC structures and create a semaphore for
* each.
*/
- procs = (PGPROC *) ShmemAlloc(maxBackends * sizeof(PGPROC));
+ procs = (PGPROC *) ShmemAlloc(MaxBackends * sizeof(PGPROC));
if (!procs)
ereport(FATAL,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of shared memory")));
- MemSet(procs, 0, maxBackends * sizeof(PGPROC));
- for (i = 0; i < maxBackends; i++)
+ MemSet(procs, 0, MaxBackends * sizeof(PGPROC));
+ for (i = 0; i < MaxBackends; i++)
{
PGSemaphoreCreate(&(procs[i].sem));
procs[i].links.next = ProcGlobal->freeProcs;
@@ -254,7 +254,6 @@ InitProcess(void)
MyProc->xmin = InvalidTransactionId;
MyProc->pid = MyProcPid;
MyProc->databaseId = MyDatabaseId;
- MyProc->logRec.xrecoff = 0;
MyProc->lwWaiting = false;
MyProc->lwExclusive = false;
MyProc->lwWaitLink = NULL;
@@ -265,7 +264,7 @@ InitProcess(void)
/*
* Add our PGPROC to the PGPROC array in shared memory.
*/
- ProcArrayAddMyself();
+ ProcArrayAdd(MyProc);
/*
* Arrange to clean up at backend exit.
@@ -332,7 +331,6 @@ InitDummyProcess(int proctype)
MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
MyProc->databaseId = MyDatabaseId;
- MyProc->logRec.xrecoff = 0;
MyProc->lwWaiting = false;
MyProc->lwExclusive = false;
MyProc->lwWaitLink = NULL;
@@ -353,6 +351,35 @@ InitDummyProcess(int proctype)
}
/*
+ * Check whether there are at least N free PGPROC objects.
+ *
+ * Note: this is designed on the assumption that N will generally be small.
+ */
+bool
+HaveNFreeProcs(int n)
+{
+ SHMEM_OFFSET offset;
+ PGPROC *proc;
+ /* use volatile pointer to prevent code rearrangement */
+ volatile PROC_HDR *procglobal = ProcGlobal;
+
+ SpinLockAcquire(ProcStructLock);
+
+ offset = procglobal->freeProcs;
+
+ while (n > 0 && offset != INVALID_OFFSET)
+ {
+ proc = (PGPROC *) MAKE_PTR(offset);
+ offset = proc->links.next;
+ n--;
+ }
+
+ SpinLockRelease(ProcStructLock);
+
+ return (n <= 0);
+}
+
+/*
* Cancel any pending wait for lock, when aborting a transaction.
*
* Returns true if we had been waiting for a lock, else false.
@@ -478,7 +505,7 @@ ProcKill(int code, Datum arg)
#endif
/* Remove our PGPROC from the PGPROC array in shared memory */
- ProcArrayRemoveMyself();
+ ProcArrayRemove(MyProc);
SpinLockAcquire(ProcStructLock);
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 13ad72a3755..2c8cf07eec8 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.89 2005/06/06 20:22:58 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.90 2005/06/17 22:32:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -434,7 +434,7 @@ smgrscheduleunlink(SMgrRelation reln, bool isTemp)
* during transactional operations, since it can't be undone.
*
* If isRedo is true, it is okay for the underlying file to be gone
- * already. (In practice isRedo will always be true.)
+ * already.
*
* This also implies smgrclose() on the SMgrRelation object.
*/
@@ -677,6 +677,30 @@ smgrimmedsync(SMgrRelation reln)
reln->smgr_rnode.relNode)));
}
+
+/*
+ * PostPrepare_smgr -- Clean up after a successful PREPARE
+ *
+ * What we have to do here is throw away the in-memory state about pending
+ * relation deletes. It's all been recorded in the 2PC state file and
+ * it's no longer smgr's job to worry about it.
+ */
+void
+PostPrepare_smgr(void)
+{
+ PendingRelDelete *pending;
+ PendingRelDelete *next;
+
+ for (pending = pendingDeletes; pending != NULL; pending = next)
+ {
+ next = pending->next;
+ pendingDeletes = next;
+ /* must explicitly free the list entry */
+ pfree(pending);
+ }
+}
+
+
/*
* smgrDoPendingDeletes() -- Take care of relation deletes at end of xact.
*
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index f1c65bacc43..454bc2577e1 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.448 2005/06/14 21:04:40 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/tcop/postgres.c,v 1.449 2005/06/17 22:32:46 tgl Exp $
*
* NOTES
* this is the "main" module of the postgres backend and
@@ -930,6 +930,7 @@ exec_simple_query(const char *query_string)
TransactionStmt *stmt = (TransactionStmt *) parsetree;
if (stmt->kind == TRANS_STMT_COMMIT ||
+ stmt->kind == TRANS_STMT_PREPARE ||
stmt->kind == TRANS_STMT_ROLLBACK ||
stmt->kind == TRANS_STMT_ROLLBACK_TO)
allowit = true;
@@ -1261,6 +1262,7 @@ exec_parse_message(const char *query_string, /* string to execute */
TransactionStmt *stmt = (TransactionStmt *) parsetree;
if (stmt->kind == TRANS_STMT_COMMIT ||
+ stmt->kind == TRANS_STMT_PREPARE ||
stmt->kind == TRANS_STMT_ROLLBACK ||
stmt->kind == TRANS_STMT_ROLLBACK_TO)
allowit = true;
@@ -1751,6 +1753,7 @@ exec_execute_message(const char *portal_name, long max_rows)
is_trans_stmt = true;
if (stmt->kind == TRANS_STMT_COMMIT ||
+ stmt->kind == TRANS_STMT_PREPARE ||
stmt->kind == TRANS_STMT_ROLLBACK ||
stmt->kind == TRANS_STMT_ROLLBACK_TO)
is_trans_exit = true;
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 73be822a83e..f948b0f854e 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -10,13 +10,14 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.236 2005/04/28 21:47:15 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.237 2005/06/17 22:32:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/heapam.h"
+#include "access/twophase.h"
#include "catalog/catalog.h"
#include "catalog/namespace.h"
#include "catalog/pg_shadow.h"
@@ -383,11 +384,11 @@ ProcessUtility(Node *parsetree,
if (strcmp(item->defname, "transaction_isolation") == 0)
SetPGVariable("transaction_isolation",
list_make1(item->arg),
- false);
+ true);
else if (strcmp(item->defname, "transaction_read_only") == 0)
SetPGVariable("transaction_read_only",
list_make1(item->arg),
- false);
+ true);
}
}
break;
@@ -401,6 +402,25 @@ ProcessUtility(Node *parsetree,
}
break;
+ case TRANS_STMT_PREPARE:
+ if (!PrepareTransactionBlock(stmt->gid))
+ {
+ /* report unsuccessful commit in completionTag */
+ if (completionTag)
+ strcpy(completionTag, "ROLLBACK");
+ }
+ break;
+
+ case TRANS_STMT_COMMIT_PREPARED:
+ PreventTransactionChain(stmt, "COMMIT PREPARED");
+ FinishPreparedTransaction(stmt->gid, true);
+ break;
+
+ case TRANS_STMT_ROLLBACK_PREPARED:
+ PreventTransactionChain(stmt, "ROLLBACK PREPARED");
+ FinishPreparedTransaction(stmt->gid, false);
+ break;
+
case TRANS_STMT_ROLLBACK:
UserAbortTransactionBlock();
break;
@@ -1215,6 +1235,18 @@ CreateCommandTag(Node *parsetree)
tag = "RELEASE";
break;
+ case TRANS_STMT_PREPARE:
+ tag = "PREPARE TRANSACTION";
+ break;
+
+ case TRANS_STMT_COMMIT_PREPARED:
+ tag = "COMMIT PREPARED";
+ break;
+
+ case TRANS_STMT_ROLLBACK_PREPARED:
+ tag = "ROLLBACK PREPARED";
+ break;
+
default:
tag = "???";
break;
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 61b7522f8c9..da0ffad16b2 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -80,12 +80,13 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.71 2005/04/14 01:38:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.72 2005/06/17 22:32:46 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
+#include "access/twophase_rmgr.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "miscadmin.h"
@@ -171,6 +172,13 @@ static struct CACHECALLBACK
static int cache_callback_count = 0;
+/* info values for 2PC callback */
+#define TWOPHASE_INFO_MSG 0 /* SharedInvalidationMessage */
+#define TWOPHASE_INFO_FILE_BEFORE 1 /* relcache file inval */
+#define TWOPHASE_INFO_FILE_AFTER 2 /* relcache file inval */
+
+static void PersistInvalidationMessage(SharedInvalidationMessage *msg);
+
/* ----------------------------------------------------------------
* Invalidation list support functions
@@ -637,6 +645,56 @@ AtStart_Inval(void)
}
/*
+ * AtPrepare_Inval
+ * Save the inval lists state at 2PC transaction prepare.
+ *
+ * In this phase we just generate 2PC records for all the pending invalidation
+ * work.
+ */
+void
+AtPrepare_Inval(void)
+{
+ /* Must be at top of stack */
+ Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);
+
+ /*
+ * Relcache init file invalidation requires processing both before
+ * and after we send the SI messages.
+ */
+ if (transInvalInfo->RelcacheInitFileInval)
+ RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_FILE_BEFORE,
+ NULL, 0);
+
+ AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
+ &transInvalInfo->CurrentCmdInvalidMsgs);
+
+ ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
+ PersistInvalidationMessage);
+
+ if (transInvalInfo->RelcacheInitFileInval)
+ RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_FILE_AFTER,
+ NULL, 0);
+}
+
+/*
+ * PostPrepare_Inval
+ * Clean up after successful PREPARE.
+ *
+ * Here, we want to act as though the transaction aborted, so that we will
+ * undo any syscache changes it made, thereby bringing us into sync with the
+ * outside world, which doesn't believe the transaction committed yet.
+ *
+ * If the prepared transaction is later aborted, there is nothing more to
+ * do; if it commits, we will receive the consequent inval messages just
+ * like everyone else.
+ */
+void
+PostPrepare_Inval(void)
+{
+ AtEOXact_Inval(false);
+}
+
+/*
* AtSubStart_Inval
* Initialize inval lists at start of a subtransaction.
*/
@@ -655,6 +713,47 @@ AtSubStart_Inval(void)
}
/*
+ * PersistInvalidationMessage
+ * Write an invalidation message to the 2PC state file.
+ */
+static void
+PersistInvalidationMessage(SharedInvalidationMessage *msg)
+{
+ RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_MSG,
+ msg, sizeof(SharedInvalidationMessage));
+}
+
+/*
+ * inval_twophase_postcommit
+ * Process an invalidation message from the 2PC state file.
+ */
+void
+inval_twophase_postcommit(TransactionId xid, uint16 info,
+ void *recdata, uint32 len)
+{
+ SharedInvalidationMessage *msg;
+
+ switch (info)
+ {
+ case TWOPHASE_INFO_MSG:
+ msg = (SharedInvalidationMessage *) recdata;
+ Assert(len == sizeof(SharedInvalidationMessage));
+ SendSharedInvalidMessage(msg);
+ break;
+ case TWOPHASE_INFO_FILE_BEFORE:
+ RelationCacheInitFileInvalidate(true);
+ break;
+ case TWOPHASE_INFO_FILE_AFTER:
+ RelationCacheInitFileInvalidate(false);
+ break;
+ default:
+ Assert(false);
+ break;
+ }
+}
+
+
+/*
* AtEOXact_Inval
* Process queued-up invalidation messages at end of main transaction.
*
diff --git a/src/backend/utils/init/flatfiles.c b/src/backend/utils/init/flatfiles.c
index 49495abfdd6..fcbc99189ff 100644
--- a/src/backend/utils/init/flatfiles.c
+++ b/src/backend/utils/init/flatfiles.c
@@ -22,7 +22,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/utils/init/flatfiles.c,v 1.7 2005/06/06 17:01:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/init/flatfiles.c,v 1.8 2005/06/17 22:32:47 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -32,6 +32,7 @@
#include <unistd.h>
#include "access/heapam.h"
+#include "access/twophase_rmgr.h"
#include "catalog/pg_database.h"
#include "catalog/pg_group.h"
#include "catalog/pg_namespace.h"
@@ -48,10 +49,16 @@
#include "utils/syscache.h"
+/* Actual names of the flat files (within $PGDATA/global/) */
#define DATABASE_FLAT_FILE "pg_database"
#define GROUP_FLAT_FILE "pg_group"
#define USER_FLAT_FILE "pg_pwd"
+/* Info bits in a flatfiles 2PC record */
+#define FF_BIT_DATABASE 1
+#define FF_BIT_GROUP 2
+#define FF_BIT_USER 4
+
/*
* The need-to-update-files flags are SubTransactionIds that show
@@ -757,6 +764,43 @@ AtEOXact_UpdateFlatFiles(bool isCommit)
SendPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE);
}
+
+/*
+ * This routine is called during transaction prepare.
+ *
+ * Record which files need to be refreshed if this transaction later
+ * commits.
+ *
+ * Note: it's OK to clear the flags immediately, since if the PREPARE fails
+ * further on, we'd only reset the flags anyway. So there's no need for a
+ * separate PostPrepare call.
+ */
+void
+AtPrepare_UpdateFlatFiles(void)
+{
+ uint16 info = 0;
+
+ if (database_file_update_subid != InvalidSubTransactionId)
+ {
+ database_file_update_subid = InvalidSubTransactionId;
+ info |= FF_BIT_DATABASE;
+ }
+ if (group_file_update_subid != InvalidSubTransactionId)
+ {
+ group_file_update_subid = InvalidSubTransactionId;
+ info |= FF_BIT_GROUP;
+ }
+ if (user_file_update_subid != InvalidSubTransactionId)
+ {
+ user_file_update_subid = InvalidSubTransactionId;
+ info |= FF_BIT_USER;
+ }
+ if (info != 0)
+ RegisterTwoPhaseRecord(TWOPHASE_RM_FLATFILES_ID, info,
+ NULL, 0);
+}
+
+
/*
* AtEOSubXact_UpdateFlatFiles
*
@@ -831,3 +875,28 @@ flatfile_update_trigger(PG_FUNCTION_ARGS)
return PointerGetDatum(NULL);
}
+
+
+/*
+ * 2PC processing routine for COMMIT PREPARED case.
+ *
+ * (We don't have to do anything for ROLLBACK PREPARED.)
+ */
+void
+flatfile_twophase_postcommit(TransactionId xid, uint16 info,
+ void *recdata, uint32 len)
+{
+ /*
+ * Set flags to do the needed file updates at the end of my own
+ * current transaction. (XXX this has some issues if my own
+ * transaction later rolls back, or if there is any significant
+ * delay before I commit. OK for now because we disallow
+ * COMMIT PREPARED inside a transaction block.)
+ */
+ if (info & FF_BIT_DATABASE)
+ database_file_update_needed();
+ if (info & FF_BIT_GROUP)
+ group_file_update_needed();
+ if (info & FF_BIT_USER)
+ user_file_update_needed();
+}
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index 7130365d4fd..72fba39df46 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/init/postinit.c,v 1.147 2005/05/19 21:35:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/init/postinit.c,v 1.148 2005/06/17 22:32:47 tgl Exp $
*
*
*-------------------------------------------------------------------------
@@ -232,7 +232,7 @@ InitCommunication(void)
* We're running a postgres bootstrap process or a standalone
* backend. Create private "shmem" and semaphores.
*/
- CreateSharedMemoryAndSemaphores(true, MaxBackends, 0);
+ CreateSharedMemoryAndSemaphores(true, 0);
}
}
@@ -456,7 +456,7 @@ InitPostgres(const char *dbname, const char *username)
*/
if (!am_superuser &&
ReservedBackends > 0 &&
- CountEmptyBackendSlots() < ReservedBackends)
+ !HaveNFreeProcs(ReservedBackends))
ereport(FATAL,
(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
errmsg("connection limit exceeded for non-superusers")));
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 8ee03e4d5e3..05ce93cf58f 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -10,7 +10,7 @@
* Written by Peter Eisentraut <peter_e@gmx.net>.
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.267 2005/06/16 20:47:20 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.268 2005/06/17 22:32:47 tgl Exp $
*
*--------------------------------------------------------------------
*/
@@ -25,6 +25,7 @@
#include "utils/guc.h"
#include "utils/guc_tables.h"
+#include "access/twophase.h"
#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "commands/async.h"
@@ -1102,6 +1103,15 @@ static struct config_int ConfigureNamesInt[] =
1000, 25, INT_MAX, NULL, NULL
},
+ {
+ {"max_prepared_transactions", PGC_POSTMASTER, RESOURCES,
+ gettext_noop("Sets the maximum number of simultaneously prepared transactions."),
+ NULL
+ },
+ &max_prepared_xacts,
+ 50, 0, 10000, NULL, NULL
+ },
+
#ifdef LOCK_DEBUG
{
{"trace_lock_oidmin", PGC_SUSET, DEVELOPER_OPTIONS,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 5d957ee247d..4c5a1ed59e3 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -79,6 +79,7 @@
#shared_buffers = 1000 # min 16, at least max_connections*2, 8KB each
#temp_buffers = 1000 # min 100, 8KB each
+#max_prepared_transactions = 50 # 0-10000
#work_mem = 1024 # min 64, size in KB
#maintenance_work_mem = 16384 # min 1024, size in KB
#max_stack_depth = 2048 # min 100, size in KB
diff --git a/src/backend/utils/mmgr/portalmem.c b/src/backend/utils/mmgr/portalmem.c
index 26c5dd02a31..b55a3430256 100644
--- a/src/backend/utils/mmgr/portalmem.c
+++ b/src/backend/utils/mmgr/portalmem.c
@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.80 2005/05/29 04:23:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mmgr/portalmem.c,v 1.81 2005/06/17 22:32:47 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -467,6 +467,48 @@ CommitHoldablePortals(void)
}
/*
+ * Pre-prepare processing for portals.
+ *
+ * Currently we refuse PREPARE if the transaction created any holdable
+ * cursors, since it's quite unclear what to do with one. However, this
+ * has the same API as CommitHoldablePortals and is invoked in the same
+ * way by xact.c, so that we can easily do something reasonable if anyone
+ * comes up with something reasonable to do.
+ *
+ * Returns TRUE if any holdable cursors were processed, FALSE if not.
+ */
+bool
+PrepareHoldablePortals(void)
+{
+ bool result = false;
+ HASH_SEQ_STATUS status;
+ PortalHashEnt *hentry;
+
+ hash_seq_init(&status, PortalHashTable);
+
+ while ((hentry = (PortalHashEnt *) hash_seq_search(&status)) != NULL)
+ {
+ Portal portal = hentry->portal;
+
+ /* Is it a holdable portal created in the current xact? */
+ if ((portal->cursorOptions & CURSOR_OPT_HOLD) &&
+ portal->createSubid != InvalidSubTransactionId &&
+ portal->status == PORTAL_READY)
+ {
+ /*
+ * We are exiting the transaction that created a holdable
+ * cursor. Can't do PREPARE.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot PREPARE a transaction that has created a cursor WITH HOLD")));
+ }
+ }
+
+ return result;
+}
+
+/*
* Pre-commit processing for portals.
*
* Remove all non-holdable portals created in this transaction.
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index f4951c3f1bb..9e42c902f1c 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -39,7 +39,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
* Portions taken from FreeBSD.
*
- * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.83 2005/04/30 08:08:51 neilc Exp $
+ * $PostgreSQL: pgsql/src/bin/initdb/initdb.c,v 1.84 2005/06/17 22:32:47 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -2124,6 +2124,7 @@ main(int argc, char *argv[])
"pg_xlog/archive_status",
"pg_clog",
"pg_subtrans",
+ "pg_twophase",
"pg_multixact/members",
"pg_multixact/offsets",
"base",
diff --git a/src/bin/psql/common.c b/src/bin/psql/common.c
index df17d0404bc..fed2275f469 100644
--- a/src/bin/psql/common.c
+++ b/src/bin/psql/common.c
@@ -3,7 +3,7 @@
*
* Copyright (c) 2000-2005, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/bin/psql/common.c,v 1.102 2005/06/14 02:57:41 momjian Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/common.c,v 1.103 2005/06/17 22:32:47 tgl Exp $
*/
#include "postgres_fe.h"
#include "common.h"
@@ -1216,6 +1216,21 @@ command_no_begin(const char *query)
return true;
if (wordlen == 8 && pg_strncasecmp(query, "rollback", 8) == 0)
return true;
+ if (wordlen == 7 && pg_strncasecmp(query, "prepare", 7) == 0)
+ {
+ /* PREPARE TRANSACTION is a TC command, PREPARE foo is not */
+ query += wordlen;
+
+ query = skip_white_space(query);
+
+ wordlen = 0;
+ while (isalpha((unsigned char) query[wordlen]))
+ wordlen += PQmblen(&query[wordlen], pset.encoding);
+
+ if (wordlen == 11 && pg_strncasecmp(query, "transaction", 11) == 0)
+ return true;
+ return false;
+ }
/*
* Commands not allowed within transactions. The statements checked
diff --git a/src/include/access/subtrans.h b/src/include/access/subtrans.h
index ac884775a09..2810e438d54 100644
--- a/src/include/access/subtrans.h
+++ b/src/include/access/subtrans.h
@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/subtrans.h,v 1.5 2004/12/31 22:03:21 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/access/subtrans.h,v 1.6 2005/06/17 22:32:48 tgl Exp $
*/
#ifndef SUBTRANS_H
#define SUBTRANS_H
@@ -18,7 +18,7 @@ extern TransactionId SubTransGetTopmostTransaction(TransactionId xid);
extern int SUBTRANSShmemSize(void);
extern void SUBTRANSShmemInit(void);
extern void BootStrapSUBTRANS(void);
-extern void StartupSUBTRANS(void);
+extern void StartupSUBTRANS(TransactionId oldestActiveXID);
extern void ShutdownSUBTRANS(void);
extern void CheckPointSUBTRANS(void);
extern void ExtendSUBTRANS(TransactionId newestXact);
diff --git a/src/include/access/twophase.h b/src/include/access/twophase.h
new file mode 100644
index 00000000000..ac2e05f33ce
--- /dev/null
+++ b/src/include/access/twophase.h
@@ -0,0 +1,49 @@
+/*-------------------------------------------------------------------------
+ *
+ * twophase.h
+ * Two-phase-commit related declarations.
+ *
+ *
+ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL: pgsql/src/include/access/twophase.h,v 1.1 2005/06/17 22:32:48 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TWOPHASE_H
+#define TWOPHASE_H
+
+#include "storage/lock.h"
+
+
+/*
+ * GlobalTransactionData is defined in twophase.c; other places have no
+ * business knowing the internal definition.
+ */
+typedef struct GlobalTransactionData *GlobalTransaction;
+
+/* GUC variable */
+extern int max_prepared_xacts;
+
+extern int TwoPhaseShmemSize(void);
+extern void TwoPhaseShmemInit(void);
+
+extern PGPROC *TwoPhaseGetDummyProc(TransactionId xid);
+
+extern GlobalTransaction MarkAsPreparing(TransactionId xid, Oid databaseid,
+ char *gid, AclId owner);
+extern void MarkAsPrepared(GlobalTransaction gxact);
+
+extern void StartPrepare(GlobalTransaction gxact);
+extern void EndPrepare(GlobalTransaction gxact);
+
+extern TransactionId PrescanPreparedTransactions(void);
+extern void RecoverPreparedTransactions(void);
+
+extern void RecreateTwoPhaseFile(TransactionId xid, void *content, int len);
+extern void RemoveTwoPhaseFile(TransactionId xid, bool giveWarning);
+
+extern void FinishPreparedTransaction(char *gid, bool isCommit);
+
+#endif /* TWOPHASE_H */
diff --git a/src/include/access/twophase_rmgr.h b/src/include/access/twophase_rmgr.h
new file mode 100644
index 00000000000..f15233ba2f6
--- /dev/null
+++ b/src/include/access/twophase_rmgr.h
@@ -0,0 +1,39 @@
+/*-------------------------------------------------------------------------
+ *
+ * twophase_rmgr.h
+ * Two-phase-commit resource managers definition
+ *
+ *
+ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * $PostgreSQL: pgsql/src/include/access/twophase_rmgr.h,v 1.1 2005/06/17 22:32:48 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef TWOPHASE_RMGR_H
+#define TWOPHASE_RMGR_H
+
+typedef void (*TwoPhaseCallback) (TransactionId xid, uint16 info,
+ void *recdata, uint32 len);
+typedef uint8 TwoPhaseRmgrId;
+
+/*
+ * Built-in resource managers
+ */
+#define TWOPHASE_RM_END_ID 0
+#define TWOPHASE_RM_LOCK_ID 1
+#define TWOPHASE_RM_INVAL_ID 2
+#define TWOPHASE_RM_FLATFILES_ID 3
+#define TWOPHASE_RM_NOTIFY_ID 4
+#define TWOPHASE_RM_MAX_ID TWOPHASE_RM_NOTIFY_ID
+
+extern const TwoPhaseCallback twophase_recover_callbacks[];
+extern const TwoPhaseCallback twophase_postcommit_callbacks[];
+extern const TwoPhaseCallback twophase_postabort_callbacks[];
+
+
+extern void RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info,
+ const void *data, uint32 len);
+
+#endif /* TWOPHASE_RMGR_H */
diff --git a/src/include/access/xact.h b/src/include/access/xact.h
index 7c949899ac4..0c6b5580688 100644
--- a/src/include/access/xact.h
+++ b/src/include/access/xact.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.76 2005/06/06 17:01:24 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.77 2005/06/17 22:32:48 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -47,7 +47,8 @@ extern bool XactReadOnly;
typedef enum
{
XACT_EVENT_COMMIT,
- XACT_EVENT_ABORT
+ XACT_EVENT_ABORT,
+ XACT_EVENT_PREPARE
} XactEvent;
typedef void (*XactCallback) (XactEvent event, void *arg);
@@ -72,8 +73,11 @@ typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid,
* XLOG allows to store some information in high 4 bits of log
* record xl_info field
*/
-#define XLOG_XACT_COMMIT 0x00
-#define XLOG_XACT_ABORT 0x20
+#define XLOG_XACT_COMMIT 0x00
+#define XLOG_XACT_PREPARE 0x10
+#define XLOG_XACT_ABORT 0x20
+#define XLOG_XACT_COMMIT_PREPARED 0x30
+#define XLOG_XACT_ABORT_PREPARED 0x40
typedef struct xl_xact_commit
{
@@ -99,6 +103,31 @@ typedef struct xl_xact_abort
#define MinSizeOfXactAbort offsetof(xl_xact_abort, xnodes)
+/*
+ * COMMIT_PREPARED and ABORT_PREPARED are identical to COMMIT/ABORT records
+ * except that we have to store the XID of the prepared transaction explicitly
+ * --- the XID in the record header will be for the transaction doing the
+ * COMMIT PREPARED or ABORT PREPARED command.
+ */
+
+typedef struct xl_xact_commit_prepared
+{
+ TransactionId xid; /* XID of prepared xact */
+ xl_xact_commit crec; /* COMMIT record */
+ /* MORE DATA FOLLOWS AT END OF STRUCT */
+} xl_xact_commit_prepared;
+
+#define MinSizeOfXactCommitPrepared offsetof(xl_xact_commit_prepared, crec.xnodes)
+
+typedef struct xl_xact_abort_prepared
+{
+ TransactionId xid; /* XID of prepared xact */
+ xl_xact_abort arec; /* ABORT record */
+ /* MORE DATA FOLLOWS AT END OF STRUCT */
+} xl_xact_abort_prepared;
+
+#define MinSizeOfXactAbortPrepared offsetof(xl_xact_abort_prepared, arec.xnodes)
+
/* ----------------
* extern definitions
@@ -121,6 +150,7 @@ extern void CommitTransactionCommand(void);
extern void AbortCurrentTransaction(void);
extern void BeginTransactionBlock(void);
extern bool EndTransactionBlock(void);
+extern bool PrepareTransactionBlock(char *gid);
extern void UserAbortTransactionBlock(void);
extern void ReleaseSavepoint(List *options);
extern void DefineSavepoint(char *name);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 535e5bb01ba..de422a41a9f 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.276 2005/06/15 12:56:35 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.277 2005/06/17 22:32:48 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 200506151
+#define CATALOG_VERSION_NO 200506171
#endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index c06c4a7d41f..a05a4f3a62c 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.367 2005/06/14 21:04:41 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.368 2005/06/17 22:32:48 tgl Exp $
*
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
@@ -3005,6 +3005,8 @@ DATA(insert OID = 2084 ( pg_show_all_settings PGNSP PGUID 12 f f t t s 0 2249 "
DESCR("SHOW ALL as a function");
DATA(insert OID = 1371 ( pg_lock_status PGNSP PGUID 12 f f t t v 0 2249 "" _null_ _null_ _null_ pg_lock_status - _null_ ));
DESCR("view system lock information");
+DATA(insert OID = 1065 ( pg_prepared_xact PGNSP PGUID 12 f f t t v 0 2249 "" _null_ _null_ _null_ pg_prepared_xact - _null_ ));
+DESCR("view two-phase transactions");
DATA(insert OID = 2079 ( pg_table_is_visible PGNSP PGUID 12 f f t f s 1 16 "26" _null_ _null_ _null_ pg_table_is_visible - _null_ ));
DESCR("is table visible in search path?");
diff --git a/src/include/commands/async.h b/src/include/commands/async.h
index 08855ea5403..b893771b0f7 100644
--- a/src/include/commands/async.h
+++ b/src/include/commands/async.h
@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/commands/async.h,v 1.27 2004/12/31 22:03:28 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/commands/async.h,v 1.28 2005/06/17 22:32:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -26,6 +26,7 @@ extern void AtAbort_Notify(void);
extern void AtSubStart_Notify(void);
extern void AtSubCommit_Notify(void);
extern void AtSubAbort_Notify(void);
+extern void AtPrepare_Notify(void);
/* signal handler for inbound notifies (SIGUSR2) */
extern void NotifyInterruptHandler(SIGNAL_ARGS);
@@ -38,4 +39,7 @@ extern void NotifyInterruptHandler(SIGNAL_ARGS);
extern void EnableNotifyInterrupt(void);
extern bool DisableNotifyInterrupt(void);
+extern void notify_twophase_postcommit(TransactionId xid, uint16 info,
+ void *recdata, uint32 len);
+
#endif /* ASYNC_H */
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 04b32082ebf..993a240faa1 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.281 2005/06/05 22:32:57 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.282 2005/06/17 22:32:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1556,7 +1556,10 @@ typedef enum TransactionStmtKind
TRANS_STMT_ROLLBACK,
TRANS_STMT_SAVEPOINT,
TRANS_STMT_RELEASE,
- TRANS_STMT_ROLLBACK_TO
+ TRANS_STMT_ROLLBACK_TO,
+ TRANS_STMT_PREPARE,
+ TRANS_STMT_COMMIT_PREPARED,
+ TRANS_STMT_ROLLBACK_PREPARED
} TransactionStmtKind;
typedef struct TransactionStmt
@@ -1564,6 +1567,7 @@ typedef struct TransactionStmt
NodeTag type;
TransactionStmtKind kind; /* see above */
List *options; /* for BEGIN/START and savepoint commands */
+ char *gid; /* for two-phase-commit related commands */
} TransactionStmt;
/* ----------------------
diff --git a/src/include/storage/ipc.h b/src/include/storage/ipc.h
index d08d0f10be0..3d325b10af6 100644
--- a/src/include/storage/ipc.h
+++ b/src/include/storage/ipc.h
@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/ipc.h,v 1.70 2004/12/31 22:03:42 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/storage/ipc.h,v 1.71 2005/06/17 22:32:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -29,8 +29,6 @@ extern void on_shmem_exit(void (*function) (int code, Datum arg), Datum arg);
extern void on_exit_reset(void);
/* ipci.c */
-extern void CreateSharedMemoryAndSemaphores(bool makePrivate,
- int maxBackends,
- int port);
+extern void CreateSharedMemoryAndSemaphores(bool makePrivate, int port);
#endif /* IPC_H */
diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h
index cdbf8ad406c..3a83c26c7df 100644
--- a/src/include/storage/lmgr.h
+++ b/src/include/storage/lmgr.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/lmgr.h,v 1.49 2005/06/14 22:15:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lmgr.h,v 1.50 2005/06/17 22:32:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -41,7 +41,7 @@
* so increase that if you want to add more modes.
*/
-extern void InitLockTable(int maxBackends);
+extern void InitLockTable(void);
extern void RelationInitLockInfo(Relation relation);
/* Lock a relation */
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index a471ff32a33..e434683d0e5 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.88 2005/06/14 22:15:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lock.h,v 1.89 2005/06/17 22:32:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -370,7 +370,7 @@ extern void InitLocks(void);
extern LockMethod GetLocksMethodTable(LOCK *lock);
extern LOCKMETHODID LockMethodTableInit(const char *tabName,
const LOCKMASK *conflictsP,
- int numModes, int maxBackends);
+ int numModes);
extern LOCKMETHODID LockMethodTableRename(LOCKMETHODID lockmethodid);
extern LockAcquireResult LockAcquire(LOCKMETHODID lockmethodid,
LOCKTAG *locktag,
@@ -383,13 +383,15 @@ extern bool LockRelease(LOCKMETHODID lockmethodid, LOCKTAG *locktag,
extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks);
extern void LockReleaseCurrentOwner(void);
extern void LockReassignCurrentOwner(void);
+extern void AtPrepare_Locks(void);
+extern void PostPrepare_Locks(TransactionId xid);
extern int LockCheckConflicts(LockMethod lockMethodTable,
LOCKMODE lockmode,
LOCK *lock, PROCLOCK *proclock, PGPROC *proc);
extern void GrantLock(LOCK *lock, PROCLOCK *proclock, LOCKMODE lockmode);
extern void GrantAwaitedLock(void);
extern void RemoveFromWaitQueue(PGPROC *proc);
-extern int LockShmemSize(int maxBackends);
+extern int LockShmemSize(void);
extern bool DeadLockCheck(PGPROC *proc);
extern void DeadLockReport(void);
extern void RememberSimpleDeadLock(PGPROC *proc1,
@@ -400,8 +402,15 @@ extern void InitDeadLockChecking(void);
extern LockData *GetLockStatusData(void);
extern const char *GetLockmodeName(LOCKMODE mode);
+extern void lock_twophase_recover(TransactionId xid, uint16 info,
+ void *recdata, uint32 len);
+extern void lock_twophase_postcommit(TransactionId xid, uint16 info,
+ void *recdata, uint32 len);
+extern void lock_twophase_postabort(TransactionId xid, uint16 info,
+ void *recdata, uint32 len);
+
#ifdef LOCK_DEBUG
-extern void DumpLocks(void);
+extern void DumpLocks(PGPROC *proc);
extern void DumpAllLocks(void);
#endif
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index eb45f3e39ae..b4ebdb85d59 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.19 2005/05/19 21:35:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/lwlock.h,v 1.20 2005/06/17 22:32:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -46,6 +46,7 @@ typedef enum LWLockId
MultiXactMemberControlLock,
RelCacheInitLock,
BgWriterCommLock,
+ TwoPhaseStateLock,
NumFixedLWLocks, /* must be last except for
* MaxDynamicLWLock */
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index f771d71933c..ece321028fc 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.78 2005/05/19 21:35:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/proc.h,v 1.79 2005/06/17 22:32:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -46,6 +46,13 @@ struct XidCache
* links: list link for any list the PGPROC is in. When waiting for a lock,
* the PGPROC is linked into that lock's waitProcs queue. A recycled PGPROC
* is linked into ProcGlobal's freeProcs list.
+ *
+ * Note: twophase.c also sets up a dummy PGPROC struct for each currently
+ * prepared transaction. These PGPROCs appear in the ProcArray data structure
+ * so that the prepared transactions appear to be still running and are
+ * correctly shown as holding locks. A prepared transaction PGPROC can be
+ * distinguished from a real one at need by the fact that it has pid == 0.
+ * The semaphore and lock-related fields in a prepared-xact PGPROC are unused.
*/
struct PGPROC
{
@@ -62,16 +69,9 @@ struct PGPROC
* were starting our xact: vacuum must not
* remove tuples deleted by xid >= xmin ! */
- int pid; /* This backend's process id */
+ int pid; /* This backend's process id, or 0 */
Oid databaseId; /* OID of database this backend is using */
- /*
- * XLOG location of first XLOG record written by this backend's
- * current transaction. If backend is not in a transaction or hasn't
- * yet modified anything, logRec.xrecoff is zero.
- */
- XLogRecPtr logRec;
-
/* Info about LWLock the process is currently waiting for, if any. */
bool lwWaiting; /* true if waiting for an LW lock */
bool lwExclusive; /* true if waiting for exclusive access */
@@ -120,11 +120,12 @@ extern int StatementTimeout;
/*
* Function Prototypes
*/
-extern int ProcGlobalSemas(int maxBackends);
-extern int ProcGlobalShmemSize(int maxBackends);
-extern void InitProcGlobal(int maxBackends);
+extern int ProcGlobalSemas(void);
+extern int ProcGlobalShmemSize(void);
+extern void InitProcGlobal(void);
extern void InitProcess(void);
extern void InitDummyProcess(int proctype);
+extern bool HaveNFreeProcs(int n);
extern void ProcReleaseLocks(bool isCommit);
extern void ProcQueueInit(PROC_QUEUE *queue);
diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h
index 437ac306e05..d1780bcca18 100644
--- a/src/include/storage/procarray.h
+++ b/src/include/storage/procarray.h
@@ -7,28 +7,30 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/procarray.h,v 1.1 2005/05/19 21:35:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/procarray.h,v 1.2 2005/06/17 22:32:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef PROCARRAY_H
#define PROCARRAY_H
-extern int ProcArrayShmemSize(int maxBackends);
-extern void CreateSharedProcArray(int maxBackends);
-extern void ProcArrayAddMyself(void);
-extern void ProcArrayRemoveMyself(void);
+#include "storage/lock.h"
+
+
+extern int ProcArrayShmemSize(void);
+extern void CreateSharedProcArray(void);
+extern void ProcArrayAdd(PGPROC *proc);
+extern void ProcArrayRemove(PGPROC *proc);
extern bool TransactionIdIsInProgress(TransactionId xid);
+extern bool TransactionIdIsActive(TransactionId xid);
extern TransactionId GetOldestXmin(bool allDbs);
-/* Use "struct PGPROC", not PGPROC, to avoid including proc.h here */
-extern struct PGPROC *BackendPidGetProc(int pid);
+extern PGPROC *BackendPidGetProc(int pid);
extern bool IsBackendPid(int pid);
extern bool DatabaseHasActiveBackends(Oid databaseId, bool ignoreMyself);
extern int CountActiveBackends(void);
-extern int CountEmptyBackendSlots(void);
extern void XidCacheRemoveRunningXids(TransactionId xid,
int nxids, TransactionId *xids);
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index f00a8aeb833..ab3c39fd1c6 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.51 2005/06/06 17:01:25 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.52 2005/06/17 22:32:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -79,6 +79,7 @@ extern void smgrDoPendingDeletes(bool isCommit);
extern int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr);
extern void AtSubCommit_smgr(void);
extern void AtSubAbort_smgr(void);
+extern void PostPrepare_smgr(void);
extern void smgrcommit(void);
extern void smgrabort(void);
extern void smgrsync(void);
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index b781a270c60..37423dc73c2 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.257 2005/05/27 00:57:49 neilc Exp $
+ * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.258 2005/06/17 22:32:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -825,6 +825,9 @@ extern Datum show_all_settings(PG_FUNCTION_ARGS);
/* lockfuncs.c */
extern Datum pg_lock_status(PG_FUNCTION_ARGS);
+/* access/transam/twophase.c */
+extern Datum pg_prepared_xact(PG_FUNCTION_ARGS);
+
/* catalog/pg_conversion.c */
extern Datum pg_convert_using(PG_FUNCTION_ARGS);
diff --git a/src/include/utils/flatfiles.h b/src/include/utils/flatfiles.h
index 04a901ab008..939239aa1b9 100644
--- a/src/include/utils/flatfiles.h
+++ b/src/include/utils/flatfiles.h
@@ -4,7 +4,7 @@
* Routines for maintaining "flat file" images of the shared catalogs.
*
*
- * $PostgreSQL: pgsql/src/include/utils/flatfiles.h,v 1.3 2005/05/10 22:27:30 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/flatfiles.h,v 1.4 2005/06/17 22:32:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -23,6 +23,7 @@ extern char *user_getflatfilename(void);
extern void BuildFlatFiles(bool database_only);
+extern void AtPrepare_UpdateFlatFiles(void);
extern void AtEOXact_UpdateFlatFiles(bool isCommit);
extern void AtEOSubXact_UpdateFlatFiles(bool isCommit,
SubTransactionId mySubid,
@@ -30,4 +31,7 @@ extern void AtEOSubXact_UpdateFlatFiles(bool isCommit,
extern Datum flatfile_update_trigger(PG_FUNCTION_ARGS);
+extern void flatfile_twophase_postcommit(TransactionId xid, uint16 info,
+ void *recdata, uint32 len);
+
#endif /* FLATFILES_H */
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index ac486e53ea0..372d34a22c3 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.35 2004/12/31 22:03:46 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.36 2005/06/17 22:32:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -30,6 +30,10 @@ extern void AtEOXact_Inval(bool isCommit);
extern void AtEOSubXact_Inval(bool isCommit);
+extern void AtPrepare_Inval(void);
+
+extern void PostPrepare_Inval(void);
+
extern void CommandEndInvalidationMessages(void);
extern void CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple);
@@ -47,4 +51,7 @@ extern void CacheRegisterSyscacheCallback(int cacheid,
extern void CacheRegisterRelcacheCallback(CacheCallbackFunction func,
Datum arg);
+extern void inval_twophase_postcommit(TransactionId xid, uint16 info,
+ void *recdata, uint32 len);
+
#endif /* INVAL_H */
diff --git a/src/include/utils/portal.h b/src/include/utils/portal.h
index b8bcc33f583..33de53eee86 100644
--- a/src/include/utils/portal.h
+++ b/src/include/utils/portal.h
@@ -39,7 +39,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/utils/portal.h,v 1.55 2005/04/11 19:51:16 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/portal.h,v 1.56 2005/06/17 22:32:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -183,6 +183,7 @@ typedef struct PortalData
/* Prototypes for functions in utils/mmgr/portalmem.c */
extern void EnablePortalManager(void);
extern bool CommitHoldablePortals(void);
+extern bool PrepareHoldablePortals(void);
extern void AtCommit_Portals(void);
extern void AtAbort_Portals(void);
extern void AtCleanup_Portals(void);
diff --git a/src/test/regress/expected/prepared_xacts.out b/src/test/regress/expected/prepared_xacts.out
new file mode 100644
index 00000000000..d6a165e94e4
--- /dev/null
+++ b/src/test/regress/expected/prepared_xacts.out
@@ -0,0 +1,213 @@
+--
+-- PREPARED TRANSACTIONS (two-phase commit)
+--
+-- We can't readily test persistence of prepared xacts within the
+-- regression script framework, unfortunately. Note that a crash
+-- isn't really needed ... stopping and starting the postmaster would
+-- be enough, but we can't even do that here.
+-- create a simple table that we'll use in the tests
+CREATE TABLE pxtest1 (foobar VARCHAR(10));
+INSERT INTO pxtest1 VALUES ('aaa');
+-- Test PREPARE TRANSACTION
+BEGIN;
+UPDATE pxtest1 SET foobar = 'bbb' WHERE foobar = 'aaa';
+SELECT * FROM pxtest1;
+ foobar
+--------
+ bbb
+(1 row)
+
+PREPARE TRANSACTION 'foo1';
+SELECT * FROM pxtest1;
+ foobar
+--------
+ aaa
+(1 row)
+
+-- Test pg_prepared_xacts system view
+SELECT gid FROM pg_prepared_xacts;
+ gid
+------
+ foo1
+(1 row)
+
+-- Test ROLLBACK PREPARED
+ROLLBACK PREPARED 'foo1';
+SELECT * FROM pxtest1;
+ foobar
+--------
+ aaa
+(1 row)
+
+SELECT gid FROM pg_prepared_xacts;
+ gid
+-----
+(0 rows)
+
+-- Test COMMIT PREPARED
+BEGIN;
+INSERT INTO pxtest1 VALUES ('ddd');
+SELECT * FROM pxtest1;
+ foobar
+--------
+ aaa
+ ddd
+(2 rows)
+
+PREPARE TRANSACTION 'foo2';
+SELECT * FROM pxtest1;
+ foobar
+--------
+ aaa
+(1 row)
+
+COMMIT PREPARED 'foo2';
+SELECT * FROM pxtest1;
+ foobar
+--------
+ aaa
+ ddd
+(2 rows)
+
+-- Test duplicate gids
+BEGIN;
+UPDATE pxtest1 SET foobar = 'eee' WHERE foobar = 'ddd';
+SELECT * FROM pxtest1;
+ foobar
+--------
+ aaa
+ eee
+(2 rows)
+
+PREPARE TRANSACTION 'foo3';
+SELECT gid FROM pg_prepared_xacts;
+ gid
+------
+ foo3
+(1 row)
+
+BEGIN;
+INSERT INTO pxtest1 VALUES ('fff');
+SELECT * FROM pxtest1;
+ foobar
+--------
+ aaa
+ ddd
+ fff
+(3 rows)
+
+-- This should fail, because the gid foo3 is already in use
+PREPARE TRANSACTION 'foo3';
+ERROR: global transaction identifier "foo3" is already in use
+SELECT * FROM pxtest1;
+ foobar
+--------
+ aaa
+ ddd
+(2 rows)
+
+ROLLBACK PREPARED 'foo3';
+SELECT * FROM pxtest1;
+ foobar
+--------
+ aaa
+ ddd
+(2 rows)
+
+-- Clean up
+DROP TABLE pxtest1;
+-- Test subtransactions
+BEGIN;
+ CREATE TABLE pxtest2 (a int);
+ INSERT INTO pxtest2 VALUES (1);
+ SAVEPOINT a;
+ INSERT INTO pxtest2 VALUES (2);
+ ROLLBACK TO a;
+ SAVEPOINT b;
+ INSERT INTO pxtest2 VALUES (3);
+PREPARE TRANSACTION 'regress-one';
+CREATE TABLE pxtest3(fff int);
+-- Test shared invalidation
+BEGIN;
+ DROP TABLE pxtest3;
+ CREATE TABLE pxtest4 (a int);
+ INSERT INTO pxtest4 VALUES (1);
+ INSERT INTO pxtest4 VALUES (2);
+ DECLARE foo CURSOR FOR SELECT * FROM pxtest4;
+ -- Fetch 1 tuple, keeping the cursor open
+ FETCH 1 FROM foo;
+ a
+---
+ 1
+(1 row)
+
+PREPARE TRANSACTION 'regress-two';
+-- No such cursor
+FETCH 1 FROM foo;
+ERROR: cursor "foo" does not exist
+-- Table doesn't exist, the creation hasn't been committed yet
+SELECT * FROM pxtest2;
+ERROR: relation "pxtest2" does not exist
+-- There should be two prepared transactions
+SELECT gid FROM pg_prepared_xacts;
+ gid
+-------------
+ regress-one
+ regress-two
+(2 rows)
+
+-- pxtest3 should be locked because of the pending DROP
+set statement_timeout to 1000;
+SELECT * FROM pxtest3;
+ERROR: canceling query due to user request
+reset statement_timeout;
+-- Disconnect, we will continue testing in a different backend
+\c -
+-- There should still be two prepared transactions
+SELECT gid FROM pg_prepared_xacts;
+ gid
+-------------
+ regress-one
+ regress-two
+(2 rows)
+
+-- pxtest3 should still be locked because of the pending DROP
+set statement_timeout to 1000;
+SELECT * FROM pxtest3;
+ERROR: canceling query due to user request
+reset statement_timeout;
+-- Commit table creation
+COMMIT PREPARED 'regress-one';
+\d pxtest2
+ Table "public.pxtest2"
+ Column | Type | Modifiers
+--------+---------+-----------
+ a | integer |
+
+SELECT * FROM pxtest2;
+ a
+---
+ 1
+ 3
+(2 rows)
+
+-- There should be one prepared transaction
+SELECT gid FROM pg_prepared_xacts;
+ gid
+-------------
+ regress-two
+(1 row)
+
+-- Commit table drop
+COMMIT PREPARED 'regress-two';
+SELECT * FROM pxtest3;
+ERROR: relation "pxtest3" does not exist
+-- There should be no prepared transactions
+SELECT gid FROM pg_prepared_xacts;
+ gid
+-----
+(0 rows)
+
+-- Clean up
+DROP TABLE pxtest2;
+DROP TABLE pxtest4;
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 5eccf2d44a0..acd2d25b35d 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1279,6 +1279,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
iexit | SELECT ih.name, ih.thepath, interpt_pp(ih.thepath, r.thepath) AS exit FROM ihighway ih, ramp r WHERE (ih.thepath ## r.thepath);
pg_indexes | SELECT n.nspname AS schemaname, c.relname AS tablename, i.relname AS indexname, t.spcname AS "tablespace", pg_get_indexdef(i.oid) AS indexdef FROM ((((pg_index x JOIN pg_class c ON ((c.oid = x.indrelid))) JOIN pg_class i ON ((i.oid = x.indexrelid))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) LEFT JOIN pg_tablespace t ON ((t.oid = i.reltablespace))) WHERE ((c.relkind = 'r'::"char") AND (i.relkind = 'i'::"char"));
pg_locks | SELECT l.locktype, l."database", l.relation, l.page, l.tuple, l."transaction", l.classid, l.objid, l.objsubid, l.pid, l."mode", l.granted FROM pg_lock_status() l(locktype text, "database" oid, relation oid, page integer, tuple smallint, "transaction" xid, classid oid, objid oid, objsubid smallint, pid integer, "mode" text, granted boolean);
+ pg_prepared_xacts | SELECT p."transaction", p.gid, u.usename AS "owner", d.datname AS "database" FROM ((pg_prepared_xact() p("transaction" xid, gid text, ownerid integer, dbid oid) LEFT JOIN pg_database d ON ((p.dbid = d.oid))) LEFT JOIN pg_shadow u ON ((p.ownerid = u.usesysid)));
pg_rules | SELECT n.nspname AS schemaname, c.relname AS tablename, r.rulename, pg_get_ruledef(r.oid) AS definition FROM ((pg_rewrite r JOIN pg_class c ON ((c.oid = r.ev_class))) LEFT JOIN pg_namespace n ON ((n.oid = c.relnamespace))) WHERE (r.rulename <> '_RETURN'::name);
pg_settings | SELECT a.name, a.setting, a.category, a.short_desc, a.extra_desc, a.context, a.vartype, a.source, a.min_val, a.max_val FROM pg_show_all_settings() a(name text, setting text, category text, short_desc text, extra_desc text, context text, vartype text, source text, min_val text, max_val text);
pg_stat_activity | SELECT d.oid AS datid, d.datname, pg_stat_get_backend_pid(s.backendid) AS procpid, pg_stat_get_backend_userid(s.backendid) AS usesysid, u.usename, pg_stat_get_backend_activity(s.backendid) AS current_query, pg_stat_get_backend_activity_start(s.backendid) AS query_start, pg_stat_get_backend_start(s.backendid) AS backend_start, pg_stat_get_backend_client_addr(s.backendid) AS client_addr, pg_stat_get_backend_client_port(s.backendid) AS client_port FROM pg_database d, (SELECT pg_stat_get_backend_idset() AS backendid) s, pg_shadow u WHERE ((pg_stat_get_backend_dbid(s.backendid) = d.oid) AND (pg_stat_get_backend_userid(s.backendid) = u.usesysid));
@@ -1316,7 +1317,7 @@ SELECT viewname, definition FROM pg_views WHERE schemaname <> 'information_schem
shoelace_obsolete | SELECT shoelace.sl_name, shoelace.sl_avail, shoelace.sl_color, shoelace.sl_len, shoelace.sl_unit, shoelace.sl_len_cm FROM shoelace WHERE (NOT (EXISTS (SELECT shoe.shoename FROM shoe WHERE (shoe.slcolor = shoelace.sl_color))));
street | SELECT r.name, r.thepath, c.cname FROM ONLY road r, real_city c WHERE (c.outline ## r.thepath);
toyemp | SELECT emp.name, emp.age, emp."location", (12 * emp.salary) AS annualsal FROM emp;
-(40 rows)
+(41 rows)
SELECT tablename, rulename, definition FROM pg_rules
ORDER BY tablename, rulename;
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index b3a4de3b56f..9a3f7927328 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -60,7 +60,7 @@ ignore: random
# ----------
# The fourth group of parallel test
# ----------
-test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index update namespace
+test: select_into select_distinct select_distinct_on select_implicit select_having subselect union case join aggregates transactions random portals arrays btree_index hash_index update namespace prepared_xacts
test: privileges
test: misc
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index f8621b404ac..bb60dc0a105 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.26 2004/06/18 06:14:25 tgl Exp $
+# $PostgreSQL: pgsql/src/test/regress/serial_schedule,v 1.27 2005/06/17 22:32:50 tgl Exp $
# This should probably be in an order similar to parallel_schedule.
test: boolean
test: char
@@ -74,6 +74,7 @@ test: btree_index
test: hash_index
test: update
test: namespace
+test: prepared_xacts
test: privileges
test: misc
test: select_views
diff --git a/src/test/regress/sql/prepared_xacts.sql b/src/test/regress/sql/prepared_xacts.sql
new file mode 100644
index 00000000000..39de88faab6
--- /dev/null
+++ b/src/test/regress/sql/prepared_xacts.sql
@@ -0,0 +1,137 @@
+--
+-- PREPARED TRANSACTIONS (two-phase commit)
+--
+-- We can't readily test persistence of prepared xacts within the
+-- regression script framework, unfortunately. Note that a crash
+-- isn't really needed ... stopping and starting the postmaster would
+-- be enough, but we can't even do that here.
+
+
+-- create a simple table that we'll use in the tests
+CREATE TABLE pxtest1 (foobar VARCHAR(10));
+
+INSERT INTO pxtest1 VALUES ('aaa');
+
+
+-- Test PREPARE TRANSACTION
+BEGIN;
+UPDATE pxtest1 SET foobar = 'bbb' WHERE foobar = 'aaa';
+SELECT * FROM pxtest1;
+PREPARE TRANSACTION 'foo1';
+
+SELECT * FROM pxtest1;
+
+-- Test pg_prepared_xacts system view
+SELECT gid FROM pg_prepared_xacts;
+
+-- Test ROLLBACK PREPARED
+ROLLBACK PREPARED 'foo1';
+
+SELECT * FROM pxtest1;
+
+SELECT gid FROM pg_prepared_xacts;
+
+
+-- Test COMMIT PREPARED
+BEGIN;
+INSERT INTO pxtest1 VALUES ('ddd');
+SELECT * FROM pxtest1;
+PREPARE TRANSACTION 'foo2';
+
+SELECT * FROM pxtest1;
+
+COMMIT PREPARED 'foo2';
+
+SELECT * FROM pxtest1;
+
+-- Test duplicate gids
+BEGIN;
+UPDATE pxtest1 SET foobar = 'eee' WHERE foobar = 'ddd';
+SELECT * FROM pxtest1;
+PREPARE TRANSACTION 'foo3';
+
+SELECT gid FROM pg_prepared_xacts;
+
+BEGIN;
+INSERT INTO pxtest1 VALUES ('fff');
+SELECT * FROM pxtest1;
+
+-- This should fail, because the gid foo3 is already in use
+PREPARE TRANSACTION 'foo3';
+
+SELECT * FROM pxtest1;
+
+ROLLBACK PREPARED 'foo3';
+
+SELECT * FROM pxtest1;
+
+-- Clean up
+DROP TABLE pxtest1;
+
+-- Test subtransactions
+BEGIN;
+ CREATE TABLE pxtest2 (a int);
+ INSERT INTO pxtest2 VALUES (1);
+ SAVEPOINT a;
+ INSERT INTO pxtest2 VALUES (2);
+ ROLLBACK TO a;
+ SAVEPOINT b;
+ INSERT INTO pxtest2 VALUES (3);
+PREPARE TRANSACTION 'regress-one';
+
+CREATE TABLE pxtest3(fff int);
+
+-- Test shared invalidation
+BEGIN;
+ DROP TABLE pxtest3;
+ CREATE TABLE pxtest4 (a int);
+ INSERT INTO pxtest4 VALUES (1);
+ INSERT INTO pxtest4 VALUES (2);
+ DECLARE foo CURSOR FOR SELECT * FROM pxtest4;
+ -- Fetch 1 tuple, keeping the cursor open
+ FETCH 1 FROM foo;
+PREPARE TRANSACTION 'regress-two';
+
+-- No such cursor
+FETCH 1 FROM foo;
+
+-- Table doesn't exist, the creation hasn't been committed yet
+SELECT * FROM pxtest2;
+
+-- There should be two prepared transactions
+SELECT gid FROM pg_prepared_xacts;
+
+-- pxtest3 should be locked because of the pending DROP
+set statement_timeout to 1000;
+SELECT * FROM pxtest3;
+reset statement_timeout;
+
+-- Disconnect, we will continue testing in a different backend
+\c -
+
+-- There should still be two prepared transactions
+SELECT gid FROM pg_prepared_xacts;
+
+-- pxtest3 should still be locked because of the pending DROP
+set statement_timeout to 1000;
+SELECT * FROM pxtest3;
+reset statement_timeout;
+
+-- Commit table creation
+COMMIT PREPARED 'regress-one';
+\d pxtest2
+SELECT * FROM pxtest2;
+
+-- There should be one prepared transaction
+SELECT gid FROM pg_prepared_xacts;
+
+-- Commit table drop
+COMMIT PREPARED 'regress-two';
+SELECT * FROM pxtest3;
+
+-- There should be no prepared transactions
+SELECT gid FROM pg_prepared_xacts;
+
+-- Clean up
+DROP TABLE pxtest2;
+DROP TABLE pxtest4;