diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2002-07-30 05:24:56 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2002-07-30 05:24:56 +0000 |
commit | c2d0ebce755157ce525d9b08c96dfa88b4da1eab (patch) | |
tree | 717399603e5072c27742a3851d8d6c2d87b03321 | |
parent | b6440a7ece2cf729b27052c6cfd7e837a1eb74eb (diff) | |
download | postgresql-c2d0ebce755157ce525d9b08c96dfa88b4da1eab.tar.gz postgresql-c2d0ebce755157ce525d9b08c96dfa88b4da1eab.zip |
Rewrite xindex.sgml for CREATE OPERATOR CLASS. catalogs.sgml finally
contains descriptions of every single system table. Update 'complex'
tutorial example too.
-rw-r--r-- | doc/src/sgml/catalogs.sgml | 394 | ||||
-rw-r--r-- | doc/src/sgml/xindex.sgml | 697 | ||||
-rw-r--r-- | src/tutorial/complex.source | 143 |
3 files changed, 794 insertions, 440 deletions
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 5ef29364533..13f883a65de 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1,6 +1,6 @@ <!-- Documentation of the system catalogs, directed toward PostgreSQL developers - $Header: /cvsroot/pgsql/doc/src/sgml/catalogs.sgml,v 2.48 2002/07/24 19:11:06 petere Exp $ + $Header: /cvsroot/pgsql/doc/src/sgml/catalogs.sgml,v 2.49 2002/07/30 05:24:56 tgl Exp $ --> <chapter id="catalogs"> @@ -21,7 +21,7 @@ DATABASE</command> inserts a row into the <structname>pg_database</structname> catalog -- and actually creates the database on disk.) There are some exceptions for - esoteric operations, such as adding index access methods. + especially esoteric operations, such as adding index access methods. </para> <table> @@ -180,9 +180,7 @@ </table> <para> - More detailed documentation of most catalogs follow below. The - catalogs that relate to index access methods are explained in the - <citetitle>Programmer's Guide</citetitle>. + More detailed documentation of each catalog follows below. </para> </sect1> @@ -267,6 +265,294 @@ </sect1> + <sect1 id="catalog-pg-am"> + <title>pg_am</title> + + <para> + <structname>pg_am</structname> stores information about index access + methods. There is one row for each index access method supported by + the system. + </para> + + <table> + <title>pg_am Columns</title> + + <tgroup cols=4> + <thead> + <row> + <entry>Name</entry> + <entry>Type</entry> + <entry>References</entry> + <entry>Description</entry> + </row> + </thead> + <tbody> + + <row> + <entry>amname</entry> + <entry><type>name</type></entry> + <entry></entry> + <entry>name of the access method</entry> + </row> + + <row> + <entry>amowner</entry> + <entry><type>int4</type></entry> + <entry>pg_shadow.usesysid</entry> + <entry>user ID of the owner (currently not used)</entry> + </row> + + <row> + <entry>amstrategies</entry> + <entry><type>int2</type></entry> + <entry></entry> + <entry>number of operator strategies for this access method</entry> + </row> + + <row> + <entry>amsupport</entry> + <entry><type>int2</type></entry> + <entry></entry> + <entry>number of support routines for this access method</entry> + </row> + + <row> + <entry>amorderstrategy</entry> + <entry><type>int2</type></entry> + <entry></entry> + <entry>zero if the index offers no sort order, otherwise the strategy + number of the strategy operator that describes the sort order</entry> + </row> + + <row> + <entry>amcanunique</entry> + <entry><type>bool</type></entry> + <entry></entry> + <entry>does AM support unique indexes?</entry> + </row> + + <row> + <entry>amcanmulticol</entry> + <entry><type>bool</type></entry> + <entry></entry> + <entry>does AM support multicolumn indexes?</entry> + </row> + + <row> + <entry>amindexnulls</entry> + <entry><type>bool</type></entry> + <entry></entry> + <entry>does AM support NULL index entries?</entry> + </row> + + <row> + <entry>amconcurrent</entry> + <entry><type>bool</type></entry> + <entry></entry> + <entry>does AM support concurrent updates?</entry> + </row> + + <row> + <entry>amgettuple</entry> + <entry><type>regproc</type></entry> + <entry>pg_proc.oid</entry> + <entry><quote>next valid tuple</quote> function</entry> + </row> + + <row> + <entry>aminsert</entry> + <entry><type>regproc</type></entry> + <entry>pg_proc.oid</entry> + <entry><quote>insert this tuple</quote> function</entry> + </row> + + <row> + <entry>ambeginscan</entry> + <entry><type>regproc</type></entry> + <entry>pg_proc.oid</entry> + <entry><quote>start new scan</quote> function</entry> + </row> + + <row> + <entry>amrescan</entry> + <entry><type>regproc</type></entry> + <entry>pg_proc.oid</entry> + <entry><quote>restart this scan</quote> function</entry> + </row> + + <row> + <entry>amendscan</entry> + <entry><type>regproc</type></entry> + <entry>pg_proc.oid</entry> + <entry><quote>end this scan</quote> function</entry> + </row> + + <row> + <entry>ammarkpos</entry> + <entry><type>regproc</type></entry> + <entry>pg_proc.oid</entry> + <entry><quote>mark current scan position</quote> function</entry> + </row> + + <row> + <entry>amrestrpos</entry> + <entry><type>regproc</type></entry> + <entry>pg_proc.oid</entry> + <entry><quote>restore marked scan position</quote> function</entry> + </row> + + <row> + <entry>ambuild</entry> + <entry><type>regproc</type></entry> + <entry>pg_proc.oid</entry> + <entry><quote>build new index</quote> function</entry> + </row> + + <row> + <entry>ambulkdelete</entry> + <entry><type>regproc</type></entry> + <entry>pg_proc.oid</entry> + <entry>bulk-delete function</entry> + </row> + + <row> + <entry>amcostestimate</entry> + <entry><type>regproc</type></entry> + <entry>pg_proc.oid</entry> + <entry>estimate cost of an indexscan</entry> + </row> + + </tbody> + </tgroup> + </table> + + <para> + An index AM that supports multiple columns (has + <structfield>amcanmulticol</structfield> true) <emphasis>must</> + support indexing nulls in columns after the first, because the planner + will assume the index can be used for queries on just the first + column(s). For example, consider an index on (a,b) and a query + WHERE a = 4. The system will assume the index can be used to scan for + rows with a = 4, which is wrong if the index omits rows where b is null. + However it is okay to omit rows where the first indexed column is null. + (GiST currently does so.) + <structfield>amindexnulls</structfield> should be set true only if the + index AM indexes all rows, including arbitrary combinations of nulls. + </para> + + </sect1> + + + <sect1 id="catalog-pg-amop"> + <title>pg_amop</title> + + <para> + <structname>pg_amop</structname> stores information about operators + associated with index access method operator classes. There is one + row for each operator that is a member of an operator class. + </para> + + <table> + <title>pg_amop Columns</title> + + <tgroup cols=4> + <thead> + <row> + <entry>Name</entry> + <entry>Type</entry> + <entry>References</entry> + <entry>Description</entry> + </row> + </thead> + <tbody> + + <row> + <entry>amopclaid</entry> + <entry><type>oid</type></entry> + <entry>pg_opclass.oid</entry> + <entry>the index opclass this entry is for</entry> + </row> + + <row> + <entry>amopstrategy</entry> + <entry><type>int2</type></entry> + <entry></entry> + <entry>operator strategy number</entry> + </row> + + <row> + <entry>amopreqcheck</entry> + <entry><type>bool</type></entry> + <entry></entry> + <entry>index hit must be rechecked</entry> + </row> + + <row> + <entry>amopopr</entry> + <entry><type>oid</type></entry> + <entry>pg_operator.oid</entry> + <entry>the operator's pg_operator OID</entry> + </row> + + </tbody> + </tgroup> + </table> + + </sect1> + + + <sect1 id="catalog-pg-amproc"> + <title>pg_amproc</title> + + <para> + <structname>pg_amproc</structname> stores information about support + procedures + associated with index access method operator classes. There is one + row for each support procedure belonging to an operator class. + </para> + + <table> + <title>pg_amproc Columns</title> + + <tgroup cols=4> + <thead> + <row> + <entry>Name</entry> + <entry>Type</entry> + <entry>References</entry> + <entry>Description</entry> + </row> + </thead> + <tbody> + + <row> + <entry>amopclaid</entry> + <entry><type>oid</type></entry> + <entry>pg_opclass.oid</entry> + <entry>the index opclass this entry is for</entry> + </row> + + <row> + <entry>amprocnum</entry> + <entry><type>int2</type></entry> + <entry></entry> + <entry>support procedure index</entry> + </row> + + <row> + <entry>amproc</entry> + <entry><type>regproc</type></entry> + <entry>pg_proc.oid</entry> + <entry>OID of the proc</entry> + </row> + + </tbody> + </tgroup> + </table> + + </sect1> + + <sect1 id="catalog-pg-attrdef"> <title>pg_attrdef</title> @@ -1923,6 +2209,104 @@ </sect1> + <sect1 id="catalog-pg-opclass"> + <title>pg_opclass</title> + + <para> + <structname>pg_opclass</structname> defines + index access method operator classes. Each operator class defines + semantics for index columns of a particular datatype and a particular + index access method. Note that there can be multiple operator classes + for a given datatype/access method combination, thus supporting multiple + behaviors. + </para> + + <para> + Operator classes are described at length in the + <citetitle>Programmer's Guide</citetitle>. + </para> + + <table> + <title>pg_opclass Columns</title> + + <tgroup cols=4> + <thead> + <row> + <entry>Name</entry> + <entry>Type</entry> + <entry>References</entry> + <entry>Description</entry> + </row> + </thead> + <tbody> + + <row> + <entry>opcamid</entry> + <entry><type>oid</type></entry> + <entry>pg_am.oid</entry> + <entry>index access method opclass is for</entry> + </row> + + <row> + <entry>opcname</entry> + <entry><type>name</type></entry> + <entry></entry> + <entry>name of this opclass</entry> + </row> + + <row> + <entry>opcnamespace</entry> + <entry><type>oid</type></entry> + <entry>pg_namespace.oid</entry> + <entry>namespace of this opclass</entry> + </row> + + <row> + <entry>opcowner</entry> + <entry><type>int4</type></entry> + <entry>pg_shadow.usesysid</entry> + <entry>opclass owner</entry> + </row> + + <row> + <entry>opcintype</entry> + <entry><type>oid</type></entry> + <entry>pg_type.oid</entry> + <entry>type of input data for opclass</entry> + </row> + + <row> + <entry>opcdefault</entry> + <entry><type>bool</type></entry> + <entry></entry> + <entry>true if opclass is default for opcintype</entry> + </row> + + <row> + <entry>opckeytype</entry> + <entry><type>oid</type></entry> + <entry>pg_type.oid</entry> + <entry>type of index data, or zero if same as opcintype</entry> + </row> + + </tbody> + </tgroup> + </table> + + <para> + The majority of the information defining an operator class is actually + not in its <structname>pg_opclass</structname> row, but in the associated + rows in <structname>pg_amop</structname> and + <structname>pg_amproc</structname>. Those rows are considered to be + part of the operator class definition --- this is not unlike the way + that a relation is defined by a single <structname>pg_class</structname> + row, plus associated rows in <structname>pg_attribute</structname> and + other tables. + </para> + + </sect1> + + <sect1 id="catalog-pg-operator"> <title>pg_operator</title> diff --git a/doc/src/sgml/xindex.sgml b/doc/src/sgml/xindex.sgml index 99f069a6748..062307e09ff 100644 --- a/doc/src/sgml/xindex.sgml +++ b/doc/src/sgml/xindex.sgml @@ -1,5 +1,5 @@ <!-- -$Header: /cvsroot/pgsql/doc/src/sgml/xindex.sgml,v 1.26 2002/06/21 03:25:53 momjian Exp $ +$Header: /cvsroot/pgsql/doc/src/sgml/xindex.sgml,v 1.27 2002/07/30 05:24:56 tgl Exp $ PostgreSQL documentation --> @@ -13,213 +13,234 @@ PostgreSQL documentation The procedures described thus far let you define new types, new functions, and new operators. However, we cannot yet define a secondary index (such as a B-tree, R-tree, or hash access method) - over a new type or its operators. - </para> - - <para> - Look back at - <xref linkend="EXTEND-CATALOGS">. - The right half shows the catalogs that we must modify in order to tell - <productname>PostgreSQL</productname> how to use a user-defined type and/or - user-defined operators with an index (i.e., <filename>pg_am, pg_amop, - pg_amproc, pg_operator</filename> and <filename>pg_opclass</filename>). - Unfortunately, there is no simple command to do this. We will demonstrate - how to modify these catalogs through a running example: a new operator + over a new type, nor associate operators of a new type with secondary + indexes. + To do these things, we must define an <firstterm>operator class</> + for the new datatype. We will describe operator classes in the + context of a running example: a new operator class for the B-tree access method that stores and sorts complex numbers in ascending absolute value order. </para> + + <note> + <para> + Prior to <productname>PostgreSQL</productname> release 7.3, it was + necesssary to make manual additions to + <classname>pg_amop</>, <classname>pg_amproc</>, and + <classname>pg_opclass</> in order to create a user-defined + operator class. That approach is now deprecated in favor of + using <command>CREATE OPERATOR CLASS</>, which is a much simpler + and less error-prone way of creating the necessary catalog entries. + </para> + </note> </sect1> <sect1 id="xindex-am"> - <title>Access Methods</title> + <title>Access Methods and Operator Classes</title> + + <para> + The <classname>pg_am</classname> table contains one row for every + index access method. Support for access to regular tables is + built into <productname>PostgreSQL</productname>, but all index access + methods are described in <classname>pg_am</classname>. It is possible + to add a new index access method by defining the required interface + routines and then creating a row in <classname>pg_am</classname> --- + but that is far beyond the scope of this chapter. + </para> + + <para> + The routines for an index access method do not directly know anything + about the data types the access method will operate on. Instead, an + <firstterm>operator class</> identifies the set of operations that the + access method needs to be able to use to work with a particular data type. + Operator classes are so called because one thing they specify is the set + of WHERE-clause operators that can be used with an index (ie, can be + converted into an indexscan qualification). An operator class may also + specify some <firstterm>support procedures</> that are needed by the + internal operations of the index access method, but do not directly + correspond to any WHERE-clause operator that can be used with the index. + </para> + + <para> + It is possible to define multiple operator classes for the same + input datatype and index access method. By doing this, multiple + sets of indexing semantics can be defined for a single datatype. + For example, a B-tree index requires a sort ordering to be defined + for each datatype it works on. + It might be useful for a complex-number datatype + to have one B-tree operator class that sorts the data by complex + absolute value, another that sorts by real part, and so on. + Typically one of the operator classes will be deemed most commonly + useful and will be marked as the default operator class for that + datatype and index access method. + </para> <para> - The <filename>pg_am</filename> table contains one row for every - index access method. Support for the heap access method is built - into <productname>PostgreSQL</productname>, but all other access - methods are described in <filename>pg_am</filename>. The schema is - shown in <xref linkend="xindex-pgam-table">. + The same operator class name + can be used for several different access methods (for example, both B-tree + and hash access methods have operator classes named + <literal>oid_ops</literal>), but each such class is an independent + entity and must be defined separately. + </para> + </sect1> - <table tocentry="1" id="xindex-pgam-table"> - <title>Index Access Method Schema</title> + <sect1 id="xindex-strategies"> + <title>Access Method Strategies</title> + + <para> + The operators associated with an operator class are identified by + <quote>strategy numbers</>, which serve to identify the semantics of + each operator within the context of its operator class. + For example, B-trees impose a strict ordering on keys, lesser to greater, + and so operators like <quote>less than</> and <quote>greater than or equal + to</> are interesting with respect to a B-tree. + Because + <productname>PostgreSQL</productname> allows the user to define operators, + <productname>PostgreSQL</productname> cannot look at the name of an operator + (e.g., <literal>></> or <literal><</>) and tell what kind of + comparison it is. Instead, the index access method defines a set of + <quote>strategies</>, which can be thought of as generalized operators. + Each operator class shows which actual operator corresponds to each + strategy for a particular datatype and interpretation of the index + semantics. + </para> + + <para> + B-tree indexes define 5 strategies, as shown in <xref + linkend="xindex-btree-strat-table">. + </para> + <table tocentry="1" id="xindex-btree-strat-table"> + <title>B-tree Strategies</title> + <titleabbrev>B-tree</titleabbrev> <tgroup cols="2"> <thead> <row> - <entry>Column</entry> - <entry>Description</entry> + <entry>Operation</entry> + <entry>Strategy Number</entry> </row> </thead> <tbody> <row> - <entry>amname</entry> - <entry>name of the access method</entry> - </row> - <row> - <entry>amowner</entry> - <entry>user ID of the owner (currently not used)</entry> - </row> - <row> - <entry>amstrategies</entry> - <entry>number of strategies for this access method (see below)</entry> - </row> - <row> - <entry>amsupport</entry> - <entry>number of support routines for this access method (see below)</entry> - </row> - <row> - <entry>amorderstrategy</entry> - <entry>zero if the index offers no sort order, otherwise the strategy - number of the strategy operator that describes the sort order</entry> - </row> - <row> - <entry>amcanunique</entry> - <entry>does AM support unique indexes?</entry> + <entry>less than</entry> + <entry>1</entry> </row> <row> - <entry>amcanmulticol</entry> - <entry>does AM support multicolumn indexes?</entry> + <entry>less than or equal</entry> + <entry>2</entry> </row> <row> - <entry>amindexnulls</entry> - <entry>does AM support NULL index entries?</entry> + <entry>equal</entry> + <entry>3</entry> </row> <row> - <entry>amconcurrent</entry> - <entry>does AM support concurrent updates?</entry> + <entry>greater than or equal</entry> + <entry>4</entry> </row> <row> - <entry>amgettuple</entry> + <entry>greater than</entry> + <entry>5</entry> </row> + </tbody> + </tgroup> + </table> + + <para> + Hash indexes express only bitwise similarity, and so they define only 1 + strategy, as shown in <xref linkend="xindex-hash-strat-table">. + </para> + + <table tocentry="1" id="xindex-hash-strat-table"> + <title>Hash Strategies</title> + <titleabbrev>Hash</titleabbrev> + <tgroup cols="2"> + <thead> <row> - <entry>aminsert</entry> + <entry>Operation</entry> + <entry>Strategy Number</entry> </row> + </thead> + <tbody> <row> - <entry>...</entry> - <entry>procedure identifiers for interface routines to the access - method. For example, regproc IDs for opening, closing, and - getting rows from the access method appear here.</entry> + <entry>equal</entry> + <entry>1</entry> </row> </tbody> </tgroup> </table> - </para> - - <note> - <para> - An index AM that supports multiple columns (has - <structfield>amcanmulticol</structfield> true) <emphasis>must</> - support indexing nulls in columns after the first, because the planner - will assume the index can be used for queries on just the first - column(s). For example, consider an index on (a,b) and a query - WHERE a = 4. The system will assume the index can be used to scan for - rows with a = 4, which is wrong if the index omits rows where b is null. - However it is okay to omit rows where the first indexed column is null. - (GiST currently does so.) - <structfield>amindexnulls</structfield> should be set true only if the - index AM indexes all rows, including arbitrary combinations of nulls. - </para> - </note> <para> - The <acronym>OID</acronym> of the row in - <filename>pg_am</filename> is used as a foreign key in a lot of other - tables. You do not need to add a new row to this table; all that - you are interested in is the <acronym>OID</acronym> of the access - method you want to extend: - -<screen> -SELECT oid FROM pg_am WHERE amname = 'btree'; - - oid ------ - 403 -(1 row) -</screen> - - We will use that query in a <literal>WHERE</literal> - clause later. + R-tree indexes express rectangle-containment relationships. + They define 8 strategies, as shown in <xref linkend="xindex-rtree-strat-table">. </para> - </sect1> - - <sect1 id="xindex-strategies"> - <title>Access Method Strategies</title> - <para> - The <structfield>amstrategies</structfield> column exists to standardize - comparisons across data types. For example, B-trees - impose a strict ordering on keys, lesser to greater. Since - <productname>PostgreSQL</productname> allows the user to define operators, - <productname>PostgreSQL</productname> cannot look at the name of an operator - (e.g., <literal>></> or <literal><</>) and tell what kind of comparison it is. In fact, - some access methods don't impose any ordering at all. For example, - R-trees express a rectangle-containment relationship, - whereas a hashed data structure expresses only bitwise similarity based - on the value of a hash function. <productname>PostgreSQL</productname> - needs some consistent way of taking a qualification in your query, - looking at the operator, and then deciding if a usable index exists. This - implies that <productname>PostgreSQL</productname> needs to know, for - example, that the <literal><=</> and <literal>></> operators partition a - B-tree. <productname>PostgreSQL</productname> - uses <firstterm>strategies</firstterm> to express these relationships between - operators and the way they can be used to scan indexes. - </para> - - <para> - Defining a new set of strategies is beyond the scope of this - discussion, but we'll explain how B-tree strategies work because - you'll need to know that to add a new B-tree operator class. In the - <classname>pg_am</classname> table, the - <structfield>amstrategies</structfield> column sets the number of - strategies defined for this access method. For B-trees, this number - is 5. The meanings of these strategies are shown in <xref - linkend="xindex-btree-table">. - </para> - - <table tocentry="1" id="xindex-btree-table"> - <title>B-tree Strategies</title> - <titleabbrev>B-tree</titleabbrev> + <table tocentry="1" id="xindex-rtree-strat-table"> + <title>R-tree Strategies</title> + <titleabbrev>R-tree</titleabbrev> <tgroup cols="2"> <thead> <row> <entry>Operation</entry> - <entry>Index</entry> + <entry>Strategy Number</entry> </row> </thead> <tbody> <row> - <entry>less than</entry> + <entry>left of</entry> <entry>1</entry> </row> <row> - <entry>less than or equal</entry> + <entry>left of or overlapping</entry> <entry>2</entry> </row> <row> - <entry>equal</entry> + <entry>overlapping</entry> <entry>3</entry> </row> <row> - <entry>greater than or equal</entry> + <entry>right of or overlapping</entry> <entry>4</entry> </row> <row> - <entry>greater than</entry> + <entry>right of</entry> <entry>5</entry> </row> + <row> + <entry>same</entry> + <entry>6</entry> + </row> + <row> + <entry>contains</entry> + <entry>7</entry> + </row> + <row> + <entry>contained by</entry> + <entry>8</entry> + </row> </tbody> </tgroup> </table> <para> - The idea is that you'll need to add operators corresponding to these strategies - to the <classname>pg_amop</classname> relation (see below). - The access method code can use these strategy numbers, regardless of data - type, to figure out how to partition the B-tree, - compute selectivity, and so on. Don't worry about the details of adding - operators yet; just understand that there must be a set of these - operators for <type>int2</>, <type>int4</>, <type>oid</>, and all other - data types on which a B-tree can operate. + GiST indexes are even more flexible: they do not have a fixed set of + strategies at all. Instead, the <quote>consistency</> support routine + of a particular GiST operator class interprets the strategy numbers + however it likes. + </para> + + <para> + By the way, the <structfield>amorderstrategy</structfield> column + in <classname>pg_am</> tells whether + the access method supports ordered scan. Zero means it doesn't; if it + does, <structfield>amorderstrategy</structfield> is the strategy + number that corresponds to the ordering operator. For example, B-tree + has <structfield>amorderstrategy</structfield> = 1, which is its + <quote>less than</quote> strategy number. + </para> + + <para> + In short, an operator class must specify a set of operators that express + each of these semantic ideas for the operator class's datatype. </para> </sect1> @@ -227,9 +248,9 @@ SELECT oid FROM pg_am WHERE amname = 'btree'; <title>Access Method Support Routines</title> <para> - Sometimes, strategies aren't enough information for the system to figure - out how to use an index. Some access methods require additional support - routines in order to work. For example, the B-tree + Strategies aren't usually enough information for the system to figure + out how to use an index. In practice, the access methods require + additional support routines in order to work. For example, the B-tree access method must be able to compare two keys and determine whether one is greater than, equal to, or less than the other. Similarly, the R-tree access method must be able to compute @@ -240,102 +261,156 @@ SELECT oid FROM pg_am WHERE amname = 'btree'; </para> <para> - In order to manage diverse support routines consistently across all - <productname>PostgreSQL</productname> access methods, - <classname>pg_am</classname> includes a column called - <structfield>amsupport</structfield>. This column records the - number of support routines used by an access method. For B-trees, - this number is one: the routine to take two keys and return -1, 0, - or +1, depending on whether the first key is less than, equal to, - or greater than the second. (Strictly speaking, this routine can - return a negative number (< 0), zero, or a non-zero positive - number (> 0).) + Just as with operators, the operator class identifies which specific + functions should play each of these roles for a given datatype and + semantic interpretation. The index access method specifies the set + of functions it needs, and the operator class identifies the correct + functions to use by assigning <quote>support function numbers</> to them. </para> <para> - The <structfield>amstrategies</structfield> entry in - <classname>pg_am</classname> is just the number of strategies - defined for the access method in question. The operators for less - than, less equal, and so on don't appear in - <classname>pg_am</classname>. Similarly, - <structfield>amsupport</structfield> is just the number of support - routines required by the access method. The actual routines are - listed elsewhere. + B-trees require a single support function, as shown in <xref + linkend="xindex-btree-support-table">. </para> + <table tocentry="1" id="xindex-btree-support-table"> + <title>B-tree Support Functions</title> + <titleabbrev>B-tree</titleabbrev> + <tgroup cols="2"> + <thead> + <row> + <entry>Operation</entry> + <entry>Support Number</entry> + </row> + </thead> + <tbody> + <row> + <entry> + Compare two keys and return -1, 0, + or +1, depending on whether the first key is less than, equal to, + or greater than the second. (Actually, this routine can + return any negative int32 value (< 0), zero, or any non-zero positive + int32 value (> 0).) + </entry> + <entry>1</entry> + </row> + </tbody> + </tgroup> + </table> + <para> - By the way, the <structfield>amorderstrategy</structfield> column tells whether - the access method supports ordered scan. Zero means it doesn't; if it - does, <structfield>amorderstrategy</structfield> is the number of the strategy - routine that corresponds to the ordering operator. For example, B-tree - has <structfield>amorderstrategy</structfield> = 1, which is its - <quote>less than</quote> strategy number. + Hash indexes likewise require one support function, as shown in <xref + linkend="xindex-hash-support-table">. </para> - </sect1> - <sect1 id="xindex-opclass"> - <title>Operator Classes</title> + <table tocentry="1" id="xindex-hash-support-table"> + <title>Hash Support Functions</title> + <titleabbrev>Hash</titleabbrev> + <tgroup cols="2"> + <thead> + <row> + <entry>Operation</entry> + <entry>Support Number</entry> + </row> + </thead> + <tbody> + <row> + <entry>compute the hash value for a key</entry> + <entry>1</entry> + </row> + </tbody> + </tgroup> + </table> <para> - The next table of interest is <classname>pg_opclass</classname>. This table - defines operator class names and input data types for each of the operator - classes supported by a given index access method. The same class name - can be used for several different access methods (for example, both B-tree - and hash access methods have operator classes named - <literal>oid_ops</literal>), but a separate - <filename>pg_opclass</filename> row must appear for each access method. - The OID of the <classname>pg_opclass</classname> row is - used as a foreign - key in other tables to associate specific operators and support routines - with the operator class. + R-tree indexes require three support functions, + as shown in <xref linkend="xindex-rtree-support-table">. </para> - <para> - You need to add a row with your operator class name (for example, - <literal>complex_abs_ops</literal>) to - <classname>pg_opclass</classname>: - -<programlisting> -INSERT INTO pg_opclass (opcamid, opcname, opcnamespace, opcowner, opcintype, opcdefault, opckeytype) - VALUES ( - (SELECT oid FROM pg_am WHERE amname = 'btree'), - 'complex_abs_ops', - (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog'), - 1, -- UID of superuser is hardwired to 1 as of PG 7.3 - (SELECT oid FROM pg_type WHERE typname = 'complex'), - true, - 0); - -SELECT oid, * - FROM pg_opclass - WHERE opcname = 'complex_abs_ops'; - - oid | opcamid | opcname | opcnamespace | opcowner | opcintype | opcdefault | opckeytype ---------+---------+-----------------+--------------+----------+-----------+------------+------------ - 277975 | 403 | complex_abs_ops | 11 | 1 | 277946 | t | 0 -(1 row) -</programlisting> - - Note that the OID for your <classname>pg_opclass</classname> row will - be different! Don't worry about this though. We'll get this number - from the system later just like we got the OID of the type here. - </para> + <table tocentry="1" id="xindex-rtree-support-table"> + <title>R-tree Support Functions</title> + <titleabbrev>R-tree</titleabbrev> + <tgroup cols="2"> + <thead> + <row> + <entry>Operation</entry> + <entry>Support Number</entry> + </row> + </thead> + <tbody> + <row> + <entry>union</entry> + <entry>1</entry> + </row> + <row> + <entry>intersection</entry> + <entry>2</entry> + </row> + <row> + <entry>size</entry> + <entry>3</entry> + </row> + </tbody> + </tgroup> + </table> <para> - The above example assumes that you want to make this new operator class the - default B-tree operator class for the <type>complex</type> data type. - If you don't, just set <structfield>opcdefault</structfield> to false instead. - <structfield>opckeytype</structfield> is not described here; it should always - be zero for B-tree operator classes. + GiST indexes require seven support functions, + as shown in <xref linkend="xindex-gist-support-table">. </para> + + <table tocentry="1" id="xindex-gist-support-table"> + <title>GiST Support Functions</title> + <titleabbrev>GiST</titleabbrev> + <tgroup cols="2"> + <thead> + <row> + <entry>Operation</entry> + <entry>Support Number</entry> + </row> + </thead> + <tbody> + <row> + <entry>consistent</entry> + <entry>1</entry> + </row> + <row> + <entry>union</entry> + <entry>2</entry> + </row> + <row> + <entry>compress</entry> + <entry>3</entry> + </row> + <row> + <entry>decompress</entry> + <entry>4</entry> + </row> + <row> + <entry>penalty</entry> + <entry>5</entry> + </row> + <row> + <entry>picksplit</entry> + <entry>6</entry> + </row> + <row> + <entry>equal</entry> + <entry>7</entry> + </row> + </tbody> + </tgroup> + </table> + </sect1> <sect1 id="xindex-operators"> <title>Creating the Operators and Support Routines</title> <para> - So now we have an access method and an operator class. - We still need a set of operators. The procedure for + Now that we have seen the ideas, here is the promised example + of creating a new operator class. First, we need a set of operators. + The procedure for defining operators was discussed in <xref linkend="xoper">. For the <literal>complex_abs_ops</literal> operator class on B-trees, the operators we require are: @@ -426,22 +501,15 @@ CREATE FUNCTION complex_abs_eq(complex, complex) RETURNS boolean In practice, all operators defined as index access method strategies must return type <type>boolean</type>, since they must appear at the top level of a <literal>WHERE</> clause to be used with an index. - (On the other hand, the support function returns whatever the - particular access method expects -- in this case, a signed - integer.) + (On the other hand, support functions return whatever the + particular access method expects -- in the case of the comparison + function for B-trees, a signed integer.) </para> </listitem> </itemizedlist> </para> <para> - The final routine in the file is the <quote>support routine</quote> - mentioned when we discussed the <structfield>amsupport</> column of the - <classname>pg_am</classname> table. We will use this later on. For - now, ignore it. - </para> - - <para> Now we are ready to define the operators: <programlisting> @@ -464,114 +532,111 @@ CREATE OPERATOR = ( </para> <para> - The next step is to add entries for these operators to - the <classname>pg_amop</classname> relation. To do this, - we'll need the OIDs of the operators we just - defined. We'll look up the names of all the operators that take - two operands of type <type>complex</type>, and pick ours out: - -<screen> -SELECT o.oid AS opoid, o.oprname - INTO TEMP TABLE complex_ops_tmp - FROM pg_operator o, pg_type t - WHERE o.oprleft = t.oid and o.oprright = t.oid - and t.typname = 'complex'; - - opoid | oprname ---------+--------- - 277963 | + - 277970 | < - 277971 | <= - 277972 | = - 277973 | >= - 277974 | > -(6 rows) -</screen> - - (Again, some of your OID numbers will almost - certainly be different.) The operators we are interested in are those - with OIDs 277970 through 277974. The values you - get will probably be different, and you should substitute them for the - values below. We will do this with a select statement. - </para> - - <para> - Now we are ready to insert entries into <classname>pg_amop</classname> for - our new operator class. These entries must associate the correct - B-tree strategy numbers with each of the operators we need. - The command to insert the less-than operator looks like: + The next step is the registration of the comparison <quote>support + routine</quote> required by B-trees. The C code that implements this + is in the same file that contains the operator procedures: <programlisting> -INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) - SELECT opcl.oid, 1, false, c.opoid - FROM pg_opclass opcl, complex_ops_tmp c - WHERE - opcamid = (SELECT oid FROM pg_am WHERE amname = 'btree') AND - opcname = 'complex_abs_ops' AND - c.oprname = '<'; +CREATE FUNCTION complex_abs_cmp(complex, complex) + RETURNS integer + AS '<replaceable>PGROOT</replaceable>/src/tutorial/complex' + LANGUAGE C; </programlisting> - - Now do this for the other operators substituting for the <literal>1</> in the - second line above and the <literal><</> in the last line. Note the order: - <quote>less than</> is 1, <quote>less than or equal</> is 2, - <quote>equal</> is 3, <quote>greater than or equal</quote> is 4, and - <quote>greater than</quote> is 5. </para> + </sect1> + + <sect1 id="xindex-opclass"> + <title>Creating the Operator Class</title> <para> - The field <filename>amopreqcheck</filename> is not discussed here; it - should always be false for B-tree operators. + Now that we have the required operators and support routine, + we can finally create the operator class: + +<programlisting> +CREATE OPERATOR CLASS complex_abs_ops + DEFAULT FOR TYPE complex USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 complex_abs_cmp(complex, complex); +</programlisting> </para> <para> - The final step is the registration of the <quote>support routine</quote> previously - described in our discussion of <classname>pg_am</classname>. The - OID of this support routine is stored in the - <classname>pg_amproc</classname> table, keyed by the operator class - OID and the support routine number. + And we're done! (Whew.) It should now be possible to create + and use B-tree indexes on <type>complex</type> columns. </para> <para> - First, we need to register the function in - <productname>PostgreSQL</productname> (recall that we put the - C code that implements this routine in the bottom of - the file in which we implemented the operator routines): - + We could have written the operator entries more verbosely, as in <programlisting> -CREATE FUNCTION complex_abs_cmp(complex, complex) - RETURNS integer - AS '<replaceable>PGROOT</replaceable>/src/tutorial/complex' - LANGUAGE C; - -SELECT oid, proname FROM pg_proc - WHERE proname = 'complex_abs_cmp'; - - oid | proname ---------+----------------- - 277997 | complex_abs_cmp -(1 row) + OPERATOR 1 < (complex, complex) , </programlisting> + but there is no need to do so when the operators take the same datatype + we are defining the operator class for. + </para> - (Again, your OID number will probably be different.) + <para> + The above example assumes that you want to make this new operator class the + default B-tree operator class for the <type>complex</type> data type. + If you don't, just leave out the word <literal>DEFAULT</>. </para> + </sect1> + + <sect1 id="xindex-opclass-features"> + <title>Special Features of Operator Classes</title> <para> - We can add the new row as follows: + There are two special features of operator classes that we have + not discussed yet, mainly because they are not very useful + with the default B-tree index access method. + </para> + <para> + Normally, declaring an operator as a member of an operator class means + that the index access method can retrieve exactly the set of rows + that satisfy a WHERE condition using the operator. For example, <programlisting> -INSERT INTO pg_amproc (amopclaid, amprocnum, amproc) - SELECT opcl.oid, 1, p.oid - FROM pg_opclass opcl, pg_proc p - WHERE - opcamid = (SELECT oid FROM pg_am WHERE amname = 'btree') AND - opcname = 'complex_abs_ops' AND - p.proname = 'complex_abs_cmp'; +SELECT * FROM table WHERE integer_column < 4; </programlisting> + can be satisfied exactly by a B-tree index on the integer column. + But there are cases where an index is useful as an inexact guide to + the matching rows. For example, if an R-tree index stores only + bounding boxes for objects, then it cannot exactly satisfy a WHERE + condition that tests overlap between nonrectangular objects such as + polygons. Yet we could use the index to find objects whose bounding + box overlaps the bounding box of the target object, and then do the + exact overlap test only on the objects found by the index. If this + scenario applies, the index is said to be <quote>lossy</> for the + operator, and we mark the <literal>OPERATOR</> clause in the + <command>CREATE OPERATOR CLASS</> command with <literal>RECHECK</>. + <literal>RECHECK</> is valid if the index is guaranteed to return + all the required tuples, plus perhaps some additional tuples, which + can be eliminated by performing the original operator comparison. </para> <para> - And we're done! (Whew.) It should now be possible to create - and use B-tree indexes on <type>complex</type> columns. + Consider again the situation where we are storing in the index only + the bounding box of a complex object such as a polygon. In this + case there's not much value in storing the whole polygon in the index + entry --- we may as well store just a simpler object of type + <literal>box</>. This situation is expressed by the <literal>STORAGE</> + option in <command>CREATE OPERATOR CLASS</>: we'd write something like + +<programlisting> +CREATE OPERATOR CLASS polygon_ops + DEFAULT FOR TYPE polygon USING gist AS + ... + STORAGE box; +</programlisting> + + At present, only the GiST access method supports a + <literal>STORAGE</> type that's different from the column datatype. + The GiST <literal>compress</> and <literal>decompress</> support + routines must deal with datatype conversion when <literal>STORAGE</> + is used. </para> </sect1> diff --git a/src/tutorial/complex.source b/src/tutorial/complex.source index 6fbaaf89dbd..5df3c5d6779 100644 --- a/src/tutorial/complex.source +++ b/src/tutorial/complex.source @@ -5,9 +5,10 @@ -- use this new type. -- -- --- Copyright (c) 1994, Regents of the University of California +-- Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group +-- Portions Copyright (c) 1994, Regents of the University of California -- --- $Id: complex.source,v 1.12 2002/04/17 20:57:57 tgl Exp $ +-- $Header: /cvsroot/pgsql/src/tutorial/complex.source,v 1.13 2002/07/30 05:24:56 tgl Exp $ -- --------------------------------------------------------------------------- @@ -46,13 +47,14 @@ CREATE FUNCTION complex_out(opaque) CREATE TYPE complex ( internallength = 16, input = complex_in, - output = complex_out + output = complex_out, + alignment = double ); ----------------------------- -- Using the new type: --- user-defined types can be use like ordinary built-in types. +-- user-defined types can be used like ordinary built-in types. ----------------------------- -- eg. we can use it in a schema @@ -62,7 +64,7 @@ CREATE TABLE test_complex ( b complex ); --- data for user-defined type are just strings in the proper textual +-- data for user-defined types are just strings in the proper textual -- representation. INSERT INTO test_complex VALUES ('(1.0, 2.5)', '(4.2, 3.55 )'); @@ -74,7 +76,7 @@ SELECT * FROM test_complex; -- Creating an operator for the new type: -- Let's define an add operator for complex types. Since POSTGRES -- supports function overloading, we'll use + as the add operator. --- (Operators can be reused with different number and types of +-- (Operator names can be reused with different numbers and types of -- arguments.) ----------------------------- @@ -121,20 +123,11 @@ CREATE AGGREGATE complex_sum ( SELECT complex_sum(a) FROM test_complex; -------------------------------------------------------------------------------- --- ATTENTION! ATTENTION! ATTENTION! -- --- YOU MAY SKIP THE SECTION BELOW ON INTERFACING WITH INDICES. YOU DON'T -- --- NEED THE FOLLOWING IF YOU DON'T USE INDICES WITH NEW DATA TYPES. -- -------------------------------------------------------------------------------- - -SELECT 'READ ABOVE!' AS STOP; - ----------------------------- --- Interfacing New Types with Indices: +-- Interfacing New Types with Indexes: -- We cannot define a secondary index (eg. a B-tree) over the new type --- yet. We need to modify a few system catalogs to show POSTGRES how --- to use the new type. Unfortunately, there is no simple command to --- do this. Please bear with me. +-- yet. We need to create all the required operators and support +-- functions, then we can make the operator class. ----------------------------- -- first, define the required operators @@ -170,81 +163,20 @@ CREATE OPERATOR > ( restrict = scalargtsel, join = scalargtjoinsel ); -INSERT INTO pg_opclass (opcamid, opcname, opcnamespace, opcowner, opcintype, opcdefault, opckeytype) - VALUES ( - (SELECT oid FROM pg_am WHERE amname = 'btree'), - 'complex_abs_ops', - (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog'), - 1, -- UID of superuser is hardwired to 1 as of PG 7.3 - (SELECT oid FROM pg_type WHERE typname = 'complex'), - true, - 0); - -SELECT oid, * - FROM pg_opclass WHERE opcname = 'complex_abs_ops'; - -SELECT o.oid AS opoid, o.oprname -INTO TEMP TABLE complex_ops_tmp -FROM pg_operator o, pg_type t -WHERE o.oprleft = t.oid and o.oprright = t.oid - and t.typname = 'complex'; - --- make sure we have the right operators -SELECT * from complex_ops_tmp; - -INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) - SELECT opcl.oid, 1, false, c.opoid - FROM pg_opclass opcl, complex_ops_tmp c - WHERE - opcamid = (SELECT oid FROM pg_am WHERE amname = 'btree') - and opcname = 'complex_abs_ops' - and c.oprname = '<'; - -INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) - SELECT opcl.oid, 2, false, c.opoid - FROM pg_opclass opcl, complex_ops_tmp c - WHERE - opcamid = (SELECT oid FROM pg_am WHERE amname = 'btree') - and opcname = 'complex_abs_ops' - and c.oprname = '<='; - -INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) - SELECT opcl.oid, 3, false, c.opoid - FROM pg_opclass opcl, complex_ops_tmp c - WHERE - opcamid = (SELECT oid FROM pg_am WHERE amname = 'btree') - and opcname = 'complex_abs_ops' - and c.oprname = '='; - -INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) - SELECT opcl.oid, 4, false, c.opoid - FROM pg_opclass opcl, complex_ops_tmp c - WHERE - opcamid = (SELECT oid FROM pg_am WHERE amname = 'btree') - and opcname = 'complex_abs_ops' - and c.oprname = '>='; - -INSERT INTO pg_amop (amopclaid, amopstrategy, amopreqcheck, amopopr) - SELECT opcl.oid, 5, false, c.opoid - FROM pg_opclass opcl, complex_ops_tmp c - WHERE - opcamid = (SELECT oid FROM pg_am WHERE amname = 'btree') - and opcname = 'complex_abs_ops' - and c.oprname = '>'; - --- +-- create the support function too CREATE FUNCTION complex_abs_cmp(complex, complex) RETURNS int4 AS '_OBJWD_/complex' LANGUAGE 'c'; -SELECT oid, proname FROM pg_proc WHERE proname = 'complex_abs_cmp'; +-- now we can make the operator class +CREATE OPERATOR CLASS complex_abs_ops + DEFAULT FOR TYPE complex USING btree AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 complex_abs_cmp(complex, complex); -INSERT INTO pg_amproc (amopclaid, amprocnum, amproc) - SELECT opcl.oid, 1, pro.oid - FROM pg_opclass opcl, pg_proc pro - WHERE - opcamid = (SELECT oid FROM pg_am WHERE amname = 'btree') - and opcname = 'complex_abs_ops' - and proname = 'complex_abs_cmp'; -- now, we can define a btree index on complex types. First, let's populate -- the table. Note that postgres needs many more tuples to start using the @@ -259,35 +191,8 @@ SELECT * from test_complex where a = '(56.0,-22.5)'; SELECT * from test_complex where a < '(56.0,-22.5)'; SELECT * from test_complex where a > '(56.0,-22.5)'; -DELETE FROM pg_amop WHERE - amopclaid = (SELECT oid FROM pg_opclass WHERE - opcamid = (SELECT oid FROM pg_am WHERE amname = 'btree') - and opcname = 'complex_abs_ops'); - -DELETE FROM pg_amproc WHERE - amopclaid = (SELECT oid FROM pg_opclass WHERE - opcamid = (SELECT oid FROM pg_am WHERE amname = 'btree') - and opcname = 'complex_abs_ops'); - -DELETE FROM pg_opclass WHERE - opcamid = (SELECT oid FROM pg_am WHERE amname = 'btree') - and opcname = 'complex_abs_ops'; -DROP FUNCTION complex_in(opaque); +-- clean up the example +DROP TABLE test_complex; +DROP TYPE complex CASCADE; DROP FUNCTION complex_out(opaque); -DROP FUNCTION complex_add(complex, complex); -DROP FUNCTION complex_abs_lt(complex, complex); -DROP FUNCTION complex_abs_le(complex, complex); -DROP FUNCTION complex_abs_eq(complex, complex); -DROP FUNCTION complex_abs_ge(complex, complex); -DROP FUNCTION complex_abs_gt(complex, complex); -DROP FUNCTION complex_abs_cmp(complex, complex); -DROP OPERATOR + (complex, complex); -DROP OPERATOR < (complex, complex); -DROP OPERATOR <= (complex, complex); -DROP OPERATOR = (complex, complex); -DROP OPERATOR >= (complex, complex); -DROP OPERATOR > (complex, complex); -DROP AGGREGATE complex_sum (complex); -DROP TYPE complex; -DROP TABLE test_complex, complex_ops_tmp; |