aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/common/tupdesc.c18
-rw-r--r--src/backend/access/gist/gist.c16
-rw-r--r--src/backend/access/hash/hash.c12
-rw-r--r--src/backend/access/heap/tuptoaster.c39
-rw-r--r--src/backend/access/nbtree/nbtree.c12
-rw-r--r--src/backend/access/rtree/rtree.c12
-rw-r--r--src/backend/catalog/genbki.sh7
-rw-r--r--src/backend/catalog/heap.c125
-rw-r--r--src/backend/catalog/index.c82
-rw-r--r--src/backend/commands/analyze.c1880
-rw-r--r--src/backend/commands/command.c128
-rw-r--r--src/backend/commands/vacuum.c241
-rw-r--r--src/backend/executor/nodeSort.c79
-rw-r--r--src/backend/nodes/copyfuncs.c11
-rw-r--r--src/backend/nodes/equalfuncs.c10
-rw-r--r--src/backend/nodes/readfuncs.c8
-rw-r--r--src/backend/optimizer/path/costsize.c193
-rw-r--r--src/backend/optimizer/path/joinpath.c58
-rw-r--r--src/backend/optimizer/plan/createplan.c10
-rw-r--r--src/backend/optimizer/plan/initsplan.c9
-rw-r--r--src/backend/optimizer/plan/planner.c6
-rw-r--r--src/backend/optimizer/prep/prepunion.c6
-rw-r--r--src/backend/optimizer/util/pathnode.c12
-rw-r--r--src/backend/optimizer/util/plancat.c3
-rw-r--r--src/backend/parser/analyze.c4
-rw-r--r--src/backend/parser/gram.y191
-rw-r--r--src/backend/parser/keywords.c3
-rw-r--r--src/backend/parser/parse_relation.c20
-rw-r--r--src/backend/tcop/utility.c29
-rw-r--r--src/backend/utils/adt/selfuncs.c923
-rw-r--r--src/backend/utils/cache/lsyscache.c259
-rw-r--r--src/backend/utils/cache/syscache.c4
-rw-r--r--src/backend/utils/sort/tuplesort.c233
-rw-r--r--src/include/access/tuptoaster.h12
-rw-r--r--src/include/catalog/catversion.h4
-rw-r--r--src/include/catalog/heap.h5
-rw-r--r--src/include/catalog/index.h4
-rw-r--r--src/include/catalog/indexing.h4
-rw-r--r--src/include/catalog/pg_attribute.h254
-rw-r--r--src/include/catalog/pg_class.h4
-rw-r--r--src/include/catalog/pg_statistic.h177
-rw-r--r--src/include/commands/command.h12
-rw-r--r--src/include/commands/vacuum.h120
-rw-r--r--src/include/config.h.in7
-rw-r--r--src/include/nodes/execnodes.h4
-rw-r--r--src/include/nodes/parsenodes.h21
-rw-r--r--src/include/nodes/primnodes.h8
-rw-r--r--src/include/nodes/relation.h6
-rw-r--r--src/include/optimizer/cost.h5
-rw-r--r--src/include/optimizer/pathnode.h4
-rw-r--r--src/include/utils/lsyscache.h12
-rw-r--r--src/include/utils/syscache.h4
-rw-r--r--src/include/utils/tuplesort.h22
-rw-r--r--src/interfaces/ecpg/preproc/keywords.c3
-rw-r--r--src/interfaces/ecpg/preproc/preproc.y116
-rw-r--r--src/test/regress/expected/oidjoins.out26
-rw-r--r--src/test/regress/expected/opr_sanity.out4
-rw-r--r--src/test/regress/sql/oidjoins.sql14
58 files changed, 3606 insertions, 1889 deletions
diff --git a/src/backend/access/common/tupdesc.c b/src/backend/access/common/tupdesc.c
index 769f754b669..86d704e8d08 100644
--- a/src/backend/access/common/tupdesc.c
+++ b/src/backend/access/common/tupdesc.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.73 2001/03/22 06:16:06 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.74 2001/05/07 00:43:15 tgl Exp $
*
* NOTES
* some of the executor utility code such as "ExecTypeFromTL" should be
@@ -237,16 +237,16 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2)
Form_pg_attribute attr2 = tupdesc2->attrs[i];
/*
- * We do not need to check every single field here, and in fact
- * some fields such as attdispersion probably shouldn't be
- * compared. We can also disregard attnum (it was used to place
- * the row in the attrs array) and everything derived from the
- * column datatype.
+ * We do not need to check every single field here: we can disregard
+ * attrelid, attnum (it was used to place the row in the attrs array)
+ * and everything derived from the column datatype.
*/
if (strcmp(NameStr(attr1->attname), NameStr(attr2->attname)) != 0)
return false;
if (attr1->atttypid != attr2->atttypid)
return false;
+ if (attr1->attstattarget != attr2->attstattarget)
+ return false;
if (attr1->atttypmod != attr2->atttypmod)
return false;
if (attr1->attstorage != attr2->attstorage)
@@ -365,12 +365,12 @@ TupleDescInitEntry(TupleDesc desc,
else
MemSet(NameStr(att->attname), 0, NAMEDATALEN);
- att->attdispersion = 0; /* dummy value */
+ att->attstattarget = 0;
att->attcacheoff = -1;
att->atttypmod = typmod;
att->attnum = attributeNumber;
- att->attnelems = attdim;
+ att->attndims = attdim;
att->attisset = attisset;
att->attnotnull = false;
@@ -506,7 +506,7 @@ TupleDescMakeSelfReference(TupleDesc desc,
att->attbyval = true;
att->attalign = 'i';
att->attstorage = 'p';
- att->attnelems = 0;
+ att->attndims = 0;
}
/* ----------------------------------------------------------------
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c
index 1c5577b88a0..06010896821 100644
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -6,7 +6,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.72 2001/03/22 03:59:12 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.73 2001/05/07 00:43:15 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -84,8 +84,8 @@ static void gist_dumptree(Relation r, int level, BlockNumber blk, OffsetNumber c
#endif
/*
-** routine to build an index. Basically calls insert over and over
-*/
+ * routine to build an index. Basically calls insert over and over
+ */
Datum
gistbuild(PG_FUNCTION_ARGS)
{
@@ -105,7 +105,7 @@ gistbuild(PG_FUNCTION_ARGS)
itupdesc;
Datum attdata[INDEX_MAX_KEYS];
char nulls[INDEX_MAX_KEYS];
- int nhtups,
+ double nhtups,
nitups;
Node *pred = indexInfo->ii_Predicate;
@@ -172,7 +172,7 @@ gistbuild(PG_FUNCTION_ARGS)
#endif /* OMIT_PARTIAL_INDEX */
/* build the index */
- nhtups = nitups = 0;
+ nhtups = nitups = 0.0;
compvec = (bool *) palloc(sizeof(bool) * indexInfo->ii_NumIndexAttrs);
@@ -183,7 +183,7 @@ gistbuild(PG_FUNCTION_ARGS)
{
MemoryContextReset(econtext->ecxt_per_tuple_memory);
- nhtups++;
+ nhtups += 1.0;
#ifndef OMIT_PARTIAL_INDEX
@@ -196,7 +196,7 @@ gistbuild(PG_FUNCTION_ARGS)
slot->val = htup;
if (ExecQual((List *) oldPred, econtext, false))
{
- nitups++;
+ nitups += 1.0;
continue;
}
}
@@ -213,7 +213,7 @@ gistbuild(PG_FUNCTION_ARGS)
}
#endif /* OMIT_PARTIAL_INDEX */
- nitups++;
+ nitups += 1.0;
/*
* For the current heap tuple, extract all the attributes we use
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index aa76ba232a0..9617fcc33a6 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.50 2001/03/22 03:59:12 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.51 2001/05/07 00:43:15 tgl Exp $
*
* NOTES
* This file contains only the public interface routines.
@@ -57,7 +57,7 @@ hashbuild(PG_FUNCTION_ARGS)
itupdesc;
Datum attdata[INDEX_MAX_KEYS];
char nulls[INDEX_MAX_KEYS];
- int nhtups,
+ double nhtups,
nitups;
HashItem hitem;
Node *pred = indexInfo->ii_Predicate;
@@ -109,7 +109,7 @@ hashbuild(PG_FUNCTION_ARGS)
#endif /* OMIT_PARTIAL_INDEX */
/* build the index */
- nhtups = nitups = 0;
+ nhtups = nitups = 0.0;
/* start a heap scan */
hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -118,7 +118,7 @@ hashbuild(PG_FUNCTION_ARGS)
{
MemoryContextReset(econtext->ecxt_per_tuple_memory);
- nhtups++;
+ nhtups += 1.0;
#ifndef OMIT_PARTIAL_INDEX
@@ -131,7 +131,7 @@ hashbuild(PG_FUNCTION_ARGS)
slot->val = htup;
if (ExecQual((List *) oldPred, econtext, false))
{
- nitups++;
+ nitups += 1.0;
continue;
}
}
@@ -148,7 +148,7 @@ hashbuild(PG_FUNCTION_ARGS)
}
#endif /* OMIT_PARTIAL_INDEX */
- nitups++;
+ nitups += 1.0;
/*
* For the current heap tuple, extract all the attributes we use
diff --git a/src/backend/access/heap/tuptoaster.c b/src/backend/access/heap/tuptoaster.c
index fb509ab66de..2a9df577b10 100644
--- a/src/backend/access/heap/tuptoaster.c
+++ b/src/backend/access/heap/tuptoaster.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.21 2001/03/25 00:45:20 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.22 2001/05/07 00:43:15 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -167,6 +167,43 @@ heap_tuple_untoast_attr(varattrib *attr)
/* ----------
+ * toast_raw_datum_size -
+ *
+ * Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+Size
+toast_raw_datum_size(Datum value)
+{
+ varattrib *attr = (varattrib *) DatumGetPointer(value);
+ Size result;
+
+ if (VARATT_IS_COMPRESSED(attr))
+ {
+ /*
+ * va_rawsize shows the original data size, whether the datum
+ * is external or not.
+ */
+ result = attr->va_content.va_compressed.va_rawsize + VARHDRSZ;
+ }
+ else if (VARATT_IS_EXTERNAL(attr))
+ {
+ /*
+ * an uncompressed external attribute has rawsize including the
+ * header (not too consistent!)
+ */
+ result = attr->va_content.va_external.va_rawsize;
+ }
+ else
+ {
+ /* plain untoasted datum */
+ result = VARSIZE(attr);
+ }
+ return result;
+}
+
+
+/* ----------
* toast_delete -
*
* Cascaded delete toast-entries on DELETE
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 97d99da4fde..f456e0c9306 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.79 2001/03/22 03:59:15 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.80 2001/05/07 00:43:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -69,7 +69,7 @@ btbuild(PG_FUNCTION_ARGS)
itupdesc;
Datum attdata[INDEX_MAX_KEYS];
char nulls[INDEX_MAX_KEYS];
- int nhtups,
+ double nhtups,
nitups;
Node *pred = indexInfo->ii_Predicate;
@@ -156,7 +156,7 @@ btbuild(PG_FUNCTION_ARGS)
#endif /* OMIT_PARTIAL_INDEX */
/* build the index */
- nhtups = nitups = 0;
+ nhtups = nitups = 0.0;
if (usefast)
{
@@ -196,7 +196,7 @@ btbuild(PG_FUNCTION_ARGS)
MemoryContextReset(econtext->ecxt_per_tuple_memory);
- nhtups++;
+ nhtups += 1.0;
#ifndef OMIT_PARTIAL_INDEX
@@ -209,7 +209,7 @@ btbuild(PG_FUNCTION_ARGS)
slot->val = htup;
if (ExecQual((List *) oldPred, econtext, false))
{
- nitups++;
+ nitups += 1.0;
continue;
}
}
@@ -226,7 +226,7 @@ btbuild(PG_FUNCTION_ARGS)
}
#endif /* OMIT_PARTIAL_INDEX */
- nitups++;
+ nitups += 1.0;
/*
* For the current heap tuple, extract all the attributes we use
diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c
index 3752a59e99a..a8c6a13ea3c 100644
--- a/src/backend/access/rtree/rtree.c
+++ b/src/backend/access/rtree/rtree.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.61 2001/03/22 03:59:16 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.62 2001/05/07 00:43:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -100,7 +100,7 @@ rtbuild(PG_FUNCTION_ARGS)
itupdesc;
Datum attdata[INDEX_MAX_KEYS];
char nulls[INDEX_MAX_KEYS];
- int nhtups,
+ double nhtups,
nitups;
Node *pred = indexInfo->ii_Predicate;
@@ -163,7 +163,7 @@ rtbuild(PG_FUNCTION_ARGS)
#endif /* OMIT_PARTIAL_INDEX */
/* count the tuples as we insert them */
- nhtups = nitups = 0;
+ nhtups = nitups = 0.0;
/* start a heap scan */
hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -172,7 +172,7 @@ rtbuild(PG_FUNCTION_ARGS)
{
MemoryContextReset(econtext->ecxt_per_tuple_memory);
- nhtups++;
+ nhtups += 1.0;
#ifndef OMIT_PARTIAL_INDEX
@@ -185,7 +185,7 @@ rtbuild(PG_FUNCTION_ARGS)
slot->val = htup;
if (ExecQual((List *) oldPred, econtext, false))
{
- nitups++;
+ nitups += 1.0;
continue;
}
}
@@ -202,7 +202,7 @@ rtbuild(PG_FUNCTION_ARGS)
}
#endif /* OMIT_PARTIAL_INDEX */
- nitups++;
+ nitups += 1.0;
/*
* For the current heap tuple, extract all the attributes we use
diff --git a/src/backend/catalog/genbki.sh b/src/backend/catalog/genbki.sh
index c2993fa8fc6..cac53f3e085 100644
--- a/src/backend/catalog/genbki.sh
+++ b/src/backend/catalog/genbki.sh
@@ -10,7 +10,7 @@
#
#
# IDENTIFICATION
-# $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.19 2001/01/16 22:48:34 tgl Exp $
+# $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.20 2001/05/07 00:43:16 tgl Exp $
#
# NOTES
# non-essential whitespace is removed from the generated file.
@@ -126,10 +126,12 @@ for dir in $INCLUDE_DIRS; do
fi
done
-# Get INDEX_MAX_KEYS from config.h (who needs consistency?)
+# Get INDEX_MAX_KEYS and DEFAULT_ATTSTATTARGET from config.h
+# (who needs consistency?)
for dir in $INCLUDE_DIRS; do
if [ -f "$dir/config.h" ]; then
INDEXMAXKEYS=`grep '#define[ ]*INDEX_MAX_KEYS' $dir/config.h | $AWK '{ print $3 }'`
+ DEFAULTATTSTATTARGET=`grep '#define[ ]*DEFAULT_ATTSTATTARGET' $dir/config.h | $AWK '{ print $3 }'`
break
fi
done
@@ -168,6 +170,7 @@ sed -e "s/;[ ]*$//g" \
-e "s/(NameData/(name/g" \
-e "s/(Oid/(oid/g" \
-e "s/NAMEDATALEN/$NAMEDATALEN/g" \
+ -e "s/DEFAULT_ATTSTATTARGET/$DEFAULTATTSTATTARGET/g" \
-e "s/INDEX_MAX_KEYS\*2/$INDEXMAXKEYS2/g" \
-e "s/INDEX_MAX_KEYS\*4/$INDEXMAXKEYS4/g" \
-e "s/INDEX_MAX_KEYS/$INDEXMAXKEYS/g" \
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 54867d51a4b..03f16e11c3f 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.162 2001/03/22 06:16:10 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.163 2001/05/07 00:43:17 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -96,54 +96,72 @@ static void RemoveStatistics(Relation rel);
/*
* Note:
- * Should the executor special case these attributes in the future?
- * Advantage: consume 1/2 the space in the ATTRIBUTE relation.
- * Disadvantage: having rules to compute values in these tuples may
- * be more difficult if not impossible.
+ * Should the system special case these attributes in the future?
+ * Advantage: consume much less space in the ATTRIBUTE relation.
+ * Disadvantage: special cases will be all over the place.
*/
static FormData_pg_attribute a1 = {
- 0xffffffff, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
- SelfItemPointerAttributeNumber, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'
+ 0, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
+ SelfItemPointerAttributeNumber, 0, -1, -1,
+ false, 'p', false, 'i', false, false
};
static FormData_pg_attribute a2 = {
- 0xffffffff, {"oid"}, OIDOID, 0, sizeof(Oid),
- ObjectIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+ 0, {"oid"}, OIDOID, 0, sizeof(Oid),
+ ObjectIdAttributeNumber, 0, -1, -1,
+ true, 'p', false, 'i', false, false
};
static FormData_pg_attribute a3 = {
- 0xffffffff, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
- MinTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+ 0, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
+ MinTransactionIdAttributeNumber, 0, -1, -1,
+ true, 'p', false, 'i', false, false
};
static FormData_pg_attribute a4 = {
- 0xffffffff, {"cmin"}, CIDOID, 0, sizeof(CommandId),
- MinCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+ 0, {"cmin"}, CIDOID, 0, sizeof(CommandId),
+ MinCommandIdAttributeNumber, 0, -1, -1,
+ true, 'p', false, 'i', false, false
};
static FormData_pg_attribute a5 = {
- 0xffffffff, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
- MaxTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+ 0, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
+ MaxTransactionIdAttributeNumber, 0, -1, -1,
+ true, 'p', false, 'i', false, false
};
static FormData_pg_attribute a6 = {
- 0xffffffff, {"cmax"}, CIDOID, 0, sizeof(CommandId),
- MaxCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+ 0, {"cmax"}, CIDOID, 0, sizeof(CommandId),
+ MaxCommandIdAttributeNumber, 0, -1, -1,
+ true, 'p', false, 'i', false, false
};
/*
- We decide to call this attribute "tableoid" rather than say
-"classoid" on the basis that in the future there may be more than one
-table of a particular class/type. In any case table is still the word
-used in SQL.
-*/
+ * We decided to call this attribute "tableoid" rather than say
+ * "classoid" on the basis that in the future there may be more than one
+ * table of a particular class/type. In any case table is still the word
+ * used in SQL.
+ */
static FormData_pg_attribute a7 = {
- 0xffffffff, {"tableoid"}, OIDOID, 0, sizeof(Oid),
- TableOidAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+ 0, {"tableoid"}, OIDOID, 0, sizeof(Oid),
+ TableOidAttributeNumber, 0, -1, -1,
+ true, 'p', false, 'i', false, false
};
-static Form_pg_attribute HeapAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+static Form_pg_attribute SysAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+
+/*
+ * This function returns a Form_pg_attribute pointer for a system attribute.
+ */
+Form_pg_attribute
+SystemAttributeDefinition(AttrNumber attno)
+{
+ if (attno >= 0 || attno < - (int) lengthof(SysAtt))
+ elog(ERROR, "SystemAttributeDefinition: invalid attribute number %d",
+ attno);
+ return SysAtt[-attno - 1];
+}
/* ----------------------------------------------------------------
* XXX END OF UGLY HARD CODED BADNESS XXX
@@ -380,32 +398,6 @@ heap_storage_create(Relation rel)
* 8) the relations are closed and the new relation's oid
* is returned.
*
- * old comments:
- * A new relation is inserted into the RELATION relation
- * with the specified attribute(s) (newly inserted into
- * the ATTRIBUTE relation). How does concurrency control
- * work? Is it automatic now? Expects the caller to have
- * attname, atttypid, atttyparg, attproc, and attlen domains filled.
- * Create fills the attnum domains sequentually from zero,
- * fills the attdispersion domains with zeros, and fills the
- * attrelid fields with the relid.
- *
- * scan relation catalog for name conflict
- * scan type catalog for typids (if not arg)
- * create and insert attribute(s) into attribute catalog
- * create new relation
- * insert new relation into attribute catalog
- *
- * Should coordinate with heap_create_with_catalog(). Either
- * it should not be called or there should be a way to prevent
- * the relation from being removed at the end of the
- * transaction if it is successful ('u'/'r' may be enough).
- * Also, if the transaction does not commit, then the
- * relation should be removed.
- *
- * XXX amcreate ignores "off" when inserting (for now).
- * XXX amcreate (like the other utilities) needs to understand indexes.
- *
* ----------------------------------------------------------------
*/
@@ -432,14 +424,14 @@ CheckAttributeNames(TupleDesc tupdesc)
*/
for (i = 0; i < natts; i++)
{
- for (j = 0; j < (int) (sizeof(HeapAtt) / sizeof(HeapAtt[0])); j++)
+ for (j = 0; j < (int) lengthof(SysAtt); j++)
{
- if (strcmp(NameStr(HeapAtt[j]->attname),
+ if (strcmp(NameStr(SysAtt[j]->attname),
NameStr(tupdesc->attrs[i]->attname)) == 0)
{
elog(ERROR, "Attribute '%s' has a name conflict"
"\n\tName matches an existing system attribute",
- NameStr(HeapAtt[j]->attname));
+ NameStr(SysAtt[j]->attname));
}
}
if (tupdesc->attrs[i]->atttypid == UNKNOWNOID)
@@ -574,7 +566,7 @@ AddNewAttributeTuples(Oid new_rel_oid,
/* Fill in the correct relation OID */
(*dpp)->attrelid = new_rel_oid;
/* Make sure these are OK, too */
- (*dpp)->attdispersion = 0;
+ (*dpp)->attstattarget = DEFAULT_ATTSTATTARGET;
(*dpp)->attcacheoff = -1;
tup = heap_addheader(Natts_pg_attribute,
@@ -593,14 +585,14 @@ AddNewAttributeTuples(Oid new_rel_oid,
/*
* next we add the system attributes..
*/
- dpp = HeapAtt;
+ dpp = SysAtt;
for (i = 0; i < -1 - FirstLowInvalidHeapAttributeNumber; i++)
{
/* Fill in the correct relation OID */
/* HACK: we are writing on static data here */
(*dpp)->attrelid = new_rel_oid;
/* Unneeded since they should be OK in the constant data anyway */
- /* (*dpp)->attdispersion = 0; */
+ /* (*dpp)->attstattarget = 0; */
/* (*dpp)->attcacheoff = -1; */
tup = heap_addheader(Natts_pg_attribute,
@@ -669,8 +661,23 @@ AddNewRelationTuple(Relation pg_class_desc,
* save. (NOTE: CREATE INDEX inserts the same bogus estimates if it
* finds the relation has 0 rows and pages. See index.c.)
*/
- new_rel_reltup->relpages = 10; /* bogus estimates */
- new_rel_reltup->reltuples = 1000;
+ switch (relkind)
+ {
+ case RELKIND_RELATION:
+ case RELKIND_INDEX:
+ case RELKIND_TOASTVALUE:
+ new_rel_reltup->relpages = 10; /* bogus estimates */
+ new_rel_reltup->reltuples = 1000;
+ break;
+ case RELKIND_SEQUENCE:
+ new_rel_reltup->relpages = 1;
+ new_rel_reltup->reltuples = 1;
+ break;
+ default: /* views, etc */
+ new_rel_reltup->relpages = 0;
+ new_rel_reltup->reltuples = 0;
+ break;
+ }
new_rel_reltup->relowner = GetUserId();
new_rel_reltup->reltype = new_type_oid;
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 2adb30e1ed8..5eefab11489 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.145 2001/04/02 14:34:25 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.146 2001/05/07 00:43:17 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -55,7 +55,7 @@
*/
#define AVG_ATTR_SIZE 8
#define NTUPLES_PER_PAGE(natts) \
- ((BLCKSZ - MAXALIGN(sizeof (PageHeaderData))) / \
+ ((BLCKSZ - MAXALIGN(sizeof(PageHeaderData))) / \
((natts) * AVG_ATTR_SIZE + MAXALIGN(sizeof(HeapTupleHeaderData))))
/* non-export function prototypes */
@@ -99,39 +99,6 @@ IsReindexProcessing(void)
}
/* ----------------------------------------------------------------
- * sysatts is a structure containing attribute tuple forms
- * for system attributes (numbered -1, -2, ...). This really
- * should be generated or eliminated or moved elsewhere. -cim 1/19/91
- *
- * typedef struct FormData_pg_attribute {
- * Oid attrelid;
- * NameData attname;
- * Oid atttypid;
- * uint32 attnvals;
- * int16 attlen;
- * AttrNumber attnum;
- * uint32 attnelems;
- * int32 attcacheoff;
- * int32 atttypmod;
- * bool attbyval;
- * bool attisset;
- * char attalign;
- * bool attnotnull;
- * bool atthasdef;
- * } FormData_pg_attribute;
- *
- * ----------------------------------------------------------------
- */
-static FormData_pg_attribute sysatts[] = {
- {0, {"ctid"}, TIDOID, 0, 6, -1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'},
- {0, {"oid"}, OIDOID, 0, 4, -2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
- {0, {"xmin"}, XIDOID, 0, 4, -3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
- {0, {"cmin"}, CIDOID, 0, 4, -4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
- {0, {"xmax"}, XIDOID, 0, 4, -5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
- {0, {"cmax"}, CIDOID, 0, 4, -6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-};
-
-/* ----------------------------------------------------------------
* GetHeapRelationOid
* ----------------------------------------------------------------
*/
@@ -250,7 +217,6 @@ ConstructTupleDescriptor(Relation heapRelation,
for (i = 0; i < numatts; i++)
{
AttrNumber atnum; /* attributeNumber[attributeOffset] */
- AttrNumber atind;
Form_pg_attribute from;
Form_pg_attribute to;
@@ -264,16 +230,9 @@ ConstructTupleDescriptor(Relation heapRelation,
{
/*
- * here we are indexing on a system attribute (-1...-n) so we
- * convert atnum into a usable index 0...n-1 so we can use it
- * to dereference the array sysatts[] which stores tuple
- * descriptor information for system attributes.
+ * here we are indexing on a system attribute (-1...-n)
*/
- if (atnum <= FirstLowInvalidHeapAttributeNumber || atnum >= 0)
- elog(ERROR, "Cannot create index on system attribute: attribute number out of range (%d)", atnum);
- atind = (-atnum) - 1;
-
- from = &sysatts[atind];
+ from = SystemAttributeDefinition(atnum);
}
else
{
@@ -284,9 +243,8 @@ ConstructTupleDescriptor(Relation heapRelation,
if (atnum > natts)
elog(ERROR, "Cannot create index: attribute %d does not exist",
atnum);
- atind = AttrNumberGetAttrOffset(atnum);
- from = heapTupDesc->attrs[atind];
+ from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
}
/*
@@ -303,10 +261,10 @@ ConstructTupleDescriptor(Relation heapRelation,
*/
to->attnum = i + 1;
- to->attdispersion = 0.0;
+ to->attstattarget = 0;
+ to->attcacheoff = -1;
to->attnotnull = false;
to->atthasdef = false;
- to->attcacheoff = -1;
/*
* We do not yet have the correct relation OID for the index, so
@@ -1542,10 +1500,14 @@ setNewRelfilenode(Relation relation)
/* ----------------
* UpdateStats
+ *
+ * Update pg_class' relpages and reltuples statistics for the given relation
+ * (which can be either a table or an index). Note that this is not used
+ * in the context of VACUUM.
* ----------------
*/
void
-UpdateStats(Oid relid, long reltuples)
+UpdateStats(Oid relid, double reltuples)
{
Relation whichRel;
Relation pg_class;
@@ -1636,6 +1598,10 @@ UpdateStats(Oid relid, long reltuples)
* with zero size statistics until a VACUUM is done. The optimizer
* will generate very bad plans if the stats claim the table is empty
* when it is actually sizable. See also CREATE TABLE in heap.c.
+ *
+ * Note: this path is also taken during bootstrap, because bootstrap.c
+ * passes reltuples = 0 after loading a table. We have to estimate some
+ * number for reltuples based on the actual number of pages.
*/
relpages = RelationGetNumberOfBlocks(whichRel);
@@ -1689,15 +1655,15 @@ UpdateStats(Oid relid, long reltuples)
for (i = 0; i < Natts_pg_class; i++)
{
- nulls[i] = heap_attisnull(tuple, i + 1) ? 'n' : ' ';
+ nulls[i] = ' ';
replace[i] = ' ';
values[i] = (Datum) NULL;
}
replace[Anum_pg_class_relpages - 1] = 'r';
- values[Anum_pg_class_relpages - 1] = (Datum) relpages;
+ values[Anum_pg_class_relpages - 1] = Int32GetDatum(relpages);
replace[Anum_pg_class_reltuples - 1] = 'r';
- values[Anum_pg_class_reltuples - 1] = (Datum) reltuples;
+ values[Anum_pg_class_reltuples - 1] = Float4GetDatum((float4) reltuples);
newtup = heap_modifytuple(tuple, pg_class, values, nulls, replace);
simple_heap_update(pg_class, &tuple->t_self, newtup);
if (!IsIgnoringSystemIndexes())
@@ -1741,7 +1707,7 @@ DefaultBuild(Relation heapRelation,
TupleDesc heapDescriptor;
Datum datum[INDEX_MAX_KEYS];
char nullv[INDEX_MAX_KEYS];
- long reltuples,
+ double reltuples,
indtuples;
Node *predicate = indexInfo->ii_Predicate;
@@ -1796,7 +1762,7 @@ DefaultBuild(Relation heapRelation,
0, /* number of keys */
(ScanKey) NULL); /* scan key */
- reltuples = indtuples = 0;
+ reltuples = indtuples = 0.0;
/*
* for each tuple in the base relation, we create an index tuple and
@@ -1808,7 +1774,7 @@ DefaultBuild(Relation heapRelation,
{
MemoryContextReset(econtext->ecxt_per_tuple_memory);
- reltuples++;
+ reltuples += 1.0;
#ifndef OMIT_PARTIAL_INDEX
@@ -1821,7 +1787,7 @@ DefaultBuild(Relation heapRelation,
slot->val = heapTuple;
if (ExecQual((List *) oldPred, econtext, false))
{
- indtuples++;
+ indtuples += 1.0;
continue;
}
}
@@ -1838,7 +1804,7 @@ DefaultBuild(Relation heapRelation,
}
#endif /* OMIT_PARTIAL_INDEX */
- indtuples++;
+ indtuples += 1.0;
/*
* FormIndexDatum fills in its datum and null parameters with
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 88e56869da5..24cc7a8b254 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -8,19 +8,16 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.16 2001/03/22 06:16:11 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.17 2001/05/07 00:43:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
-#include <sys/types.h>
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
+#include <math.h>
#include "access/heapam.h"
+#include "access/tuptoaster.h"
#include "catalog/catname.h"
#include "catalog/indexing.h"
#include "catalog/pg_operator.h"
@@ -29,43 +26,139 @@
#include "commands/vacuum.h"
#include "miscadmin.h"
#include "parser/parse_oper.h"
-#include "tcop/tcopprot.h"
#include "utils/acl.h"
#include "utils/builtins.h"
+#include "utils/datum.h"
#include "utils/fmgroids.h"
-#include "utils/inval.h"
#include "utils/syscache.h"
+#include "utils/tuplesort.h"
-#define swapLong(a,b) {long tmp; tmp=a; a=b; b=tmp;}
-#define swapInt(a,b) {int tmp; tmp=a; a=b; b=tmp;}
-#define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;}
-#define VacAttrStatsEqValid(stats) ( stats->f_cmpeq.fn_addr != NULL )
-#define VacAttrStatsLtGtValid(stats) ( stats->f_cmplt.fn_addr != NULL && \
- stats->f_cmpgt.fn_addr != NULL && \
- RegProcedureIsValid(stats->outfunc) )
+/*
+ * Analysis algorithms supported
+ */
+typedef enum {
+ ALG_MINIMAL = 1, /* Compute only most-common-values */
+ ALG_SCALAR /* Compute MCV, histogram, sort correlation */
+} AlgCode;
+
+/*
+ * To avoid consuming too much memory during analysis and/or too much space
+ * in the resulting pg_statistic rows, we ignore varlena datums that are wider
+ * than WIDTH_THRESHOLD (after detoasting!). This is legitimate for MCV
+ * and distinct-value calculations since a wide value is unlikely to be
+ * duplicated at all, much less be a most-common value. For the same reason,
+ * ignoring wide values will not affect our estimates of histogram bin
+ * boundaries very much.
+ */
+#define WIDTH_THRESHOLD 256
+
+/*
+ * We build one of these structs for each attribute (column) that is to be
+ * analyzed. The struct and subsidiary data are in TransactionCommandContext,
+ * so they live until the end of the ANALYZE operation.
+ */
+typedef struct
+{
+ /* These fields are set up by examine_attribute */
+ int attnum; /* attribute number */
+ AlgCode algcode; /* Which algorithm to use for this column */
+ int minrows; /* Minimum # of rows needed for stats */
+ Form_pg_attribute attr; /* copy of pg_attribute row for column */
+ Form_pg_type attrtype; /* copy of pg_type row for column */
+ Oid eqopr; /* '=' operator for datatype, if any */
+ Oid eqfunc; /* and associated function */
+ Oid ltopr; /* '<' operator for datatype, if any */
+
+ /* These fields are filled in by the actual statistics-gathering routine */
+ bool stats_valid;
+ float4 stanullfrac; /* fraction of entries that are NULL */
+ int4 stawidth; /* average width */
+ float4 stadistinct; /* # distinct values */
+ int2 stakind[STATISTIC_NUM_SLOTS];
+ Oid staop[STATISTIC_NUM_SLOTS];
+ int numnumbers[STATISTIC_NUM_SLOTS];
+ float4 *stanumbers[STATISTIC_NUM_SLOTS];
+ int numvalues[STATISTIC_NUM_SLOTS];
+ Datum *stavalues[STATISTIC_NUM_SLOTS];
+} VacAttrStats;
+
+
+typedef struct
+{
+ Datum value; /* a data value */
+ int tupno; /* position index for tuple it came from */
+} ScalarItem;
+
+typedef struct
+{
+ int count; /* # of duplicates */
+ int first; /* values[] index of first occurrence */
+} ScalarMCVItem;
+
+
+#define swapInt(a,b) {int _tmp; _tmp=a; a=b; b=_tmp;}
+#define swapDatum(a,b) {Datum _tmp; _tmp=a; a=b; b=_tmp;}
-static void attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple);
-static void bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len);
-static void update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats);
-static void del_stats(Oid relid, int attcnt, int *attnums);
+
+static int MESSAGE_LEVEL;
+
+/* context information for compare_scalars() */
+static FmgrInfo *datumCmpFn;
+static SortFunctionKind datumCmpFnKind;
+static int *datumCmpTupnoLink;
+
+
+static VacAttrStats *examine_attribute(Relation onerel, int attnum);
+static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
+ int targrows, long *totalrows);
+static double random_fract(void);
+static double init_selection_state(int n);
+static long select_next_random_record(long t, int n, double *stateptr);
+static int compare_rows(const void *a, const void *b);
+static int compare_scalars(const void *a, const void *b);
+static int compare_mcvs(const void *a, const void *b);
+static OffsetNumber get_page_max_offset(Relation relation,
+ BlockNumber blocknumber);
+static void compute_minimal_stats(VacAttrStats *stats,
+ TupleDesc tupDesc, long totalrows,
+ HeapTuple *rows, int numrows);
+static void compute_scalar_stats(VacAttrStats *stats,
+ TupleDesc tupDesc, long totalrows,
+ HeapTuple *rows, int numrows);
+static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
/*
- * analyze_rel() -- analyze relation
+ * analyze_rel() -- analyze one relation
*/
void
-analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
+analyze_rel(Oid relid, VacuumStmt *vacstmt)
{
- HeapTuple tuple;
Relation onerel;
- int32 i;
- int attr_cnt,
- *attnums = NULL;
Form_pg_attribute *attr;
- VacAttrStats *vacattrstats;
- HeapScanDesc scan;
+ int attr_cnt,
+ tcnt,
+ i;
+ VacAttrStats **vacattrstats;
+ int targrows,
+ numrows;
+ long totalrows;
+ HeapTuple *rows;
+ HeapTuple tuple;
+
+ if (vacstmt->verbose)
+ MESSAGE_LEVEL = NOTICE;
+ else
+ MESSAGE_LEVEL = DEBUG;
+ /*
+ * Begin a transaction for analyzing this relation.
+ *
+ * Note: All memory allocated during ANALYZE will live in
+ * TransactionCommandContext or a subcontext thereof, so it will
+ * all be released by transaction commit at the end of this routine.
+ */
StartTransactionCommand();
/*
@@ -76,7 +169,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
/*
* Race condition -- if the pg_class tuple has gone away since the
- * last time we saw it, we don't need to vacuum it.
+ * last time we saw it, we don't need to process it.
*/
tuple = SearchSysCache(RELOID,
ObjectIdGetDatum(relid),
@@ -88,8 +181,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
}
/*
- * We can VACUUM ANALYZE any table except pg_statistic. see
- * update_relstats
+ * We can ANALYZE any table except pg_statistic. See update_attstats
*/
if (strcmp(NameStr(((Form_pg_class) GETSTRUCT(tuple))->relname),
StatisticRelationName) == 0)
@@ -100,586 +192,1466 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
}
ReleaseSysCache(tuple);
+ /*
+ * Open the class, getting only a read lock on it, and check permissions
+ */
onerel = heap_open(relid, AccessShareLock);
if (!pg_ownercheck(GetUserId(), RelationGetRelationName(onerel),
RELNAME))
{
-
- /*
- * we already did an elog during vacuum elog(NOTICE, "Skipping
- * \"%s\" --- only table owner can VACUUM it",
- * RelationGetRelationName(onerel));
- */
+ /* No need for a notice if we already complained during VACUUM */
+ if (!vacstmt->vacuum)
+ elog(NOTICE, "Skipping \"%s\" --- only table owner can ANALYZE it",
+ RelationGetRelationName(onerel));
heap_close(onerel, NoLock);
CommitTransactionCommand();
return;
}
- elog(MESSAGE_LEVEL, "Analyzing...");
+ elog(MESSAGE_LEVEL, "Analyzing %s", RelationGetRelationName(onerel));
- attr_cnt = onerel->rd_att->natts;
+ /*
+ * Determine which columns to analyze
+ *
+ * Note that system attributes are never analyzed.
+ */
attr = onerel->rd_att->attrs;
+ attr_cnt = onerel->rd_att->natts;
- if (anal_cols2 != NIL)
+ if (vacstmt->va_cols != NIL)
{
- int tcnt = 0;
List *le;
- if (length(anal_cols2) > attr_cnt)
- elog(ERROR, "vacuum: too many attributes specified for relation %s",
- RelationGetRelationName(onerel));
- attnums = (int *) palloc(attr_cnt * sizeof(int));
- foreach(le, anal_cols2)
+ vacattrstats = (VacAttrStats **) palloc(length(vacstmt->va_cols) *
+ sizeof(VacAttrStats *));
+ tcnt = 0;
+ foreach(le, vacstmt->va_cols)
{
- char *col = (char *) lfirst(le);
+ char *col = strVal(lfirst(le));
for (i = 0; i < attr_cnt; i++)
{
if (namestrcmp(&(attr[i]->attname), col) == 0)
break;
}
- if (i < attr_cnt) /* found */
- attnums[tcnt++] = i;
- else
- {
- elog(ERROR, "vacuum: there is no attribute %s in %s",
+ if (i >= attr_cnt)
+ elog(ERROR, "ANALYZE: there is no attribute %s in %s",
col, RelationGetRelationName(onerel));
- }
+ vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+ if (vacattrstats[tcnt] != NULL)
+ tcnt++;
+ }
+ attr_cnt = tcnt;
+ }
+ else
+ {
+ vacattrstats = (VacAttrStats **) palloc(attr_cnt *
+ sizeof(VacAttrStats *));
+ tcnt = 0;
+ for (i = 0; i < attr_cnt; i++)
+ {
+ vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+ if (vacattrstats[tcnt] != NULL)
+ tcnt++;
}
attr_cnt = tcnt;
}
- vacattrstats = (VacAttrStats *) palloc(attr_cnt * sizeof(VacAttrStats));
+ /*
+ * Quit if no analyzable columns
+ */
+ if (attr_cnt <= 0)
+ {
+ heap_close(onerel, NoLock);
+ CommitTransactionCommand();
+ return;
+ }
+ /*
+ * Determine how many rows we need to sample, using the worst case
+ * from all analyzable columns. We use a lower bound of 100 rows
+ * to avoid possible overflow in Vitter's algorithm.
+ */
+ targrows = 100;
for (i = 0; i < attr_cnt; i++)
{
- Operator func_operator;
- VacAttrStats *stats;
-
- stats = &vacattrstats[i];
- stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE);
- memcpy(stats->attr, attr[((attnums) ? attnums[i] : i)],
- ATTRIBUTE_TUPLE_SIZE);
- stats->best = stats->guess1 = stats->guess2 = 0;
- stats->max = stats->min = 0;
- stats->best_len = stats->guess1_len = stats->guess2_len = 0;
- stats->max_len = stats->min_len = 0;
- stats->initialized = false;
- stats->best_cnt = stats->guess1_cnt = stats->guess1_hits = stats->guess2_hits = 0;
- stats->max_cnt = stats->min_cnt = stats->null_cnt = stats->nonnull_cnt = 0;
-
- func_operator = compatible_oper("=",
- stats->attr->atttypid,
- stats->attr->atttypid,
- true);
- if (func_operator != NULL)
- {
- fmgr_info(oprfuncid(func_operator), &(stats->f_cmpeq));
- ReleaseSysCache(func_operator);
- }
- else
- stats->f_cmpeq.fn_addr = NULL;
+ if (targrows < vacattrstats[i]->minrows)
+ targrows = vacattrstats[i]->minrows;
+ }
+
+ /*
+ * Acquire the sample rows
+ */
+ rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
+ numrows = acquire_sample_rows(onerel, rows, targrows, &totalrows);
- func_operator = compatible_oper("<",
- stats->attr->atttypid,
- stats->attr->atttypid,
- true);
- if (func_operator != NULL)
+ /*
+ * If we are running a standalone ANALYZE, update pages/tuples stats
+ * in pg_class. We have the accurate page count from heap_beginscan,
+ * but only an approximate number of tuples; therefore, if we are
+ * part of VACUUM ANALYZE do *not* overwrite the accurate count already
+ * inserted by VACUUM.
+ */
+ if (!vacstmt->vacuum)
+ vac_update_relstats(RelationGetRelid(onerel),
+ onerel->rd_nblocks,
+ (double) totalrows,
+ RelationGetForm(onerel)->relhasindex);
+
+ /*
+ * Compute the statistics. Temporary results during the calculations
+ * for each column are stored in a child context. The calc routines
+ * are responsible to make sure that whatever they store into the
+ * VacAttrStats structure is allocated in TransactionCommandContext.
+ */
+ if (numrows > 0)
+ {
+ MemoryContext col_context,
+ old_context;
+
+ col_context = AllocSetContextCreate(CurrentMemoryContext,
+ "Analyze Column",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ old_context = MemoryContextSwitchTo(col_context);
+ for (i = 0; i < attr_cnt; i++)
{
- fmgr_info(oprfuncid(func_operator), &(stats->f_cmplt));
- stats->op_cmplt = oprid(func_operator);
- ReleaseSysCache(func_operator);
+ switch (vacattrstats[i]->algcode)
+ {
+ case ALG_MINIMAL:
+ compute_minimal_stats(vacattrstats[i],
+ onerel->rd_att, totalrows,
+ rows, numrows);
+ break;
+ case ALG_SCALAR:
+ compute_scalar_stats(vacattrstats[i],
+ onerel->rd_att, totalrows,
+ rows, numrows);
+ break;
+ }
+ MemoryContextResetAndDeleteChildren(col_context);
}
- else
+ MemoryContextSwitchTo(old_context);
+ MemoryContextDelete(col_context);
+
+ /*
+ * Emit the completed stats rows into pg_statistic, replacing any
+ * previous statistics for the target columns. (If there are stats
+ * in pg_statistic for columns we didn't process, we leave them alone.)
+ */
+ update_attstats(relid, attr_cnt, vacattrstats);
+ }
+
+ /*
+ * Close source relation now, but keep lock so that no one deletes it
+ * before we commit. (If someone did, they'd fail to clean up the
+ * entries we made in pg_statistic.)
+ */
+ heap_close(onerel, NoLock);
+
+ /* Commit and release working memory */
+ CommitTransactionCommand();
+}
+
+/*
+ * examine_attribute -- pre-analysis of a single column
+ *
+ * Determine whether the column is analyzable; if so, create and initialize
+ * a VacAttrStats struct for it. If not, return NULL.
+ */
+static VacAttrStats *
+examine_attribute(Relation onerel, int attnum)
+{
+ Form_pg_attribute attr = onerel->rd_att->attrs[attnum-1];
+ Operator func_operator;
+ Oid oprrest;
+ HeapTuple typtuple;
+ Oid eqopr = InvalidOid;
+ Oid eqfunc = InvalidOid;
+ Oid ltopr = InvalidOid;
+ VacAttrStats *stats;
+
+ /* Don't analyze column if user has specified not to */
+ if (attr->attstattarget <= 0)
+ return NULL;
+
+ /* If column has no "=" operator, we can't do much of anything */
+ func_operator = compatible_oper("=",
+ attr->atttypid,
+ attr->atttypid,
+ true);
+ if (func_operator != NULL)
+ {
+ oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+ if (oprrest == F_EQSEL)
{
- stats->f_cmplt.fn_addr = NULL;
- stats->op_cmplt = InvalidOid;
+ eqopr = oprid(func_operator);
+ eqfunc = oprfuncid(func_operator);
}
+ ReleaseSysCache(func_operator);
+ }
+ if (!OidIsValid(eqfunc))
+ return NULL;
- func_operator = compatible_oper(">",
- stats->attr->atttypid,
- stats->attr->atttypid,
- true);
- if (func_operator != NULL)
+ /*
+ * If we have "=" then we're at least able to do the minimal algorithm,
+ * so start filling in a VacAttrStats struct.
+ */
+ stats = (VacAttrStats *) palloc(sizeof(VacAttrStats));
+ MemSet(stats, 0, sizeof(VacAttrStats));
+ stats->attnum = attnum;
+ stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_TUPLE_SIZE);
+ memcpy(stats->attr, attr, ATTRIBUTE_TUPLE_SIZE);
+ typtuple = SearchSysCache(TYPEOID,
+ ObjectIdGetDatum(attr->atttypid),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(typtuple))
+ elog(ERROR, "cache lookup of type %u failed", attr->atttypid);
+ stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
+ memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
+ ReleaseSysCache(typtuple);
+ stats->eqopr = eqopr;
+ stats->eqfunc = eqfunc;
+
+ /* Is there a "<" operator with suitable semantics? */
+ func_operator = compatible_oper("<",
+ attr->atttypid,
+ attr->atttypid,
+ true);
+ if (func_operator != NULL)
+ {
+ oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+ if (oprrest == F_SCALARLTSEL)
{
- fmgr_info(oprfuncid(func_operator), &(stats->f_cmpgt));
- ReleaseSysCache(func_operator);
+ ltopr = oprid(func_operator);
}
- else
- stats->f_cmpgt.fn_addr = NULL;
+ ReleaseSysCache(func_operator);
+ }
+ stats->ltopr = ltopr;
+
+ /*
+ * Determine the algorithm to use (this will get more complicated later)
+ */
+ if (OidIsValid(ltopr))
+ {
+ /* Seems to be a scalar datatype */
+ stats->algcode = ALG_SCALAR;
+ /*--------------------
+ * The following choice of minrows is based on the paper
+ * "Random sampling for histogram construction: how much is enough?"
+ * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
+ * Proceedings of ACM SIGMOD International Conference on Management
+ * of Data, 1998, Pages 436-447. Their Corollary 1 to Theorem 5
+ * says that for table size n, histogram size k, maximum relative
+ * error in bin size f, and error probability gamma, the minimum
+ * random sample size is
+ * r = 4 * k * ln(2*n/gamma) / f^2
+ * Taking f = 0.5, gamma = 0.01, n = 1 million rows, we obtain
+ * r = 305.82 * k
+ * Note that because of the log function, the dependence on n is
+ * quite weak; even at n = 1 billion, a 300*k sample gives <= 0.59
+ * bin size error with probability 0.99. So there's no real need to
+ * scale for n, which is a good thing because we don't necessarily
+ * know it at this point.
+ *--------------------
+ */
+ stats->minrows = 300 * attr->attstattarget;
+ }
+ else
+ {
+ /* Can't do much but the minimal stuff */
+ stats->algcode = ALG_MINIMAL;
+ /* Might as well use the same minrows as above */
+ stats->minrows = 300 * attr->attstattarget;
+ }
+
+ return stats;
+}
- tuple = SearchSysCache(TYPEOID,
- ObjectIdGetDatum(stats->attr->atttypid),
- 0, 0, 0);
- if (HeapTupleIsValid(tuple))
+/*
+ * acquire_sample_rows -- acquire a random sample of rows from the table
+ *
+ * Up to targrows rows are collected (if there are fewer than that many
+ * rows in the table, all rows are collected). When the table is larger
+ * than targrows, a truly random sample is collected: every row has an
+ * equal chance of ending up in the final sample.
+ *
+ * We also estimate the total number of rows in the table, and return that
+ * into *totalrows.
+ *
+ * The returned list of tuples is in order by physical position in the table.
+ * (We will rely on this later to derive correlation estimates.)
+ */
+static int
+acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
+ long *totalrows)
+{
+ int numrows = 0;
+ HeapScanDesc scan;
+ HeapTuple tuple;
+ ItemPointer lasttuple;
+ BlockNumber lastblock,
+ estblock;
+ OffsetNumber lastoffset;
+ int numest;
+ double tuplesperpage;
+ long t;
+ double rstate;
+
+ Assert(targrows > 1);
+ /*
+ * Do a simple linear scan until we reach the target number of rows.
+ */
+ scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
+ while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+ {
+ rows[numrows++] = heap_copytuple(tuple);
+ if (numrows >= targrows)
+ break;
+ }
+ heap_endscan(scan);
+ /*
+ * If we ran out of tuples then we're done, no matter how few we
+ * collected. No sort is needed, since they're already in order.
+ */
+ if (!HeapTupleIsValid(tuple))
+ {
+ *totalrows = numrows;
+ return numrows;
+ }
+ /*
+ * Otherwise, start replacing tuples in the sample until we reach the
+ * end of the relation. This algorithm is from Jeff Vitter's paper
+ * (see full citation below). It works by repeatedly computing the number
+ * of the next tuple we want to fetch, which will replace a randomly
+ * chosen element of the reservoir (current set of tuples). At all times
+ * the reservoir is a true random sample of the tuples we've passed over
+ * so far, so when we fall off the end of the relation we're done.
+ *
+ * A slight difficulty is that since we don't want to fetch tuples or even
+ * pages that we skip over, it's not possible to fetch *exactly* the N'th
+ * tuple at each step --- we don't know how many valid tuples are on
+ * the skipped pages. We handle this by assuming that the average number
+ * of valid tuples/page on the pages already scanned over holds good for
+ * the rest of the relation as well; this lets us estimate which page
+ * the next tuple should be on and its position in the page. Then we
+ * fetch the first valid tuple at or after that position, being careful
+ * not to use the same tuple twice. This approach should still give a
+ * good random sample, although it's not perfect.
+ */
+ lasttuple = &(rows[numrows-1]->t_self);
+ lastblock = ItemPointerGetBlockNumber(lasttuple);
+ lastoffset = ItemPointerGetOffsetNumber(lasttuple);
+ /*
+ * If possible, estimate tuples/page using only completely-scanned pages.
+ */
+ for (numest = numrows; numest > 0; numest--)
+ {
+ if (ItemPointerGetBlockNumber(&(rows[numest-1]->t_self)) != lastblock)
+ break;
+ }
+ if (numest == 0)
+ {
+ numest = numrows; /* don't have a full page? */
+ estblock = lastblock + 1;
+ }
+ else
+ {
+ estblock = lastblock;
+ }
+ tuplesperpage = (double) numest / (double) estblock;
+
+ t = numrows; /* t is the # of records processed so far */
+ rstate = init_selection_state(targrows);
+ for (;;)
+ {
+ double targpos;
+ BlockNumber targblock;
+ OffsetNumber targoffset,
+ maxoffset;
+
+ t = select_next_random_record(t, targrows, &rstate);
+ /* Try to read the t'th record in the table */
+ targpos = (double) t / tuplesperpage;
+ targblock = (BlockNumber) targpos;
+ targoffset = ((int) (targpos - targblock) * tuplesperpage) +
+ FirstOffsetNumber;
+ /* Make sure we are past the last selected record */
+ if (targblock <= lastblock)
{
- stats->outfunc = ((Form_pg_type) GETSTRUCT(tuple))->typoutput;
- stats->typelem = ((Form_pg_type) GETSTRUCT(tuple))->typelem;
- ReleaseSysCache(tuple);
+ targblock = lastblock;
+ if (targoffset <= lastoffset)
+ targoffset = lastoffset + 1;
}
- else
+ /* Loop to find first valid record at or after given position */
+ pageloop:;
+ /*
+ * Have we fallen off the end of the relation? (We rely on
+ * heap_beginscan to have updated rd_nblocks.)
+ */
+ if (targblock >= onerel->rd_nblocks)
+ break;
+ maxoffset = get_page_max_offset(onerel, targblock);
+ for (;;)
{
- stats->outfunc = InvalidOid;
- stats->typelem = InvalidOid;
+ HeapTupleData targtuple;
+ Buffer targbuffer;
+
+ if (targoffset > maxoffset)
+ {
+ /* Fell off end of this page, try next */
+ targblock++;
+ targoffset = FirstOffsetNumber;
+ goto pageloop;
+ }
+ ItemPointerSet(&targtuple.t_self, targblock, targoffset);
+ heap_fetch(onerel, SnapshotNow, &targtuple, &targbuffer);
+ if (targtuple.t_data != NULL)
+ {
+ /*
+ * Found a suitable tuple, so save it, replacing one old
+ * tuple at random
+ */
+ int k = (int) (targrows * random_fract());
+
+ Assert(k >= 0 && k < targrows);
+ heap_freetuple(rows[k]);
+ rows[k] = heap_copytuple(&targtuple);
+ ReleaseBuffer(targbuffer);
+ lastblock = targblock;
+ lastoffset = targoffset;
+ break;
+ }
+ /* this tuple is dead, so advance to next one on same page */
+ targoffset++;
}
}
- /* delete existing pg_statistic rows for relation */
- del_stats(relid, ((attnums) ? attr_cnt : 0), attnums);
-
- /* scan relation to gather statistics */
- scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
- while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
- attr_stats(onerel, attr_cnt, vacattrstats, tuple);
+ /*
+ * Now we need to sort the collected tuples by position (itempointer).
+ */
+ qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
- heap_endscan(scan);
+ /*
+ * Estimate total number of valid rows in relation.
+ */
+ *totalrows = (long) (onerel->rd_nblocks * tuplesperpage + 0.5);
- /* close rel, but keep lock so it doesn't go away before commit */
- heap_close(onerel, NoLock);
+ return numrows;
+}
- /* update statistics in pg_class */
- update_attstats(relid, attr_cnt, vacattrstats);
+/* Select a random value R uniformly distributed in 0 < R < 1 */
+static double
+random_fract(void)
+{
+ long z;
- CommitTransactionCommand();
+ /* random() can produce endpoint values, try again if so */
+ do
+ {
+ z = random();
+ } while (! (z > 0 && z < MAX_RANDOM_VALUE));
+ return (double) z / (double) MAX_RANDOM_VALUE;
}
/*
- * attr_stats() -- compute column statistics used by the planner
+ * These two routines embody Algorithm Z from "Random sampling with a
+ * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1
+ * (Mar. 1985), Pages 37-57. While Vitter describes his algorithm in terms
+ * of the count S of records to skip before processing another record,
+ * it is convenient to work primarily with t, the index (counting from 1)
+ * of the last record processed and next record to process. The only extra
+ * state needed between calls is W, a random state variable.
*
- * We compute the column min, max, null and non-null counts.
- * Plus we attempt to find the count of the value that occurs most
- * frequently in each column. These figures are used to compute
- * the selectivity of the column.
+ * init_selection_state computes the initial W value.
*
- * We use a three-bucket cache to get the most frequent item.
- * The 'guess' buckets count hits. A cache miss causes guess1
- * to get the most hit 'guess' item in the most recent cycle, and
- * the new item goes into guess2. Whenever the total count of hits
- * of a 'guess' entry is larger than 'best', 'guess' becomes 'best'.
+ * Given that we've already processed t records (t >= n),
+ * select_next_random_record determines the number of the next record to
+ * process.
+ */
+static double
+init_selection_state(int n)
+{
+ /* Initial value of W (for use when Algorithm Z is first applied) */
+ return exp(- log(random_fract())/n);
+}
+
+static long
+select_next_random_record(long t, int n, double *stateptr)
+{
+ /* The magic constant here is T from Vitter's paper */
+ if (t <= (22 * n))
+ {
+ /* Process records using Algorithm X until t is large enough */
+ double V,
+ quot;
+
+ V = random_fract(); /* Generate V */
+ t++;
+ quot = (double) (t - n) / (double) t;
+ /* Find min S satisfying (4.1) */
+ while (quot > V)
+ {
+ t++;
+ quot *= (double) (t - n) / (double) t;
+ }
+ }
+ else
+ {
+ /* Now apply Algorithm Z */
+ double W = *stateptr;
+ long term = t - n + 1;
+ int S;
+
+ for (;;)
+ {
+ long numer,
+ numer_lim,
+ denom;
+ double U,
+ X,
+ lhs,
+ rhs,
+ y,
+ tmp;
+
+ /* Generate U and X */
+ U = random_fract();
+ X = t * (W - 1.0);
+ S = X; /* S is tentatively set to floor(X) */
+ /* Test if U <= h(S)/cg(X) in the manner of (6.3) */
+ tmp = (double) (t + 1) / (double) term;
+ lhs = exp(log(((U * tmp * tmp) * (term + S))/(t + X))/n);
+ rhs = (((t + X)/(term + S)) * term)/t;
+ if (lhs <= rhs)
+ {
+ W = rhs/lhs;
+ break;
+ }
+ /* Test if U <= f(S)/cg(X) */
+ y = (((U * (t + 1))/term) * (t + S + 1))/(t + X);
+ if (n < S)
+ {
+ denom = t;
+ numer_lim = term + S;
+ }
+ else
+ {
+ denom = t - n + S;
+ numer_lim = t + 1;
+ }
+ for (numer = t + S; numer >= numer_lim; numer--)
+ {
+ y *= (double) numer / (double) denom;
+ denom--;
+ }
+ W = exp(- log(random_fract())/n); /* Generate W in advance */
+ if (exp(log(y)/n) <= (t + X)/t)
+ break;
+ }
+ t += S + 1;
+ *stateptr = W;
+ }
+ return t;
+}
+
+/*
+ * qsort comparator for sorting rows[] array
+ */
+static int
+compare_rows(const void *a, const void *b)
+{
+ HeapTuple ha = * (HeapTuple *) a;
+ HeapTuple hb = * (HeapTuple *) b;
+ BlockNumber ba = ItemPointerGetBlockNumber(&ha->t_self);
+ OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self);
+ BlockNumber bb = ItemPointerGetBlockNumber(&hb->t_self);
+ OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self);
+
+ if (ba < bb)
+ return -1;
+ if (ba > bb)
+ return 1;
+ if (oa < ob)
+ return -1;
+ if (oa > ob)
+ return 1;
+ return 0;
+}
+
+/*
+ * Discover the largest valid tuple offset number on the given page
+ *
+ * This code probably ought to live in some other module.
+ */
+static OffsetNumber
+get_page_max_offset(Relation relation, BlockNumber blocknumber)
+{
+ Buffer buffer;
+ Page p;
+ OffsetNumber offnum;
+
+ buffer = ReadBuffer(relation, blocknumber);
+ if (!BufferIsValid(buffer))
+ elog(ERROR, "get_page_max_offset: %s relation: ReadBuffer(%ld) failed",
+ RelationGetRelationName(relation), (long) blocknumber);
+ LockBuffer(buffer, BUFFER_LOCK_SHARE);
+ p = BufferGetPage(buffer);
+ offnum = PageGetMaxOffsetNumber(p);
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ ReleaseBuffer(buffer);
+ return offnum;
+}
+
+
+/*
+ * compute_minimal_stats() -- compute minimal column statistics
*
- * This method works perfectly for columns with unique values, and columns
- * with only two unique values, plus nulls.
+ * We use this when we can find only an "=" operator for the datatype.
*
- * It becomes less perfect as the number of unique values increases and
- * their distribution in the table becomes more random.
+ * We determine the fraction of non-null rows, the average width, the
+ * most common values, and the (estimated) number of distinct values.
*
+ * The most common values are determined by brute force: we keep a list
+ * of previously seen values, ordered by number of times seen, as we scan
+ * the samples. A newly seen value is inserted just after the last
+ * multiply-seen value, causing the bottommost (oldest) singly-seen value
+ * to drop off the list. The accuracy of this method, and also its cost,
+ * depend mainly on the length of the list we are willing to keep.
*/
static void
-attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple)
+compute_minimal_stats(VacAttrStats *stats,
+ TupleDesc tupDesc, long totalrows,
+ HeapTuple *rows, int numrows)
{
int i;
- TupleDesc tupDesc = onerel->rd_att;
-
- for (i = 0; i < attr_cnt; i++)
+ int null_cnt = 0;
+ int nonnull_cnt = 0;
+ int toowide_cnt = 0;
+ double total_width = 0;
+ bool is_varlena = (!stats->attr->attbyval &&
+ stats->attr->attlen == -1);
+ FmgrInfo f_cmpeq;
+ typedef struct
+ {
+ Datum value;
+ int count;
+ } TrackItem;
+ TrackItem *track;
+ int track_cnt,
+ track_max;
+ int num_mcv = stats->attr->attstattarget;
+
+ /* We track up to 2*n values for an n-element MCV list; but at least 10 */
+ track_max = 2 * num_mcv;
+ if (track_max < 10)
+ track_max = 10;
+ track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
+ track_cnt = 0;
+
+ fmgr_info(stats->eqfunc, &f_cmpeq);
+
+ for (i = 0; i < numrows; i++)
{
- VacAttrStats *stats = &vacattrstats[i];
- Datum origvalue;
+ HeapTuple tuple = rows[i];
Datum value;
bool isnull;
- bool value_hit;
-
- if (!VacAttrStatsEqValid(stats))
- continue;
-
-#ifdef _DROP_COLUMN_HACK__
- if (COLUMN_IS_DROPPED(stats->attr))
- continue;
-#endif /* _DROP_COLUMN_HACK__ */
+ bool match;
+ int firstcount1,
+ j;
- origvalue = heap_getattr(tuple, stats->attr->attnum,
- tupDesc, &isnull);
+ value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
+ /* Check for null/nonnull */
if (isnull)
{
- stats->null_cnt++;
+ null_cnt++;
continue;
}
- stats->nonnull_cnt++;
+ nonnull_cnt++;
/*
- * If the value is toasted, detoast it to avoid repeated
- * detoastings and resultant memory leakage inside the comparison
- * routines.
+ * If it's a varlena field, add up widths for average width
+ * calculation. Note that if the value is toasted, we
+ * use the toasted width. We don't bother with this calculation
+ * if it's a fixed-width type.
*/
- if (!stats->attr->attbyval && stats->attr->attlen == -1)
- value = PointerGetDatum(PG_DETOAST_DATUM(origvalue));
- else
- value = origvalue;
-
- if (!stats->initialized)
+ if (is_varlena)
{
- bucketcpy(stats->attr, value, &stats->best, &stats->best_len);
- /* best_cnt gets incremented below */
- bucketcpy(stats->attr, value, &stats->guess1, &stats->guess1_len);
- stats->guess1_cnt = stats->guess1_hits = 1;
- bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
- stats->guess2_hits = 1;
- if (VacAttrStatsLtGtValid(stats))
+ total_width += VARSIZE(DatumGetPointer(value));
+ /*
+ * If the value is toasted, we want to detoast it just once to
+ * avoid repeated detoastings and resultant excess memory usage
+ * during the comparisons. Also, check to see if the value is
+ * excessively wide, and if so don't detoast at all --- just
+ * ignore the value.
+ */
+ if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
{
- bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
- bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
- /* min_cnt, max_cnt get incremented below */
+ toowide_cnt++;
+ continue;
}
- stats->initialized = true;
+ value = PointerGetDatum(PG_DETOAST_DATUM(value));
}
- if (VacAttrStatsLtGtValid(stats))
+ /*
+ * See if the value matches anything we're already tracking.
+ */
+ match = false;
+ firstcount1 = track_cnt;
+ for (j = 0; j < track_cnt; j++)
{
- if (DatumGetBool(FunctionCall2(&stats->f_cmplt,
- value, stats->min)))
+ if (DatumGetBool(FunctionCall2(&f_cmpeq, value, track[j].value)))
{
- bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
- stats->min_cnt = 1;
+ match = true;
+ break;
}
- else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
- value, stats->min)))
- stats->min_cnt++;
+ if (j < firstcount1 && track[j].count == 1)
+ firstcount1 = j;
+ }
- if (DatumGetBool(FunctionCall2(&stats->f_cmpgt,
- value, stats->max)))
+ if (match)
+ {
+ /* Found a match */
+ track[j].count++;
+ /* This value may now need to "bubble up" in the track list */
+ while (j > 0 && track[j].count > track[j-1].count)
{
- bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
- stats->max_cnt = 1;
+ swapDatum(track[j].value, track[j-1].value);
+ swapInt(track[j].count, track[j-1].count);
+ j--;
}
- else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
- value, stats->max)))
- stats->max_cnt++;
}
-
- value_hit = true;
- if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
- value, stats->best)))
- stats->best_cnt++;
- else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
- value, stats->guess1)))
+ else
{
- stats->guess1_cnt++;
- stats->guess1_hits++;
+ /* No match. Insert at head of count-1 list */
+ if (track_cnt < track_max)
+ track_cnt++;
+ for (j = track_cnt-1; j > firstcount1; j--)
+ {
+ track[j].value = track[j-1].value;
+ track[j].count = track[j-1].count;
+ }
+ if (firstcount1 < track_cnt)
+ {
+ track[firstcount1].value = value;
+ track[firstcount1].count = 1;
+ }
}
- else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
- value, stats->guess2)))
- stats->guess2_hits++;
+ }
+
+ /* We can only compute valid stats if we found some non-null values. */
+ if (nonnull_cnt > 0)
+ {
+ int nmultiple,
+ summultiple;
+
+ stats->stats_valid = true;
+ /* Do the simple null-frac and width stats */
+ stats->stanullfrac = (double) null_cnt / (double) numrows;
+ if (is_varlena)
+ stats->stawidth = total_width / (double) nonnull_cnt;
else
- value_hit = false;
+ stats->stawidth = stats->attrtype->typlen;
- if (stats->guess2_hits > stats->guess1_hits)
+ /* Count the number of values we found multiple times */
+ summultiple = 0;
+ for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
{
- swapDatum(stats->guess1, stats->guess2);
- swapInt(stats->guess1_len, stats->guess2_len);
- swapLong(stats->guess1_hits, stats->guess2_hits);
- stats->guess1_cnt = stats->guess1_hits;
+ if (track[nmultiple].count == 1)
+ break;
+ summultiple += track[nmultiple].count;
}
- if (stats->guess1_cnt > stats->best_cnt)
+
+ if (nmultiple == 0)
{
- swapDatum(stats->best, stats->guess1);
- swapInt(stats->best_len, stats->guess1_len);
- swapLong(stats->best_cnt, stats->guess1_cnt);
- stats->guess1_hits = 1;
- stats->guess2_hits = 1;
+ /* If we found no repeated values, assume it's a unique column */
+ stats->stadistinct = -1.0;
}
- if (!value_hit)
+ else if (track_cnt < track_max && toowide_cnt == 0 &&
+ nmultiple == track_cnt)
{
- bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
- stats->guess1_hits = 1;
- stats->guess2_hits = 1;
+ /*
+ * Our track list includes every value in the sample, and every
+ * value appeared more than once. Assume the column has just
+ * these values.
+ */
+ stats->stadistinct = track_cnt;
}
+ else
+ {
+ /*----------
+ * Estimate the number of distinct values using the estimator
+ * proposed by Chaudhuri et al (see citation above). This is
+ * sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+ * where fk is the number of distinct values that occurred
+ * exactly k times in our sample of r rows (from a total of n).
+ * We assume (not very reliably!) that all the multiply-occurring
+ * values are reflected in the final track[] list, and the other
+ * nonnull values all appeared but once.
+ *----------
+ */
+ int f1 = nonnull_cnt - summultiple;
+ double term1;
- /* Clean up detoasted copy, if any */
- if (value != origvalue)
- pfree(DatumGetPointer(value));
- }
-}
+ if (f1 < 1)
+ f1 = 1;
+ term1 = sqrt((double) totalrows / (double) numrows) * f1;
+ stats->stadistinct = floor(term1 + nmultiple + 0.5);
+ }
-/*
- * bucketcpy() -- copy a new value into one of the statistics buckets
- */
-static void
-bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len)
-{
- if (attr->attbyval)
- *bucket = value;
- else
- {
- int len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value));
+ /*
+ * If we estimated the number of distinct values at more than 10%
+ * of the total row count (a very arbitrary limit), then assume
+ * that stadistinct should scale with the row count rather than be
+ * a fixed value.
+ */
+ if (stats->stadistinct > 0.1 * totalrows)
+ stats->stadistinct = - (stats->stadistinct / totalrows);
- /* Avoid unnecessary palloc() traffic... */
- if (len > *bucket_len)
+ /* Generate an MCV slot entry, only if we found multiples */
+ if (nmultiple < num_mcv)
+ num_mcv = nmultiple;
+ if (num_mcv > 0)
{
- if (*bucket_len != 0)
- pfree(DatumGetPointer(*bucket));
- *bucket = PointerGetDatum(palloc(len));
- *bucket_len = len;
+ MemoryContext old_context;
+ Datum *mcv_values;
+ float4 *mcv_freqs;
+
+ /* Must copy the target values into TransactionCommandContext */
+ old_context = MemoryContextSwitchTo(TransactionCommandContext);
+ mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+ mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+ for (i = 0; i < num_mcv; i++)
+ {
+ mcv_values[i] = datumCopy(track[i].value,
+ stats->attr->attbyval,
+ stats->attr->attlen);
+ mcv_freqs[i] = (double) track[i].count / (double) numrows;
+ }
+ MemoryContextSwitchTo(old_context);
+
+ stats->stakind[0] = STATISTIC_KIND_MCV;
+ stats->staop[0] = stats->eqopr;
+ stats->stanumbers[0] = mcv_freqs;
+ stats->numnumbers[0] = num_mcv;
+ stats->stavalues[0] = mcv_values;
+ stats->numvalues[0] = num_mcv;
}
- memcpy(DatumGetPointer(*bucket), DatumGetPointer(value), len);
}
+
+ /* We don't need to bother cleaning up any of our temporary palloc's */
}
/*
- * update_attstats() -- update attribute statistics for one relation
+ * compute_scalar_stats() -- compute column statistics
*
- * Statistics are stored in several places: the pg_class row for the
- * relation has stats about the whole relation, the pg_attribute rows
- * for each attribute store "dispersion", and there is a pg_statistic
- * row for each (non-system) attribute. (Dispersion probably ought to
- * be moved to pg_statistic, but it's not worth doing unless there's
- * another reason to have to change pg_attribute.) The pg_class values
- * are updated by VACUUM, not here.
- *
- * We violate no-overwrite semantics here by storing new values for
- * the dispersion column directly into the pg_attribute tuple that's
- * already on the page. The reason for this is that if we updated
- * these tuples in the usual way, vacuuming pg_attribute itself
- * wouldn't work very well --- by the time we got done with a vacuum
- * cycle, most of the tuples in pg_attribute would've been obsoleted.
- * Updating pg_attribute's own statistics would be especially tricky.
- * Of course, this only works for fixed-size never-null columns, but
- * dispersion is.
+ * We use this when we can find "=" and "<" operators for the datatype.
*
- * pg_statistic rows are just added normally. This means that
- * pg_statistic will probably contain some deleted rows at the
- * completion of a vacuum cycle, unless it happens to get vacuumed last.
+ * We determine the fraction of non-null rows, the average width, the
+ * most common values, the (estimated) number of distinct values, the
+ * distribution histogram, and the correlation of physical to logical order.
*
- * To keep things simple, we punt for pg_statistic, and don't try
- * to compute or store rows for pg_statistic itself in pg_statistic.
- * This could possibly be made to work, but it's not worth the trouble.
+ * The desired stats can be determined fairly easily after sorting the
+ * data values into order.
*/
static void
-update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats)
+compute_scalar_stats(VacAttrStats *stats,
+ TupleDesc tupDesc, long totalrows,
+ HeapTuple *rows, int numrows)
{
- Relation ad,
- sd;
- HeapScanDesc scan;
- HeapTuple atup,
- stup;
- ScanKeyData askey;
- Form_pg_attribute attp;
-
- ad = heap_openr(AttributeRelationName, RowExclusiveLock);
- sd = heap_openr(StatisticRelationName, RowExclusiveLock);
-
- /* Find pg_attribute rows for this relation */
- ScanKeyEntryInitialize(&askey, 0, Anum_pg_attribute_attrelid,
- F_INT4EQ, relid);
-
- scan = heap_beginscan(ad, false, SnapshotNow, 1, &askey);
-
- while (HeapTupleIsValid(atup = heap_getnext(scan, 0)))
+ int i;
+ int null_cnt = 0;
+ int nonnull_cnt = 0;
+ int toowide_cnt = 0;
+ double total_width = 0;
+ bool is_varlena = (!stats->attr->attbyval &&
+ stats->attr->attlen == -1);
+ double corr_xysum;
+ RegProcedure cmpFn;
+ SortFunctionKind cmpFnKind;
+ FmgrInfo f_cmpfn;
+ ScalarItem *values;
+ int values_cnt = 0;
+ int *tupnoLink;
+ ScalarMCVItem *track;
+ int track_cnt = 0;
+ int num_mcv = stats->attr->attstattarget;
+
+ values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem));
+ tupnoLink = (int *) palloc(numrows * sizeof(int));
+ track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
+
+ SelectSortFunction(stats->ltopr, &cmpFn, &cmpFnKind);
+ fmgr_info(cmpFn, &f_cmpfn);
+
+ /* Initial scan to find sortable values */
+ for (i = 0; i < numrows; i++)
{
- int i;
- VacAttrStats *stats;
+ HeapTuple tuple = rows[i];
+ Datum value;
+ bool isnull;
- attp = (Form_pg_attribute) GETSTRUCT(atup);
- if (attp->attnum <= 0) /* skip system attributes for now */
- continue;
+ value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
- for (i = 0; i < natts; i++)
+ /* Check for null/nonnull */
+ if (isnull)
{
- if (attp->attnum == vacattrstats[i].attr->attnum)
- break;
+ null_cnt++;
+ continue;
}
- if (i >= natts)
- continue; /* skip attr if no stats collected */
- stats = &(vacattrstats[i]);
+ nonnull_cnt++;
- if (VacAttrStatsEqValid(stats))
+ /*
+ * If it's a varlena field, add up widths for average width
+ * calculation. Note that if the value is toasted, we
+ * use the toasted width. We don't bother with this calculation
+ * if it's a fixed-width type.
+ */
+ if (is_varlena)
{
- float4 selratio; /* average ratio of rows selected
- * for a random constant */
-
- /* Compute dispersion */
- if (stats->nonnull_cnt == 0 && stats->null_cnt == 0)
+ total_width += VARSIZE(DatumGetPointer(value));
+ /*
+ * If the value is toasted, we want to detoast it just once to
+ * avoid repeated detoastings and resultant excess memory usage
+ * during the comparisons. Also, check to see if the value is
+ * excessively wide, and if so don't detoast at all --- just
+ * ignore the value.
+ */
+ if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
{
-
- /*
- * empty relation, so put a dummy value in attdispersion
- */
- selratio = 0;
+ toowide_cnt++;
+ continue;
}
- else if (stats->null_cnt <= 1 && stats->best_cnt == 1)
- {
+ value = PointerGetDatum(PG_DETOAST_DATUM(value));
+ }
- /*
- * looks like we have a unique-key attribute --- flag this
- * with special -1.0 flag value.
- *
- * The correct dispersion is 1.0/numberOfRows, but since the
- * relation row count can get updated without recomputing
- * dispersion, we want to store a "symbolic" value and
- * figure 1.0/numberOfRows on the fly.
- */
- selratio = -1;
- }
- else
+ /* Add it to the list to be sorted */
+ values[values_cnt].value = value;
+ values[values_cnt].tupno = values_cnt;
+ tupnoLink[values_cnt] = values_cnt;
+ values_cnt++;
+ }
+
+ /* We can only compute valid stats if we found some sortable values. */
+ if (values_cnt > 0)
+ {
+ int ndistinct, /* # distinct values in sample */
+ nmultiple, /* # that appear multiple times */
+ num_hist,
+ dups_cnt;
+ int slot_idx = 0;
+
+ /* Sort the collected values */
+ datumCmpFn = &f_cmpfn;
+ datumCmpFnKind = cmpFnKind;
+ datumCmpTupnoLink = tupnoLink;
+ qsort((void *) values, values_cnt,
+ sizeof(ScalarItem), compare_scalars);
+
+ /*
+ * Now scan the values in order, find the most common ones,
+ * and also accumulate ordering-correlation statistics.
+ *
+ * To determine which are most common, we first have to count the
+ * number of duplicates of each value. The duplicates are adjacent
+ * in the sorted list, so a brute-force approach is to compare
+ * successive datum values until we find two that are not equal.
+ * However, that requires N-1 invocations of the datum comparison
+ * routine, which are completely redundant with work that was done
+ * during the sort. (The sort algorithm must at some point have
+ * compared each pair of items that are adjacent in the sorted order;
+ * otherwise it could not know that it's ordered the pair correctly.)
+ * We exploit this by having compare_scalars remember the highest
+ * tupno index that each ScalarItem has been found equal to. At the
+ * end of the sort, a ScalarItem's tupnoLink will still point to
+ * itself if and only if it is the last item of its group of
+ * duplicates (since the group will be ordered by tupno).
+ */
+ corr_xysum = 0;
+ ndistinct = 0;
+ nmultiple = 0;
+ dups_cnt = 0;
+ for (i = 0; i < values_cnt; i++)
+ {
+ int tupno = values[i].tupno;
+
+ corr_xysum += (double) i * (double) tupno;
+ dups_cnt++;
+ if (tupnoLink[tupno] == tupno)
{
- if (VacAttrStatsLtGtValid(stats) &&
- stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
+ /* Reached end of duplicates of this value */
+ ndistinct++;
+ if (dups_cnt > 1)
{
+ nmultiple++;
+ if (track_cnt < num_mcv ||
+ dups_cnt > track[track_cnt-1].count)
+ {
+ /*
+ * Found a new item for the mcv list; find its
+ * position, bubbling down old items if needed.
+ * Loop invariant is that j points at an empty/
+ * replaceable slot.
+ */
+ int j;
+
+ if (track_cnt < num_mcv)
+ track_cnt++;
+ for (j = track_cnt-1; j > 0; j--)
+ {
+ if (dups_cnt <= track[j-1].count)
+ break;
+ track[j].count = track[j-1].count;
+ track[j].first = track[j-1].first;
+ }
+ track[j].count = dups_cnt;
+ track[j].first = i + 1 - dups_cnt;
+ }
+ }
+ dups_cnt = 0;
+ }
+ }
- /*
- * exact result when there are just 1 or 2 values...
- */
- double min_cnt_d = stats->min_cnt,
- max_cnt_d = stats->max_cnt,
- null_cnt_d = stats->null_cnt;
- double total = ((double) stats->nonnull_cnt) + null_cnt_d;
+ stats->stats_valid = true;
+ /* Do the simple null-frac and width stats */
+ stats->stanullfrac = (double) null_cnt / (double) numrows;
+ if (is_varlena)
+ stats->stawidth = total_width / (double) nonnull_cnt;
+ else
+ stats->stawidth = stats->attrtype->typlen;
- selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total);
- }
- else
- {
- double most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
- double total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
+ if (nmultiple == 0)
+ {
+ /* If we found no repeated values, assume it's a unique column */
+ stats->stadistinct = -1.0;
+ }
+ else if (toowide_cnt == 0 && nmultiple == ndistinct)
+ {
+ /*
+ * Every value in the sample appeared more than once. Assume the
+ * column has just these values.
+ */
+ stats->stadistinct = ndistinct;
+ }
+ else
+ {
+ /*----------
+ * Estimate the number of distinct values using the estimator
+ * proposed by Chaudhuri et al (see citation above). This is
+ * sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+ * where fk is the number of distinct values that occurred
+ * exactly k times in our sample of r rows (from a total of n).
+ * Overwidth values are assumed to have been distinct.
+ *----------
+ */
+ int f1 = ndistinct - nmultiple + toowide_cnt;
+ double term1;
- /*
- * we assume count of other values are 20% of best
- * count in table
- */
- selratio = (most * most + 0.20 * most * (total - most)) / (total * total);
- }
- /* Make sure calculated values are in-range */
- if (selratio < 0.0)
- selratio = 0.0;
- else if (selratio > 1.0)
- selratio = 1.0;
+ if (f1 < 1)
+ f1 = 1;
+ term1 = sqrt((double) totalrows / (double) numrows) * f1;
+ stats->stadistinct = floor(term1 + nmultiple + 0.5);
+ }
+
+ /*
+ * If we estimated the number of distinct values at more than 10%
+ * of the total row count (a very arbitrary limit), then assume
+ * that stadistinct should scale with the row count rather than be
+ * a fixed value.
+ */
+ if (stats->stadistinct > 0.1 * totalrows)
+ stats->stadistinct = - (stats->stadistinct / totalrows);
+
+	/* Generate an MCV slot entry only if we found multiples */
+ if (nmultiple < num_mcv)
+ num_mcv = nmultiple;
+ Assert(track_cnt >= num_mcv);
+ if (num_mcv > 0)
+ {
+ MemoryContext old_context;
+ Datum *mcv_values;
+ float4 *mcv_freqs;
+
+ /* Must copy the target values into TransactionCommandContext */
+ old_context = MemoryContextSwitchTo(TransactionCommandContext);
+ mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+ mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+ for (i = 0; i < num_mcv; i++)
+ {
+ mcv_values[i] = datumCopy(values[track[i].first].value,
+ stats->attr->attbyval,
+ stats->attr->attlen);
+ mcv_freqs[i] = (double) track[i].count / (double) numrows;
}
+ MemoryContextSwitchTo(old_context);
+
+ stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
+ stats->staop[slot_idx] = stats->eqopr;
+ stats->stanumbers[slot_idx] = mcv_freqs;
+ stats->numnumbers[slot_idx] = num_mcv;
+ stats->stavalues[slot_idx] = mcv_values;
+ stats->numvalues[slot_idx] = num_mcv;
+ slot_idx++;
+ }
- /* overwrite the existing statistics in the tuple */
- attp->attdispersion = selratio;
+ /*
+ * Generate a histogram slot entry if there are at least two
+ * distinct values not accounted for in the MCV list. (This
+ * ensures the histogram won't collapse to empty or a singleton.)
+ */
+ num_hist = ndistinct - num_mcv;
+ if (num_hist > stats->attr->attstattarget)
+ num_hist = stats->attr->attstattarget + 1;
+ if (num_hist >= 2)
+ {
+ MemoryContext old_context;
+ Datum *hist_values;
+ int nvals;
- /* invalidate the tuple in the cache and write the buffer */
- RelationInvalidateHeapTuple(ad, atup);
- WriteNoReleaseBuffer(scan->rs_cbuf);
+ /* Sort the MCV items into position order to speed next loop */
+ qsort((void *) track, num_mcv,
+ sizeof(ScalarMCVItem), compare_mcvs);
/*
- * Create pg_statistic tuples for the relation, if we have
- * gathered the right data. del_stats() previously deleted
- * all the pg_statistic tuples for the rel, so we just have to
- * insert new ones here.
+ * Collapse out the MCV items from the values[] array.
*
- * Note analyze_rel() has seen to it that we won't come here when
- * vacuuming pg_statistic itself.
+ * Note we destroy the values[] array here... but we don't need
+ * it for anything more. We do, however, still need values_cnt.
*/
- if (VacAttrStatsLtGtValid(stats) && stats->initialized)
+ if (num_mcv > 0)
{
- float4 nullratio;
- float4 bestratio;
- FmgrInfo out_function;
- char *out_string;
- double best_cnt_d = stats->best_cnt,
- null_cnt_d = stats->null_cnt,
- nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */
- Datum values[Natts_pg_statistic];
- char nulls[Natts_pg_statistic];
- Relation irelations[Num_pg_statistic_indices];
+ int src,
+ dest;
+ int j;
- nullratio = null_cnt_d / (nonnull_cnt_d + null_cnt_d);
- bestratio = best_cnt_d / (nonnull_cnt_d + null_cnt_d);
-
- fmgr_info(stats->outfunc, &out_function);
+ src = dest = 0;
+ j = 0; /* index of next interesting MCV item */
+ while (src < values_cnt)
+ {
+ int ncopy;
+
+ if (j < num_mcv)
+ {
+ int first = track[j].first;
+
+ if (src >= first)
+ {
+ /* advance past this MCV item */
+ src = first + track[j].count;
+ j++;
+ continue;
+ }
+ ncopy = first - src;
+ }
+ else
+ {
+ ncopy = values_cnt - src;
+ }
+ memmove(&values[dest], &values[src],
+ ncopy * sizeof(ScalarItem));
+ src += ncopy;
+ dest += ncopy;
+ }
+ nvals = dest;
+ }
+ else
+ nvals = values_cnt;
+ Assert(nvals >= num_hist);
- for (i = 0; i < Natts_pg_statistic; ++i)
- nulls[i] = ' ';
+ /* Must copy the target values into TransactionCommandContext */
+ old_context = MemoryContextSwitchTo(TransactionCommandContext);
+ hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
+ for (i = 0; i < num_hist; i++)
+ {
+ int pos;
- /*
- * initialize values[]
- */
- i = 0;
- values[i++] = ObjectIdGetDatum(relid); /* starelid */
- values[i++] = Int16GetDatum(attp->attnum); /* staattnum */
- values[i++] = ObjectIdGetDatum(stats->op_cmplt); /* staop */
- values[i++] = Float4GetDatum(nullratio); /* stanullfrac */
- values[i++] = Float4GetDatum(bestratio); /* stacommonfrac */
- out_string = DatumGetCString(FunctionCall3(&out_function,
- stats->best,
- ObjectIdGetDatum(stats->typelem),
- Int32GetDatum(stats->attr->atttypmod)));
- values[i++] = DirectFunctionCall1(textin, /* stacommonval */
- CStringGetDatum(out_string));
- pfree(out_string);
- out_string = DatumGetCString(FunctionCall3(&out_function,
- stats->min,
- ObjectIdGetDatum(stats->typelem),
- Int32GetDatum(stats->attr->atttypmod)));
- values[i++] = DirectFunctionCall1(textin, /* staloval */
- CStringGetDatum(out_string));
- pfree(out_string);
- out_string = DatumGetCString(FunctionCall3(&out_function,
- stats->max,
- ObjectIdGetDatum(stats->typelem),
- Int32GetDatum(stats->attr->atttypmod)));
- values[i++] = DirectFunctionCall1(textin, /* stahival */
- CStringGetDatum(out_string));
- pfree(out_string);
-
- stup = heap_formtuple(sd->rd_att, values, nulls);
-
- /* store tuple and update indexes too */
- heap_insert(sd, stup);
-
- CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices, irelations);
- CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
- CatalogCloseIndices(Num_pg_statistic_indices, irelations);
-
- /* release allocated space */
- pfree(DatumGetPointer(values[Anum_pg_statistic_stacommonval - 1]));
- pfree(DatumGetPointer(values[Anum_pg_statistic_staloval - 1]));
- pfree(DatumGetPointer(values[Anum_pg_statistic_stahival - 1]));
- heap_freetuple(stup);
+ pos = (i * (nvals - 1)) / (num_hist - 1);
+ hist_values[i] = datumCopy(values[pos].value,
+ stats->attr->attbyval,
+ stats->attr->attlen);
}
+ MemoryContextSwitchTo(old_context);
+
+ stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
+ stats->staop[slot_idx] = stats->ltopr;
+ stats->stavalues[slot_idx] = hist_values;
+ stats->numvalues[slot_idx] = num_hist;
+ slot_idx++;
+ }
+
+ /* Generate a correlation entry if there are multiple values */
+ if (values_cnt > 1)
+ {
+ MemoryContext old_context;
+ float4 *corrs;
+ double corr_xsum,
+ corr_x2sum;
+
+ /* Must copy the target values into TransactionCommandContext */
+ old_context = MemoryContextSwitchTo(TransactionCommandContext);
+ corrs = (float4 *) palloc(sizeof(float4));
+ MemoryContextSwitchTo(old_context);
+
+ /*----------
+ * Since we know the x and y value sets are both
+ * 0, 1, ..., values_cnt-1
+ * we have sum(x) = sum(y) =
+ * (values_cnt-1)*values_cnt / 2
+ * and sum(x^2) = sum(y^2) =
+ * (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
+ *----------
+ */
+ corr_xsum = (double) (values_cnt-1) * (double) values_cnt / 2.0;
+ corr_x2sum = (double) (values_cnt-1) * (double) values_cnt *
+ (double) (2*values_cnt-1) / 6.0;
+ /* And the correlation coefficient reduces to */
+ corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
+ (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
+
+ stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
+ stats->staop[slot_idx] = stats->ltopr;
+ stats->stanumbers[slot_idx] = corrs;
+ stats->numnumbers[slot_idx] = 1;
+ slot_idx++;
}
}
- heap_endscan(scan);
- /* close rels, but hold locks till upcoming commit */
- heap_close(ad, NoLock);
- heap_close(sd, NoLock);
+
+ /* We don't need to bother cleaning up any of our temporary palloc's */
}
/*
- * del_stats() -- delete pg_statistic rows for a relation
+ * qsort comparator for sorting ScalarItems
*
- * If a list of attribute numbers is given, only zap stats for those attrs.
+ * Aside from sorting the items, we update the datumCmpTupnoLink[] array
+ * whenever two ScalarItems are found to contain equal datums. The array
+ * is indexed by tupno; for each ScalarItem, it contains the highest
+ * tupno that that item's datum has been found to be equal to. This allows
+ * us to avoid additional comparisons in compute_scalar_stats().
*/
-static void
-del_stats(Oid relid, int attcnt, int *attnums)
+static int
+compare_scalars(const void *a, const void *b)
{
- Relation pgstatistic;
- HeapScanDesc scan;
- HeapTuple tuple;
- ScanKeyData key;
+ Datum da = ((ScalarItem *) a)->value;
+ int ta = ((ScalarItem *) a)->tupno;
+ Datum db = ((ScalarItem *) b)->value;
+ int tb = ((ScalarItem *) b)->tupno;
- pgstatistic = heap_openr(StatisticRelationName, RowExclusiveLock);
+ if (datumCmpFnKind == SORTFUNC_LT)
+ {
+ if (DatumGetBool(FunctionCall2(datumCmpFn, da, db)))
+ return -1; /* a < b */
+ if (DatumGetBool(FunctionCall2(datumCmpFn, db, da)))
+ return 1; /* a > b */
+ }
+ else
+ {
+ /* sort function is CMP or REVCMP */
+ int32 compare;
- ScanKeyEntryInitialize(&key, 0x0, Anum_pg_statistic_starelid,
- F_OIDEQ, ObjectIdGetDatum(relid));
- scan = heap_beginscan(pgstatistic, false, SnapshotNow, 1, &key);
+ compare = DatumGetInt32(FunctionCall2(datumCmpFn, da, db));
+ if (compare != 0)
+ {
+ if (datumCmpFnKind == SORTFUNC_REVCMP)
+ compare = -compare;
+ return compare;
+ }
+ }
- while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+ /*
+ * The two datums are equal, so update datumCmpTupnoLink[].
+ */
+ if (datumCmpTupnoLink[ta] < tb)
+ datumCmpTupnoLink[ta] = tb;
+ if (datumCmpTupnoLink[tb] < ta)
+ datumCmpTupnoLink[tb] = ta;
+
+ /*
+ * For equal datums, sort by tupno
+ */
+ return ta - tb;
+}
+
+/*
+ * qsort comparator for sorting ScalarMCVItems by position
+ */
+static int
+compare_mcvs(const void *a, const void *b)
+{
+ int da = ((ScalarMCVItem *) a)->first;
+ int db = ((ScalarMCVItem *) b)->first;
+
+ return da - db;
+}
+
+
+/*
+ * update_attstats() -- update attribute statistics for one relation
+ *
+ * Statistics are stored in several places: the pg_class row for the
+ * relation has stats about the whole relation, and there is a
+ * pg_statistic row for each (non-system) attribute that has ever
+ * been analyzed. The pg_class values are updated by VACUUM, not here.
+ *
+ * pg_statistic rows are just added or updated normally. This means
+ * that pg_statistic will probably contain some deleted rows at the
+ * completion of a vacuum cycle, unless it happens to get vacuumed last.
+ *
+ * To keep things simple, we punt for pg_statistic, and don't try
+ * to compute or store rows for pg_statistic itself in pg_statistic.
+ * This could possibly be made to work, but it's not worth the trouble.
+ * Note analyze_rel() has seen to it that we won't come here when
+ * vacuuming pg_statistic itself.
+ */
+static void
+update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
+{
+ Relation sd;
+ int attno;
+
+ /*
+ * We use an ExclusiveLock on pg_statistic to ensure that only one
+ * backend is writing it at a time --- without that, we might have to
+ * deal with concurrent updates here, and it's not worth the trouble.
+ */
+ sd = heap_openr(StatisticRelationName, ExclusiveLock);
+
+ for (attno = 0; attno < natts; attno++)
{
- if (attcnt > 0)
+ VacAttrStats *stats = vacattrstats[attno];
+ FmgrInfo out_function;
+ HeapTuple stup,
+ oldtup;
+ int i, k, n;
+ Datum values[Natts_pg_statistic];
+ char nulls[Natts_pg_statistic];
+ char replaces[Natts_pg_statistic];
+ Relation irelations[Num_pg_statistic_indices];
+
+ /* Ignore attr if we weren't able to collect stats */
+ if (!stats->stats_valid)
+ continue;
+
+ fmgr_info(stats->attrtype->typoutput, &out_function);
+
+ /*
+ * Construct a new pg_statistic tuple
+ */
+ for (i = 0; i < Natts_pg_statistic; ++i)
{
- Form_pg_statistic pgs = (Form_pg_statistic) GETSTRUCT(tuple);
- int i;
+ nulls[i] = ' ';
+ replaces[i] = 'r';
+ }
- for (i = 0; i < attcnt; i++)
+ i = 0;
+ values[i++] = ObjectIdGetDatum(relid); /* starelid */
+ values[i++] = Int16GetDatum(stats->attnum); /* staattnum */
+ values[i++] = Float4GetDatum(stats->stanullfrac); /* stanullfrac */
+ values[i++] = Int32GetDatum(stats->stawidth); /* stawidth */
+ values[i++] = Float4GetDatum(stats->stadistinct); /* stadistinct */
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
+ }
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */
+ }
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ int nnum = stats->numnumbers[k];
+
+ if (nnum > 0)
{
- if (pgs->staattnum == attnums[i] + 1)
- break;
+ Datum *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
+ ArrayType *arry;
+
+ for (n = 0; n < nnum; n++)
+ numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
+ /* XXX knows more than it should about type float4: */
+ arry = construct_array(numdatums, nnum,
+ false, sizeof(float4), 'i');
+ values[i++] = PointerGetDatum(arry); /* stanumbersN */
+ }
+ else
+ {
+ nulls[i] = 'n';
+ values[i++] = (Datum) 0;
}
- if (i >= attcnt)
- continue; /* don't delete it */
}
- simple_heap_delete(pgstatistic, &tuple->t_self);
- }
+ for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+ {
+ int ntxt = stats->numvalues[k];
- heap_endscan(scan);
+ if (ntxt > 0)
+ {
+ Datum *txtdatums = (Datum *) palloc(ntxt * sizeof(Datum));
+ ArrayType *arry;
- /*
- * Close rel, but *keep* lock; we will need to reacquire it later, so
- * there's a possibility of deadlock against another VACUUM process if
- * we let go now. Keeping the lock shouldn't delay any common
- * operation other than an attempted VACUUM of pg_statistic itself.
- */
- heap_close(pgstatistic, NoLock);
+ for (n = 0; n < ntxt; n++)
+ {
+ /*
+ * Convert data values to a text string to be inserted
+ * into the text array.
+ */
+ Datum stringdatum;
+
+ stringdatum =
+ FunctionCall3(&out_function,
+ stats->stavalues[k][n],
+ ObjectIdGetDatum(stats->attrtype->typelem),
+ Int32GetDatum(stats->attr->atttypmod));
+ txtdatums[n] = DirectFunctionCall1(textin, stringdatum);
+ pfree(DatumGetPointer(stringdatum));
+ }
+ /* XXX knows more than it should about type text: */
+ arry = construct_array(txtdatums, ntxt,
+ false, -1, 'i');
+ values[i++] = PointerGetDatum(arry); /* stavaluesN */
+ }
+ else
+ {
+ nulls[i] = 'n';
+ values[i++] = (Datum) 0;
+ }
+ }
+
+ /* Is there already a pg_statistic tuple for this attribute? */
+ oldtup = SearchSysCache(STATRELATT,
+ ObjectIdGetDatum(relid),
+ Int16GetDatum(stats->attnum),
+ 0, 0);
+
+ if (HeapTupleIsValid(oldtup))
+ {
+ /* Yes, replace it */
+ stup = heap_modifytuple(oldtup,
+ sd,
+ values,
+ nulls,
+ replaces);
+ ReleaseSysCache(oldtup);
+ simple_heap_update(sd, &stup->t_self, stup);
+ }
+ else
+ {
+ /* No, insert new tuple */
+ stup = heap_formtuple(sd->rd_att, values, nulls);
+ heap_insert(sd, stup);
+ }
+
+ /* update indices too */
+ CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices,
+ irelations);
+ CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
+ CatalogCloseIndices(Num_pg_statistic_indices, irelations);
+
+ heap_freetuple(stup);
+ }
+
+ /* close rel, but hold lock till upcoming commit */
+ heap_close(sd, NoLock);
}
diff --git a/src/backend/commands/command.c b/src/backend/commands/command.c
index 96d493688e3..13a78f11773 100644
--- a/src/backend/commands/command.c
+++ b/src/backend/commands/command.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.125 2001/03/23 04:49:52 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.126 2001/05/07 00:43:17 tgl Exp $
*
* NOTES
* The PerformAddAttribute() code, like most of the relation
@@ -56,6 +56,7 @@
#include "access/genam.h"
+static void drop_default(Oid relid, int16 attnum);
static bool needs_toast_table(Relation rel);
static bool is_relation(char *name);
@@ -408,7 +409,7 @@ AlterTableAddColumn(const char *relationName,
HeapTuple typeTuple;
Form_pg_type tform;
char *typename;
- int attnelems;
+ int attndims;
if (SearchSysCacheExists(ATTNAME,
ObjectIdGetDatum(reltup->t_data->t_oid),
@@ -425,11 +426,11 @@ AlterTableAddColumn(const char *relationName,
if (colDef->typename->arrayBounds)
{
- attnelems = length(colDef->typename->arrayBounds);
+ attndims = length(colDef->typename->arrayBounds);
typename = makeArrayTypeName(colDef->typename->name);
}
else
- attnelems = 0;
+ attndims = 0;
typeTuple = SearchSysCache(TYPENAME,
PointerGetDatum(typename),
@@ -441,12 +442,12 @@ AlterTableAddColumn(const char *relationName,
namestrcpy(&(attribute->attname), colDef->colname);
attribute->atttypid = typeTuple->t_data->t_oid;
attribute->attlen = tform->typlen;
- attribute->attdispersion = 0;
+ attribute->attstattarget = DEFAULT_ATTSTATTARGET;
attribute->attcacheoff = -1;
attribute->atttypmod = colDef->typename->typmod;
attribute->attnum = i;
attribute->attbyval = tform->typbyval;
- attribute->attnelems = attnelems;
+ attribute->attndims = attndims;
attribute->attisset = (bool) (tform->typtype == 'c');
attribute->attstorage = tform->typstorage;
attribute->attalign = tform->typalign;
@@ -496,17 +497,13 @@ AlterTableAddColumn(const char *relationName,
}
-
-static void drop_default(Oid relid, int16 attnum);
-
-
/*
* ALTER TABLE ALTER COLUMN SET/DROP DEFAULT
*/
void
-AlterTableAlterColumn(const char *relationName,
- bool inh, const char *colName,
- Node *newDefault)
+AlterTableAlterColumnDefault(const char *relationName,
+ bool inh, const char *colName,
+ Node *newDefault)
{
Relation rel;
HeapTuple tuple;
@@ -551,8 +548,8 @@ AlterTableAlterColumn(const char *relationName,
if (childrelid == myrelid)
continue;
rel = heap_open(childrelid, AccessExclusiveLock);
- AlterTableAlterColumn(RelationGetRelationName(rel),
- false, colName, newDefault);
+ AlterTableAlterColumnDefault(RelationGetRelationName(rel),
+ false, colName, newDefault);
heap_close(rel, AccessExclusiveLock);
}
}
@@ -560,7 +557,7 @@ AlterTableAlterColumn(const char *relationName,
/* -= now do the thing on this relation =- */
/* reopen the business */
- rel = heap_openr((char *) relationName, AccessExclusiveLock);
+ rel = heap_openr(relationName, AccessExclusiveLock);
/*
* get the number of the attribute
@@ -647,7 +644,6 @@ AlterTableAlterColumn(const char *relationName,
}
-
static void
drop_default(Oid relid, int16 attnum)
{
@@ -675,6 +671,104 @@ drop_default(Oid relid, int16 attnum)
}
+/*
+ * ALTER TABLE ALTER COLUMN SET STATISTICS
+ */
+void
+AlterTableAlterColumnStatistics(const char *relationName,
+ bool inh, const char *colName,
+ Node *statsTarget)
+{
+ Relation rel;
+ Oid myrelid;
+ int newtarget;
+ Relation attrelation;
+ HeapTuple tuple;
+
+#ifndef NO_SECURITY
+ if (!pg_ownercheck(GetUserId(), relationName, RELNAME))
+ elog(ERROR, "ALTER TABLE: permission denied");
+#endif
+
+ rel = heap_openr(relationName, AccessExclusiveLock);
+ if (rel->rd_rel->relkind != RELKIND_RELATION)
+ elog(ERROR, "ALTER TABLE: relation \"%s\" is not a table",
+ relationName);
+ myrelid = RelationGetRelid(rel);
+ heap_close(rel, NoLock); /* close rel, but keep lock! */
+
+ /*
+ * Propagate to children if desired
+ */
+ if (inh)
+ {
+ List *child,
+ *children;
+
+ /* this routine is actually in the planner */
+ children = find_all_inheritors(myrelid);
+
+ /*
+ * find_all_inheritors does the recursive search of the
+ * inheritance hierarchy, so all we have to do is process all of
+ * the relids in the list that it returns.
+ */
+ foreach(child, children)
+ {
+ Oid childrelid = lfirsti(child);
+
+ if (childrelid == myrelid)
+ continue;
+ rel = heap_open(childrelid, AccessExclusiveLock);
+ AlterTableAlterColumnStatistics(RelationGetRelationName(rel),
+ false, colName, statsTarget);
+ heap_close(rel, AccessExclusiveLock);
+ }
+ }
+
+ /* -= now do the thing on this relation =- */
+
+ Assert(IsA(statsTarget, Integer));
+ newtarget = intVal(statsTarget);
+
+ /* Limit target to sane range (should we raise an error instead?) */
+ if (newtarget < 0)
+ newtarget = 0;
+ else if (newtarget > 1000)
+ newtarget = 1000;
+
+ attrelation = heap_openr(AttributeRelationName, RowExclusiveLock);
+
+ tuple = SearchSysCacheCopy(ATTNAME,
+ ObjectIdGetDatum(myrelid),
+ PointerGetDatum(colName),
+ 0, 0);
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "ALTER TABLE: relation \"%s\" has no column \"%s\"",
+ relationName, colName);
+
+ if (((Form_pg_attribute) GETSTRUCT(tuple))->attnum < 0)
+ elog(ERROR, "ALTER TABLE: cannot change system attribute \"%s\"",
+ colName);
+
+ ((Form_pg_attribute) GETSTRUCT(tuple))->attstattarget = newtarget;
+
+ simple_heap_update(attrelation, &tuple->t_self, tuple);
+
+ /* keep system catalog indices current */
+ {
+ Relation irelations[Num_pg_attr_indices];
+
+ CatalogOpenIndices(Num_pg_attr_indices, Name_pg_attr_indices, irelations);
+ CatalogIndexInsert(irelations, Num_pg_attr_indices, attrelation, tuple);
+ CatalogCloseIndices(Num_pg_attr_indices, irelations);
+ }
+
+ heap_freetuple(tuple);
+ heap_close(attrelation, RowExclusiveLock);
+}
+
+
#ifdef _DROP_COLUMN_HACK__
/*
* ALTER TABLE DROP COLUMN trial implementation
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 694d0e8bbc1..9a0dbdc8c8e 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.189 2001/03/25 23:23:58 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.190 2001/05/07 00:43:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,25 +53,90 @@ extern XLogRecPtr log_heap_move(Relation reln,
Buffer oldbuf, ItemPointerData from,
Buffer newbuf, HeapTuple newtup);
+
+typedef struct VRelListData
+{
+ Oid vrl_relid;
+ struct VRelListData *vrl_next;
+} VRelListData;
+
+typedef VRelListData *VRelList;
+
+typedef struct VacPageData
+{
+ BlockNumber blkno; /* BlockNumber of this Page */
+ Size free; /* FreeSpace on this Page */
+ uint16 offsets_used; /* Number of OffNums used by vacuum */
+ uint16 offsets_free; /* Number of OffNums free or to be free */
+ OffsetNumber offsets[1]; /* Array of its OffNums */
+} VacPageData;
+
+typedef VacPageData *VacPage;
+
+typedef struct VacPageListData
+{
+ int empty_end_pages;/* Number of "empty" end-pages */
+ int num_pages; /* Number of pages in pagedesc */
+ int num_allocated_pages; /* Number of allocated pages in
+ * pagedesc */
+ VacPage *pagedesc; /* Descriptions of pages */
+} VacPageListData;
+
+typedef VacPageListData *VacPageList;
+
+typedef struct VTupleLinkData
+{
+ ItemPointerData new_tid;
+ ItemPointerData this_tid;
+} VTupleLinkData;
+
+typedef VTupleLinkData *VTupleLink;
+
+typedef struct VTupleMoveData
+{
+ ItemPointerData tid; /* tuple ID */
+ VacPage vacpage; /* where to move */
+ bool cleanVpd; /* clean vacpage before using */
+} VTupleMoveData;
+
+typedef VTupleMoveData *VTupleMove;
+
+typedef struct VRelStats
+{
+ Oid relid;
+ long num_pages;
+ long num_tuples;
+ Size min_tlen;
+ Size max_tlen;
+ bool hasindex;
+ int num_vtlinks;
+ VTupleLink vtlinks;
+} VRelStats;
+
+
static MemoryContext vac_context = NULL;
static int MESSAGE_LEVEL; /* message level */
static TransactionId XmaxRecent;
+
/* non-export function prototypes */
static void vacuum_init(void);
static void vacuum_shutdown(void);
-static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2);
-static VRelList getrels(NameData *VacRelP);
+static VRelList getrels(Name VacRelP, const char *stmttype);
static void vacuum_rel(Oid relid);
-static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages);
-static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel);
-static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist);
+static void scan_heap(VRelStats *vacrelstats, Relation onerel,
+ VacPageList vacuum_pages, VacPageList fraged_pages);
+static void repair_frag(VRelStats *vacrelstats, Relation onerel,
+ VacPageList vacuum_pages, VacPageList fraged_pages,
+ int nindices, Relation *Irel);
+static void vacuum_heap(VRelStats *vacrelstats, Relation onerel,
+ VacPageList vacpagelist);
static void vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage);
-static void vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples);
-static void scan_index(Relation indrel, int num_tuples);
-static void update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
+static void vacuum_index(VacPageList vacpagelist, Relation indrel,
+ long num_tuples, int keep_tuples);
+static void scan_index(Relation indrel, long num_tuples);
static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist);
static void reap_page(VacPageList vacpagelist, VacPage vacpage);
static void vpage_insert(VacPageList vacpagelist, VacPage vpnew);
@@ -88,17 +153,17 @@ static bool enough_space(VacPage vacpage, Size len);
static char *show_rusage(struct rusage * ru0);
+/*
+ * Primary entry point for VACUUM and ANALYZE commands.
+ */
void
-vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
+vacuum(VacuumStmt *vacstmt)
{
+ const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE";
NameData VacRel;
Name VacRelName;
- MemoryContext old;
- List *le;
- List *anal_cols2 = NIL;
-
- if (anal_cols != NIL && !analyze)
- elog(ERROR, "Can't vacuum columns, only tables. You can 'vacuum analyze' columns.");
+ VRelList vrl,
+ cur;
/*
* We cannot run VACUUM inside a user transaction block; if we were
@@ -110,9 +175,9 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
* behavior.
*/
if (IsTransactionBlock())
- elog(ERROR, "VACUUM cannot run inside a BEGIN/END block");
+ elog(ERROR, "%s cannot run inside a BEGIN/END block", stmttype);
- if (verbose)
+ if (vacstmt->verbose)
MESSAGE_LEVEL = NOTICE;
else
MESSAGE_LEVEL = DEBUG;
@@ -130,37 +195,36 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
- /* vacrel gets de-allocated on xact commit, so copy it to safe storage */
- if (vacrel)
+ /* Convert vacrel, which is just a string, to a Name */
+ if (vacstmt->vacrel)
{
- namestrcpy(&VacRel, vacrel);
+ namestrcpy(&VacRel, vacstmt->vacrel);
VacRelName = &VacRel;
}
else
VacRelName = NULL;
- /* must also copy the column list, if any, to safe storage */
- old = MemoryContextSwitchTo(vac_context);
- foreach(le, anal_cols)
- {
- char *col = (char *) lfirst(le);
-
- anal_cols2 = lappend(anal_cols2, pstrdup(col));
- }
- MemoryContextSwitchTo(old);
+ /* Build list of relations to process (note this lives in vac_context) */
+ vrl = getrels(VacRelName, stmttype);
/*
* Start up the vacuum cleaner.
- *
- * NOTE: since this commits the current transaction, the memory holding
- * any passed-in parameters gets freed here. We must have already
- * copied pass-by-reference parameters to safe storage. Don't make me
- * fix this again!
*/
vacuum_init();
- /* vacuum the database */
- vac_vacuum(VacRelName, analyze, anal_cols2);
+ /*
+ * Process each selected relation. We are careful to process
+ * each relation in a separate transaction in order to avoid holding
+ * too many locks at one time.
+ */
+ for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
+ {
+ if (vacstmt->vacuum)
+ vacuum_rel(cur->vrl_relid);
+ /* analyze separately so locking is minimized */
+ if (vacstmt->analyze)
+ analyze_rel(cur->vrl_relid, vacstmt);
+ }
/* clean up */
vacuum_shutdown();
@@ -187,14 +251,14 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
* PostgresMain().
*/
static void
-vacuum_init()
+vacuum_init(void)
{
/* matches the StartTransaction in PostgresMain() */
CommitTransactionCommand();
}
static void
-vacuum_shutdown()
+vacuum_shutdown(void)
{
/* on entry, we are not in a transaction */
@@ -223,34 +287,10 @@ vacuum_shutdown()
}
/*
- * vac_vacuum() -- vacuum the database.
- *
- * This routine builds a list of relations to vacuum, and then calls
- * code that vacuums them one at a time. We are careful to vacuum each
- * relation in a separate transaction in order to avoid holding too many
- * locks at one time.
+ * Build a list of VRelListData nodes for each relation to be processed
*/
-static void
-vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2)
-{
- VRelList vrl,
- cur;
-
- /* get list of relations */
- vrl = getrels(VacRelP);
-
- /* vacuum each heap relation */
- for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
- {
- vacuum_rel(cur->vrl_relid);
- /* analyze separately so locking is minimized */
- if (analyze)
- analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL);
- }
-}
-
static VRelList
-getrels(NameData *VacRelP)
+getrels(Name VacRelP, const char *stmttype)
{
Relation rel;
TupleDesc tupdesc;
@@ -262,12 +302,9 @@ getrels(NameData *VacRelP)
char *rname;
char rkind;
bool n;
- bool found = false;
ScanKeyData key;
- StartTransactionCommand();
-
- if (NameStr(*VacRelP))
+ if (VacRelP)
{
/*
@@ -287,6 +324,7 @@ getrels(NameData *VacRelP)
}
else
{
+ /* find all relations listed in pg_class */
ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relkind,
F_CHAREQ, CharGetDatum('r'));
}
@@ -300,21 +338,20 @@ getrels(NameData *VacRelP)
while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
{
- found = true;
-
d = heap_getattr(tuple, Anum_pg_class_relname, tupdesc, &n);
- rname = (char *) DatumGetPointer(d);
+ rname = (char *) DatumGetName(d);
d = heap_getattr(tuple, Anum_pg_class_relkind, tupdesc, &n);
rkind = DatumGetChar(d);
if (rkind != RELKIND_RELATION)
{
- elog(NOTICE, "Vacuum: can not process indices, views and certain system tables");
+ elog(NOTICE, "%s: can not process indexes, views or special system tables",
+ stmttype);
continue;
}
- /* get a relation list entry for this guy */
+ /* Make a relation list entry for this guy */
if (vrl == (VRelList) NULL)
vrl = cur = (VRelList)
MemoryContextAlloc(vac_context, sizeof(VRelListData));
@@ -332,10 +369,8 @@ getrels(NameData *VacRelP)
heap_endscan(scan);
heap_close(rel, AccessShareLock);
- if (!found)
- elog(NOTICE, "Vacuum: table not found");
-
- CommitTransactionCommand();
+ if (vrl == NULL)
+ elog(NOTICE, "%s: table not found", stmttype);
return vrl;
}
@@ -432,7 +467,8 @@ vacuum_rel(Oid relid)
*/
vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
vacrelstats->relid = relid;
- vacrelstats->num_pages = vacrelstats->num_tuples = 0;
+ vacrelstats->num_pages = 0;
+ vacrelstats->num_tuples = 0;
vacrelstats->hasindex = false;
GetXmaxRecent(&XmaxRecent);
@@ -457,8 +493,8 @@ vacuum_rel(Oid relid)
vacrelstats->hasindex = true;
else
vacrelstats->hasindex = false;
-#ifdef NOT_USED
+#ifdef NOT_USED
/*
* reindex in VACUUM is dangerous under WAL. ifdef out until it
* becomes safe.
@@ -528,9 +564,8 @@ vacuum_rel(Oid relid)
heap_close(onerel, NoLock);
/* update statistics in pg_class */
- update_relstats(vacrelstats->relid, vacrelstats->num_pages,
- vacrelstats->num_tuples, vacrelstats->hasindex,
- vacrelstats);
+ vac_update_relstats(vacrelstats->relid, vacrelstats->num_pages,
+ vacrelstats->num_tuples, vacrelstats->hasindex);
/*
* Complete the transaction and free all temporary memory used.
@@ -582,8 +617,8 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
char *relname;
VacPage vacpage,
vp;
+ long num_tuples;
uint32 tups_vacuumed,
- num_tuples,
nkeep,
nunused,
ncrash,
@@ -913,7 +948,6 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
/* save stats in the rel list for use later */
vacrelstats->num_tuples = num_tuples;
vacrelstats->num_pages = nblocks;
-/* vacrelstats->natts = attr_cnt;*/
if (num_tuples == 0)
min_tlen = max_tlen = 0;
vacrelstats->min_tlen = min_tlen;
@@ -960,7 +994,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
}
elog(MESSAGE_LEVEL, "Pages %u: Changed %u, reaped %u, Empty %u, New %u; \
-Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
+Tup %lu: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
Re-using: Free/Avail. Space %lu/%lu; EndEmpty/Avail. Pages %u/%u. %s",
nblocks, changed_pages, vacuum_pages->num_pages, empty_pages,
new_pages, num_tuples, tups_vacuumed,
@@ -2009,7 +2043,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
{
Buffer buf;
VacPage *vacpage;
- int nblocks;
+ long nblocks;
int i;
nblocks = vacuum_pages->num_pages;
@@ -2044,7 +2078,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
/* truncate relation if there are some empty end-pages */
if (vacuum_pages->empty_end_pages > 0)
{
- elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.",
+ elog(MESSAGE_LEVEL, "Rel %s: Pages: %lu --> %lu.",
RelationGetRelationName(onerel),
vacrelstats->num_pages, nblocks);
nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks);
@@ -2094,11 +2128,11 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
*
*/
static void
-scan_index(Relation indrel, int num_tuples)
+scan_index(Relation indrel, long num_tuples)
{
RetrieveIndexResult res;
IndexScanDesc iscan;
- int nitups;
+ long nitups;
int nipages;
struct rusage ru0;
@@ -2119,14 +2153,14 @@ scan_index(Relation indrel, int num_tuples)
/* now update statistics in pg_class */
nipages = RelationGetNumberOfBlocks(indrel);
- update_relstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);
+ vac_update_relstats(RelationGetRelid(indrel), nipages, nitups, false);
- elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. %s",
+ elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu. %s",
RelationGetRelationName(indrel), nipages, nitups,
show_rusage(&ru0));
if (nitups != num_tuples)
- elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+ elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
\n\tRecreate the index.",
RelationGetRelationName(indrel), nitups, num_tuples);
@@ -2145,13 +2179,14 @@ scan_index(Relation indrel, int num_tuples)
* pg_class.
*/
static void
-vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples)
+vacuum_index(VacPageList vacpagelist, Relation indrel,
+ long num_tuples, int keep_tuples)
{
RetrieveIndexResult res;
IndexScanDesc iscan;
ItemPointer heapptr;
int tups_vacuumed;
- int num_index_tuples;
+ long num_index_tuples;
int num_pages;
VacPage vp;
struct rusage ru0;
@@ -2196,15 +2231,16 @@ vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_
/* now update statistics in pg_class */
num_pages = RelationGetNumberOfBlocks(indrel);
- update_relstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);
+ vac_update_relstats(RelationGetRelid(indrel),
+ num_pages, num_index_tuples, false);
- elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. %s",
+ elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu: Deleted %u. %s",
RelationGetRelationName(indrel), num_pages,
num_index_tuples - keep_tuples, tups_vacuumed,
show_rusage(&ru0));
if (num_index_tuples != num_tuples + keep_tuples)
- elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+ elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
\n\tRecreate the index.",
RelationGetRelationName(indrel), num_index_tuples, num_tuples);
@@ -2255,7 +2291,7 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
}
/*
- * update_relstats() -- update statistics for one relation
+ * vac_update_relstats() -- update statistics for one relation
*
* Update the whole-relation statistics that are kept in its pg_class
* row. There are additional stats that will be updated if we are
@@ -2268,13 +2304,12 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
* we updated these tuples in the usual way, vacuuming pg_class itself
* wouldn't work very well --- by the time we got done with a vacuum
* cycle, most of the tuples in pg_class would've been obsoleted.
- * Updating pg_class's own statistics would be especially tricky.
* Of course, this only works for fixed-size never-null columns, but
* these are.
*/
-static void
-update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex,
- VRelStats *vacrelstats)
+void
+vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+ bool hasindex)
{
Relation rd;
HeapTupleData rtup;
diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c
index 12c6f82a8b2..e0543a28109 100644
--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.32 2001/03/22 06:16:13 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.33 2001/05/07 00:43:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -20,24 +20,24 @@
#include "utils/tuplesort.h"
/* ----------------------------------------------------------------
- * FormSortKeys(node)
+ * ExtractSortKeys
*
- * Forms the structure containing information used to sort the relation.
+ * Extract the sorting key information from the plan node.
*
- * Returns an array of ScanKeyData.
+ * Returns two palloc'd arrays, one of sort operator OIDs and
+ * one of attribute numbers.
* ----------------------------------------------------------------
*/
-static ScanKey
-FormSortKeys(Sort *sortnode)
+static void
+ExtractSortKeys(Sort *sortnode,
+ Oid **sortOperators,
+ AttrNumber **attNums)
{
- ScanKey sortkeys;
List *targetList;
- List *tl;
int keycount;
- Resdom *resdom;
- AttrNumber resno;
- Index reskey;
- Oid reskeyop;
+ Oid *sortOps;
+ AttrNumber *attNos;
+ List *tl;
/*
* get information from the node
@@ -46,36 +46,33 @@ FormSortKeys(Sort *sortnode)
keycount = sortnode->keycount;
/*
- * first allocate space for scan keys
+ * first allocate space for results
*/
if (keycount <= 0)
- elog(ERROR, "FormSortKeys: keycount <= 0");
- sortkeys = (ScanKey) palloc(keycount * sizeof(ScanKeyData));
- MemSet((char *) sortkeys, 0, keycount * sizeof(ScanKeyData));
+ elog(ERROR, "ExtractSortKeys: keycount <= 0");
+ sortOps = (Oid *) palloc(keycount * sizeof(Oid));
+ MemSet(sortOps, 0, keycount * sizeof(Oid));
+ *sortOperators = sortOps;
+ attNos = (AttrNumber *) palloc(keycount * sizeof(AttrNumber));
+ MemSet(attNos, 0, keycount * sizeof(AttrNumber));
+ *attNums = attNos;
/*
- * form each scan key from the resdom info in the target list
+ * extract info from the resdom nodes in the target list
*/
foreach(tl, targetList)
{
TargetEntry *target = (TargetEntry *) lfirst(tl);
-
- resdom = target->resdom;
- resno = resdom->resno;
- reskey = resdom->reskey;
- reskeyop = resdom->reskeyop;
+ Resdom *resdom = target->resdom;
+ Index reskey = resdom->reskey;
if (reskey > 0) /* ignore TLEs that are not sort keys */
{
- ScanKeyEntryInitialize(&sortkeys[reskey - 1],
- 0x0,
- resno,
- (RegProcedure) reskeyop,
- (Datum) 0);
+ Assert(reskey <= keycount);
+ sortOps[reskey - 1] = resdom->reskeyop;
+ attNos[reskey - 1] = resdom->resno;
}
}
-
- return sortkeys;
}
/* ----------------------------------------------------------------
@@ -124,8 +121,8 @@ ExecSort(Sort *node)
{
Plan *outerNode;
TupleDesc tupDesc;
- int keycount;
- ScanKey sortkeys;
+ Oid *sortOperators;
+ AttrNumber *attNums;
SO1_printf("ExecSort: %s\n",
"sorting subplan");
@@ -145,14 +142,17 @@ ExecSort(Sort *node)
outerNode = outerPlan((Plan *) node);
tupDesc = ExecGetTupType(outerNode);
- keycount = node->keycount;
- sortkeys = (ScanKey) sortstate->sort_Keys;
- tuplesortstate = tuplesort_begin_heap(tupDesc, keycount, sortkeys,
- true /* randomAccess */ );
+ ExtractSortKeys(node, &sortOperators, &attNums);
+ tuplesortstate = tuplesort_begin_heap(tupDesc, node->keycount,
+ sortOperators, attNums,
+ true /* randomAccess */ );
sortstate->tuplesortstate = (void *) tuplesortstate;
+ pfree(sortOperators);
+ pfree(attNums);
+
/*
* Scan the subplan and feed all the tuples to tuplesort.
*/
@@ -230,7 +230,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
*/
sortstate = makeNode(SortState);
sortstate->sort_Done = false;
- sortstate->sort_Keys = NULL;
sortstate->tuplesortstate = NULL;
node->sortstate = sortstate;
@@ -259,11 +258,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
ExecInitNode(outerPlan, estate, (Plan *) node);
/*
- * initialize sortstate information
- */
- sortstate->sort_Keys = FormSortKeys(node);
-
- /*
* initialize tuple type. no need to initialize projection info
* because this node doesn't do projections.
*/
@@ -321,9 +315,6 @@ ExecEndSort(Sort *node)
tuplesort_end((Tuplesortstate *) sortstate->tuplesortstate);
sortstate->tuplesortstate = NULL;
- if (sortstate->sort_Keys != NULL)
- pfree(sortstate->sort_Keys);
-
pfree(sortstate);
node->sortstate = NULL;
diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c
index ad50630931e..ee5a803b802 100644
--- a/src/backend/nodes/copyfuncs.c
+++ b/src/backend/nodes/copyfuncs.c
@@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.140 2001/03/22 06:16:14 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.141 2001/05/07 00:43:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1378,8 +1378,8 @@ _copyRestrictInfo(RestrictInfo *from)
newnode->left_pathkey = NIL;
newnode->right_pathkey = NIL;
newnode->hashjoinoperator = from->hashjoinoperator;
- newnode->left_dispersion = from->left_dispersion;
- newnode->right_dispersion = from->right_dispersion;
+ newnode->left_bucketsize = from->left_bucketsize;
+ newnode->right_bucketsize = from->right_bucketsize;
return newnode;
}
@@ -2209,11 +2209,12 @@ _copyVacuumStmt(VacuumStmt *from)
{
VacuumStmt *newnode = makeNode(VacuumStmt);
- newnode->verbose = from->verbose;
+ newnode->vacuum = from->vacuum;
newnode->analyze = from->analyze;
+ newnode->verbose = from->verbose;
if (from->vacrel)
newnode->vacrel = pstrdup(from->vacrel);
- Node_Copy(from, newnode, va_spec);
+ Node_Copy(from, newnode, va_cols);
return newnode;
}
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index 06ee63bbacd..284a534aa96 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -20,7 +20,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.88 2001/03/22 03:59:31 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.89 2001/05/07 00:43:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -516,7 +516,7 @@ _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b)
return false;
/*
- * ignore eval_cost, left/right_pathkey, and left/right_dispersion,
+ * ignore eval_cost, left/right_pathkey, and left/right_bucketsize,
* since they may not be set yet, and should be derivable from the
* clause anyway
*/
@@ -1113,13 +1113,15 @@ _equalDropdbStmt(DropdbStmt *a, DropdbStmt *b)
static bool
_equalVacuumStmt(VacuumStmt *a, VacuumStmt *b)
{
- if (a->verbose != b->verbose)
+ if (a->vacuum != b->vacuum)
return false;
if (a->analyze != b->analyze)
return false;
+ if (a->verbose != b->verbose)
+ return false;
if (!equalstr(a->vacrel, b->vacrel))
return false;
- if (!equal(a->va_spec, b->va_spec))
+ if (!equal(a->va_cols, b->va_cols))
return false;
return true;
diff --git a/src/backend/nodes/readfuncs.c b/src/backend/nodes/readfuncs.c
index 9a071e7a250..4c0c1b03ef5 100644
--- a/src/backend/nodes/readfuncs.c
+++ b/src/backend/nodes/readfuncs.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.107 2001/03/22 03:59:32 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.108 2001/05/07 00:43:19 tgl Exp $
*
* NOTES
* Most of the read functions for plan nodes are tested. (In fact, they
@@ -1874,11 +1874,11 @@ _readRestrictInfo(void)
/* eval_cost is not part of saved representation; compute on first use */
local_node->eval_cost = -1;
- /* ditto for cached pathkeys and dispersion */
+ /* ditto for cached pathkeys and bucketsize */
local_node->left_pathkey = NIL;
local_node->right_pathkey = NIL;
- local_node->left_dispersion = -1;
- local_node->right_dispersion = -1;
+ local_node->left_bucketsize = -1;
+ local_node->right_bucketsize = -1;
return local_node;
}
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index c52af72a16b..bdfbbb18186 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -41,7 +41,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.70 2001/04/25 22:04:37 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.71 2001/05/07 00:43:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -50,11 +50,15 @@
#include <math.h>
+#include "catalog/pg_statistic.h"
#include "executor/nodeHash.h"
#include "miscadmin.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "parser/parsetree.h"
#include "utils/lsyscache.h"
+#include "utils/syscache.h"
/*
@@ -573,7 +577,7 @@ cost_mergejoin(Path *path,
* 'outer_path' is the path for the outer relation
* 'inner_path' is the path for the inner relation
* 'restrictlist' are the RestrictInfo nodes to be applied at the join
- * 'innerdispersion' is an estimate of the dispersion statistic
+ * 'innerbucketsize' is an estimate of the bucketsize statistic
* for the inner hash key.
*/
void
@@ -581,7 +585,7 @@ cost_hashjoin(Path *path,
Path *outer_path,
Path *inner_path,
List *restrictlist,
- Selectivity innerdispersion)
+ Selectivity innerbucketsize)
{
Cost startup_cost = 0;
Cost run_cost = 0;
@@ -607,22 +611,20 @@ cost_hashjoin(Path *path,
/*
* The number of tuple comparisons needed is the number of outer
- * tuples times the typical hash bucket size. nodeHash.c tries for
- * average bucket loading of NTUP_PER_BUCKET, but that goal will be
- * reached only if data values are uniformly distributed among the
- * buckets. To be conservative, we scale up the target bucket size by
- * the number of inner rows times inner dispersion, giving an estimate
- * of the typical number of duplicates of each value. We then charge
- * one cpu_operator_cost per tuple comparison.
+ * tuples times the typical number of tuples in a hash bucket,
+ * which is the inner relation size times its bucketsize fraction.
+ * We charge one cpu_operator_cost per tuple comparison.
*/
run_cost += cpu_operator_cost * outer_path->parent->rows *
- NTUP_PER_BUCKET * ceil(inner_path->parent->rows * innerdispersion);
+ ceil(inner_path->parent->rows * innerbucketsize);
/*
* Estimate the number of tuples that get through the hashing filter
* as one per tuple in the two source relations. This could be a
* drastic underestimate if there are many equal-keyed tuples in
- * either relation, but we have no good way of estimating that...
+ * either relation, but we have no simple way of estimating that;
+ * and since this is only a second-order parameter, it's probably
+ * not worth expending a lot of effort on the estimate.
*/
ntuples = outer_path->parent->rows + inner_path->parent->rows;
@@ -651,7 +653,7 @@ cost_hashjoin(Path *path,
/*
* Bias against putting larger relation on inside. We don't want an
* absolute prohibition, though, since larger relation might have
- * better dispersion --- and we can't trust the size estimates
+ * better bucketsize --- and we can't trust the size estimates
* unreservedly, anyway. Instead, inflate the startup cost by the
* square root of the size ratio. (Why square root? No real good
* reason, but it seems reasonable...)
@@ -663,6 +665,171 @@ cost_hashjoin(Path *path,
path->total_cost = startup_cost + run_cost;
}
+/*
+ * Estimate hash bucketsize fraction (ie, number of entries in a bucket
+ * divided by total tuples in relation) if the specified Var is used
+ * as a hash key.
+ *
+ * This statistic is used by cost_hashjoin. We split out the calculation
+ * because it's useful to cache the result for re-use across multiple path
+ * cost calculations.
+ *
+ * XXX This is really pretty bogus since we're effectively assuming that the
+ * distribution of hash keys will be the same after applying restriction
+ * clauses as it was in the underlying relation. However, we are not nearly
+ * smart enough to figure out how the restrict clauses might change the
+ * distribution, so this will have to do for now.
+ *
+ * The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
+ * number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
+ * a bucketsize fraction of NTUP_PER_BUCKET / ntuples. But that goal will
+ * be reached only if the data values are uniformly distributed among the
+ * buckets, which requires (a) at least ntuples / NTUP_PER_BUCKET distinct
+ * data values, and (b) a not-too-skewed data distribution. Otherwise the
+ * buckets will be nonuniformly occupied. If the other relation in the join
+ * has a similar distribution, the most-loaded buckets are exactly those
+ * that will be probed most often. Therefore, the "average" bucket size for
+ * costing purposes should really be taken as something close to the "worst
+ * case" bucket size. We try to estimate this by first scaling up if there
+ * are too few distinct data values, and then scaling up again by the
+ * ratio of the most common value's frequency to the average frequency.
+ *
+ * If no statistics are available, use a default estimate of 0.1. This will
+ * discourage use of a hash rather strongly if the inner relation is large,
+ * which is what we want. We do not want to hash unless we know that the
+ * inner rel is well-dispersed (or the alternatives seem much worse).
+ */
+Selectivity
+estimate_hash_bucketsize(Query *root, Var *var)
+{
+ Oid relid;
+ RelOptInfo *rel;
+ HeapTuple tuple;
+ Form_pg_statistic stats;
+ double estfract,
+ ndistinct,
+ needdistinct,
+ mcvfreq,
+ avgfreq;
+ float4 *numbers;
+ int nnumbers;
+
+ /*
+ * Lookup info about var's relation and attribute;
+ * if none available, return default estimate.
+ */
+ if (!IsA(var, Var))
+ return 0.1;
+
+ relid = getrelid(var->varno, root->rtable);
+ if (relid == InvalidOid)
+ return 0.1;
+
+ rel = get_base_rel(root, var->varno);
+
+ if (rel->tuples <= 0.0 || rel->rows <= 0.0)
+ return 0.1; /* ensure we can divide below */
+
+ tuple = SearchSysCache(STATRELATT,
+ ObjectIdGetDatum(relid),
+ Int16GetDatum(var->varattno),
+ 0, 0);
+ if (!HeapTupleIsValid(tuple))
+ {
+ /*
+ * Perhaps the Var is a system attribute; if so, it will have no
+ * entry in pg_statistic, but we may be able to guess something
+ * about its distribution anyway.
+ */
+ switch (var->varattno)
+ {
+ case ObjectIdAttributeNumber:
+ case SelfItemPointerAttributeNumber:
+ /* these are unique, so buckets should be well-distributed */
+ return (double) NTUP_PER_BUCKET / rel->rows;
+ case TableOidAttributeNumber:
+ /* hashing this is a terrible idea... */
+ return 1.0;
+ }
+ return 0.1;
+ }
+ stats = (Form_pg_statistic) GETSTRUCT(tuple);
+
+ /*
+ * Obtain number of distinct data values in raw relation.
+ */
+ ndistinct = stats->stadistinct;
+ if (ndistinct < 0.0)
+ ndistinct = -ndistinct * rel->tuples;
+
+ /*
+ * Adjust ndistinct to account for restriction clauses. Observe we are
+ * assuming that the data distribution is affected uniformly by the
+ * restriction clauses!
+ *
+ * XXX Possibly better way, but much more expensive: multiply by
+ * selectivity of rel's restriction clauses that mention the target Var.
+ */
+ ndistinct *= rel->rows / rel->tuples;
+
+ /*
+ * Discourage use of hash join if there seem not to be very many distinct
+ * data values. The threshold here is somewhat arbitrary, as is the
+ * fraction used to "discourage" the choice.
+ */
+ if (ndistinct < 50.0)
+ {
+ ReleaseSysCache(tuple);
+ return 0.5;
+ }
+
+ /*
+ * Form initial estimate of bucketsize fraction. Here we use rel->rows,
+ * ie the number of rows after applying restriction clauses, because
+ * that's what the fraction will eventually be multiplied by in
+ * cost_hashjoin.
+ */
+ estfract = (double) NTUP_PER_BUCKET / rel->rows;
+
+ /*
+ * Adjust estimated bucketsize if too few distinct values to fill
+ * all the buckets.
+ */
+ needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
+ if (ndistinct < needdistinct)
+ estfract *= needdistinct / ndistinct;
+
+ /*
+ * Look up the frequency of the most common value, if available.
+ */
+ mcvfreq = 0.0;
+
+ if (get_attstatsslot(tuple, var->vartype, var->vartypmod,
+ STATISTIC_KIND_MCV, InvalidOid,
+ NULL, NULL, &numbers, &nnumbers))
+ {
+ /*
+ * The first MCV stat is for the most common value.
+ */
+ if (nnumbers > 0)
+ mcvfreq = numbers[0];
+ free_attstatsslot(var->vartype, NULL, 0,
+ numbers, nnumbers);
+ }
+
+ /*
+ * Adjust estimated bucketsize upward to account for skewed distribution.
+ */
+ avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
+
+ if (avgfreq > 0.0 && mcvfreq > avgfreq)
+ estfract *= mcvfreq / avgfreq;
+
+ ReleaseSysCache(tuple);
+
+ return (Selectivity) estfract;
+}
+
/*
* cost_qual_eval
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index d41336ddcee..cd7cabd41de 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,15 +8,15 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.63 2001/04/15 00:48:17 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.64 2001/05/07 00:43:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
+#include "postgres.h"
+
#include <sys/types.h>
#include <math.h>
-#include "postgres.h"
-
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
@@ -45,7 +45,6 @@ static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
List *restrictlist, JoinType jointype);
static Path *best_innerjoin(List *join_paths, List *outer_relid,
JoinType jointype);
-static Selectivity estimate_dispersion(Query *root, Var *var);
static List *select_mergejoin_clauses(RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
@@ -722,7 +721,7 @@ hash_inner_and_outer(Query *root,
Expr *clause;
Var *left,
*right;
- Selectivity innerdispersion;
+ Selectivity innerbucketsize;
List *hashclauses;
if (restrictinfo->hashjoinoperator == InvalidOid)
@@ -742,34 +741,34 @@ hash_inner_and_outer(Query *root,
/*
* Check if clause is usable with these sub-rels, find inner side,
- * estimate dispersion of inner var for costing purposes.
+ * estimate bucketsize of inner var for costing purposes.
*
* Since we tend to visit the same clauses over and over when
- * planning a large query, we cache the dispersion estimates in
+ * planning a large query, we cache the bucketsize estimates in
* the RestrictInfo node to avoid repeated lookups of statistics.
*/
if (intMember(left->varno, outerrelids) &&
intMember(right->varno, innerrelids))
{
/* righthand side is inner */
- innerdispersion = restrictinfo->right_dispersion;
- if (innerdispersion < 0)
+ innerbucketsize = restrictinfo->right_bucketsize;
+ if (innerbucketsize < 0)
{
/* not cached yet */
- innerdispersion = estimate_dispersion(root, right);
- restrictinfo->right_dispersion = innerdispersion;
+ innerbucketsize = estimate_hash_bucketsize(root, right);
+ restrictinfo->right_bucketsize = innerbucketsize;
}
}
else if (intMember(left->varno, innerrelids) &&
intMember(right->varno, outerrelids))
{
/* lefthand side is inner */
- innerdispersion = restrictinfo->left_dispersion;
- if (innerdispersion < 0)
+ innerbucketsize = restrictinfo->left_bucketsize;
+ if (innerbucketsize < 0)
{
/* not cached yet */
- innerdispersion = estimate_dispersion(root, left);
- restrictinfo->left_dispersion = innerdispersion;
+ innerbucketsize = estimate_hash_bucketsize(root, left);
+ restrictinfo->left_bucketsize = innerbucketsize;
}
}
else
@@ -790,7 +789,7 @@ hash_inner_and_outer(Query *root,
innerrel->cheapest_total_path,
restrictlist,
hashclauses,
- innerdispersion));
+ innerbucketsize));
if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
add_path(joinrel, (Path *)
create_hashjoin_path(joinrel,
@@ -799,7 +798,7 @@ hash_inner_and_outer(Query *root,
innerrel->cheapest_total_path,
restrictlist,
hashclauses,
- innerdispersion));
+ innerbucketsize));
}
}
@@ -867,31 +866,6 @@ best_innerjoin(List *join_paths, Relids outer_relids, JoinType jointype)
}
/*
- * Estimate dispersion of the specified Var
- *
- * We use a default of 0.1 if we can't figure out anything better.
- * This will typically discourage use of a hash rather strongly,
- * if the inner relation is large. We do not want to hash unless
- * we know that the inner rel is well-dispersed (or the alternatives
- * seem much worse).
- */
-static Selectivity
-estimate_dispersion(Query *root, Var *var)
-{
- Oid relid;
-
- if (!IsA(var, Var))
- return 0.1;
-
- relid = getrelid(var->varno, root->rtable);
-
- if (relid == InvalidOid)
- return 0.1;
-
- return (Selectivity) get_attdispersion(relid, var->varattno, 0.1);
-}
-
-/*
* select_mergejoin_clauses
* Select mergejoin clauses that are usable for a particular join.
* Returns a list of RestrictInfo nodes for those clauses.
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 8c3b00289d3..2d264c46881 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,14 +10,14 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.104 2001/03/22 03:59:36 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.105 2001/05/07 00:43:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
-#include <sys/types.h>
-
#include "postgres.h"
+#include <sys/types.h>
+
#include "catalog/pg_index.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
@@ -1484,9 +1484,9 @@ make_sort_from_pathkeys(List *tlist, Plan *lefttree, List *pathkeys)
*/
if (resdom->reskey == 0)
{
- /* OK, mark it as a sort key and set the sort operator regproc */
+ /* OK, mark it as a sort key and set the sort operator */
resdom->reskey = ++numsortkeys;
- resdom->reskeyop = get_opcode(pathkey->sortop);
+ resdom->reskeyop = pathkey->sortop;
}
}
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 7c3e15a8f88..5d67e02dacb 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,13 +8,14 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.59 2001/04/16 19:44:10 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.60 2001/05/07 00:43:21 tgl Exp $
*
*-------------------------------------------------------------------------
*/
+#include "postgres.h"
+
#include <sys/types.h>
-#include "postgres.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "nodes/makefuncs.h"
@@ -348,8 +349,8 @@ distribute_qual_to_rels(Query *root, Node *clause,
restrictinfo->left_pathkey = NIL; /* not computable yet */
restrictinfo->right_pathkey = NIL;
restrictinfo->hashjoinoperator = InvalidOid;
- restrictinfo->left_dispersion = -1; /* not computed until needed */
- restrictinfo->right_dispersion = -1;
+ restrictinfo->left_bucketsize = -1; /* not computed until needed */
+ restrictinfo->right_bucketsize = -1;
/*
* Retrieve all relids and vars contained within the clause.
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index b2ab4600209..0aba4808c16 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.105 2001/04/30 19:24:47 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.106 2001/05/07 00:43:21 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1367,7 +1367,7 @@ make_groupplan(List *group_tlist,
{
/* OK, insert the ordering info needed by the executor. */
resdom->reskey = ++keyno;
- resdom->reskeyop = get_opcode(grpcl->sortop);
+ resdom->reskeyop = grpcl->sortop;
}
}
@@ -1412,7 +1412,7 @@ make_sortplan(List *tlist, Plan *plannode, List *sortcls)
{
/* OK, insert the ordering info needed by the executor. */
resdom->reskey = ++keyno;
- resdom->reskeyop = get_opcode(sortcl->sortop);
+ resdom->reskeyop = sortcl->sortop;
}
}
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 0b173466cf9..ede4159d970 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -14,7 +14,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.62 2001/03/27 18:02:19 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.63 2001/05/07 00:43:22 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -682,8 +682,8 @@ adjust_inherited_attrs_mutator(Node *node,
newinfo->eval_cost = -1; /* reset this too */
newinfo->left_pathkey = NIL; /* and these */
newinfo->right_pathkey = NIL;
- newinfo->left_dispersion = -1;
- newinfo->right_dispersion = -1;
+ newinfo->left_bucketsize = -1;
+ newinfo->right_bucketsize = -1;
return (Node *) newinfo;
}
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index cfba3ee395f..407c132b4f7 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,14 +8,14 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.71 2001/03/22 03:59:39 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.72 2001/05/07 00:43:22 tgl Exp $
*
*-------------------------------------------------------------------------
*/
-#include <math.h>
-
#include "postgres.h"
+#include <math.h>
+
#include "nodes/plannodes.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
@@ -559,7 +559,7 @@ create_mergejoin_path(RelOptInfo *joinrel,
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'hashclauses' is a list of the hash join clause (always a 1-element list)
* (this should be a subset of the restrict_clauses list)
- * 'innerdispersion' is an estimate of the dispersion of the inner hash key
+ * 'innerbucketsize' is an estimate of the bucketsize of the inner hash key
*
*/
HashPath *
@@ -569,7 +569,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
Path *inner_path,
List *restrict_clauses,
List *hashclauses,
- Selectivity innerdispersion)
+ Selectivity innerbucketsize)
{
HashPath *pathnode = makeNode(HashPath);
@@ -587,7 +587,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
outer_path,
inner_path,
restrict_clauses,
- innerdispersion);
+ innerbucketsize);
return pathnode;
}
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 4f711df203c..ee3523553e8 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -9,11 +9,10 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.64 2001/03/22 03:59:40 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.65 2001/05/07 00:43:22 tgl Exp $
*
*-------------------------------------------------------------------------
*/
-
#include "postgres.h"
#include <math.h>
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 4687a559962..76cc095bc4e 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.183 2001/03/22 06:16:15 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.184 2001/05/07 00:43:22 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -2660,7 +2660,7 @@ transformForUpdate(Query *qry, List *forUpdate)
/* just the named tables */
foreach(l, forUpdate)
{
- char *relname = lfirst(l);
+ char *relname = strVal(lfirst(l));
i = 0;
foreach(rt, qry->rtable)
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index bed0ce239a4..40c379aca51 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.222 2001/05/01 01:36:10 thomas Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.223 2001/05/07 00:43:23 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@@ -104,7 +104,6 @@ static void doNegateFloat(Value *v);
char *str;
bool boolean;
JoinType jtype;
- InhOption inhOpt;
List *list;
Node *node;
Value *value;
@@ -130,6 +129,7 @@ static void doNegateFloat(Value *v);
%type <node> stmt,
AlterGroupStmt, AlterSchemaStmt, AlterTableStmt, AlterUserStmt,
+ AnalyzeStmt,
ClosePortalStmt, ClusterStmt, CommentStmt, ConstraintsSetStmt,
CopyStmt, CreateAsStmt, CreateGroupStmt, CreatePLangStmt,
CreateSchemaStmt, CreateSeqStmt, CreateStmt, CreateTrigStmt,
@@ -147,7 +147,7 @@ static void doNegateFloat(Value *v);
%type <node> select_no_parens, select_with_parens, select_clause,
simple_select
-%type <node> alter_column_action
+%type <node> alter_column_default
%type <ival> drop_behavior
%type <list> createdb_opt_list, createdb_opt_item
@@ -185,7 +185,7 @@ static void doNegateFloat(Value *v);
OptTableElementList, OptInherit, definition, opt_distinct,
opt_with, func_args, func_args_list, func_as,
oper_argtypes, RuleActionList, RuleActionMulti,
- opt_column_list, columnList, opt_va_list, va_list,
+ opt_column_list, columnList, opt_name_list,
sort_clause, sortby_list, index_params, index_list, name_list,
from_clause, from_list, opt_array_bounds,
expr_list, attrs, target_list, update_target_list,
@@ -210,9 +210,7 @@ static void doNegateFloat(Value *v);
%type <node> substr_from, substr_for
%type <boolean> opt_binary, opt_using, opt_instead, opt_cursor
-%type <boolean> opt_with_copy, index_opt_unique, opt_verbose, opt_analyze
-
-%type <inhOpt> opt_inh_star, opt_only
+%type <boolean> opt_with_copy, index_opt_unique, opt_verbose, analyze_keyword
%type <ival> copy_dirn, direction, reindex_type, drop_type,
opt_column, event, comment_type, comment_cl,
@@ -350,7 +348,8 @@ static void doNegateFloat(Value *v);
NEW, NOCREATEDB, NOCREATEUSER, NONE, NOTHING, NOTIFY, NOTNULL,
OFFSET, OIDS, OPERATOR, OWNER, PASSWORD, PROCEDURAL,
REINDEX, RENAME, RESET, RETURNS, ROW, RULE,
- SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT, STDIN, STDOUT, SYSID,
+ SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT,
+ STATISTICS, STDIN, STDOUT, SYSID,
TEMP, TEMPLATE, TOAST, TRUNCATE, TRUSTED,
UNLISTEN, UNTIL, VACUUM, VALID, VERBOSE, VERSION
@@ -470,6 +469,7 @@ stmt : AlterSchemaStmt
| CreatedbStmt
| DropdbStmt
| VacuumStmt
+ | AnalyzeStmt
| VariableSetStmt
| VariableShowStmt
| VariableResetStmt
@@ -938,57 +938,68 @@ CheckPointStmt: CHECKPOINT
*****************************************************************************/
AlterTableStmt:
-/* ALTER TABLE <name> ADD [COLUMN] <coldef> */
- ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+ ALTER TABLE relation_expr ADD opt_column columnDef
{
AlterTableStmt *n = makeNode(AlterTableStmt);
n->subtype = 'A';
- n->relname = $3;
- n->inhOpt = $4;
- n->def = $7;
+ n->relname = $3->relname;
+ n->inhOpt = $3->inhOpt;
+ n->def = $6;
$$ = (Node *)n;
}
-/* ALTER TABLE <name> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
- | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+ | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
{
AlterTableStmt *n = makeNode(AlterTableStmt);
n->subtype = 'T';
- n->relname = $3;
- n->inhOpt = $4;
- n->name = $7;
- n->def = $8;
+ n->relname = $3->relname;
+ n->inhOpt = $3->inhOpt;
+ n->name = $6;
+ n->def = $7;
$$ = (Node *)n;
}
-/* ALTER TABLE <name> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
- | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+ | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
+ {
+ AlterTableStmt *n = makeNode(AlterTableStmt);
+ n->subtype = 'S';
+ n->relname = $3->relname;
+ n->inhOpt = $3->inhOpt;
+ n->name = $6;
+ n->def = (Node *) makeInteger($9);
+ $$ = (Node *)n;
+ }
+/* ALTER TABLE <relation> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */
+ | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
{
AlterTableStmt *n = makeNode(AlterTableStmt);
n->subtype = 'D';
- n->relname = $3;
- n->inhOpt = $4;
- n->name = $7;
- n->behavior = $8;
+ n->relname = $3->relname;
+ n->inhOpt = $3->inhOpt;
+ n->name = $6;
+ n->behavior = $7;
$$ = (Node *)n;
}
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
- | ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+ | ALTER TABLE relation_expr ADD TableConstraint
{
AlterTableStmt *n = makeNode(AlterTableStmt);
n->subtype = 'C';
- n->relname = $3;
- n->inhOpt = $4;
- n->def = $6;
+ n->relname = $3->relname;
+ n->inhOpt = $3->inhOpt;
+ n->def = $5;
$$ = (Node *)n;
}
-/* ALTER TABLE <name> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */
- | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */
+ | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
{
AlterTableStmt *n = makeNode(AlterTableStmt);
n->subtype = 'X';
- n->relname = $3;
- n->inhOpt = $4;
- n->name = $7;
- n->behavior = $8;
+ n->relname = $3->relname;
+ n->inhOpt = $3->inhOpt;
+ n->name = $6;
+ n->behavior = $7;
$$ = (Node *)n;
}
/* ALTER TABLE <name> CREATE TOAST TABLE */
@@ -997,6 +1008,7 @@ AlterTableStmt:
AlterTableStmt *n = makeNode(AlterTableStmt);
n->subtype = 'E';
n->relname = $3;
+ n->inhOpt = INH_NO;
$$ = (Node *)n;
}
/* ALTER TABLE <name> OWNER TO UserId */
@@ -1005,12 +1017,13 @@ AlterTableStmt:
AlterTableStmt *n = makeNode(AlterTableStmt);
n->subtype = 'U';
n->relname = $3;
+ n->inhOpt = INH_NO;
n->name = $6;
$$ = (Node *)n;
}
;
-alter_column_action:
+alter_column_default:
SET DEFAULT a_expr
{
/* Treat SET DEFAULT NULL the same as DROP DEFAULT */
@@ -1478,10 +1491,6 @@ key_reference: NO ACTION { $$ = FKCONSTR_ON_KEY_NOACTION; }
| SET DEFAULT { $$ = FKCONSTR_ON_KEY_SETDEFAULT; }
;
-opt_only: ONLY { $$ = INH_NO; }
- | /*EMPTY*/ { $$ = INH_DEFAULT; }
- ;
-
OptInherit: INHERITS '(' relation_name_list ')' { $$ = $3; }
| /*EMPTY*/ { $$ = NIL; }
;
@@ -2598,14 +2607,13 @@ opt_force: FORCE { $$ = TRUE; }
*
*****************************************************************************/
-RenameStmt: ALTER TABLE relation_name opt_inh_star
- RENAME opt_column opt_name TO name
+RenameStmt: ALTER TABLE relation_expr RENAME opt_column opt_name TO name
{
RenameStmt *n = makeNode(RenameStmt);
- n->relname = $3;
- n->inhOpt = $4;
- n->column = $7;
- n->newname = $9;
+ n->relname = $3->relname;
+ n->inhOpt = $3->inhOpt;
+ n->column = $6;
+ n->newname = $8;
$$ = (Node *)n;
}
;
@@ -2994,49 +3002,71 @@ ClusterStmt: CLUSTER index_name ON relation_name
*
* QUERY:
* vacuum
+ * analyze
*
*****************************************************************************/
-VacuumStmt: VACUUM opt_verbose opt_analyze
+VacuumStmt: VACUUM opt_verbose
{
VacuumStmt *n = makeNode(VacuumStmt);
+ n->vacuum = true;
+ n->analyze = false;
n->verbose = $2;
- n->analyze = $3;
n->vacrel = NULL;
- n->va_spec = NIL;
+ n->va_cols = NIL;
$$ = (Node *)n;
}
- | VACUUM opt_verbose opt_analyze relation_name opt_va_list
+ | VACUUM opt_verbose relation_name
{
VacuumStmt *n = makeNode(VacuumStmt);
+ n->vacuum = true;
+ n->analyze = false;
n->verbose = $2;
- n->analyze = $3;
- n->vacrel = $4;
- n->va_spec = $5;
- if ( $5 != NIL && !$4 )
- elog(ERROR,"VACUUM syntax error at or near \"(\""
- "\n\tRelation name must be specified");
+ n->vacrel = $3;
+ n->va_cols = NIL;
+ $$ = (Node *)n;
+ }
+ | VACUUM opt_verbose AnalyzeStmt
+ {
+ VacuumStmt *n = (VacuumStmt *) $3;
+ n->vacuum = true;
+ n->verbose |= $2;
$$ = (Node *)n;
}
;
-opt_verbose: VERBOSE { $$ = TRUE; }
- | /*EMPTY*/ { $$ = FALSE; }
+AnalyzeStmt: analyze_keyword opt_verbose
+ {
+ VacuumStmt *n = makeNode(VacuumStmt);
+ n->vacuum = false;
+ n->analyze = true;
+ n->verbose = $2;
+ n->vacrel = NULL;
+ n->va_cols = NIL;
+ $$ = (Node *)n;
+ }
+ | analyze_keyword opt_verbose relation_name opt_name_list
+ {
+ VacuumStmt *n = makeNode(VacuumStmt);
+ n->vacuum = false;
+ n->analyze = true;
+ n->verbose = $2;
+ n->vacrel = $3;
+ n->va_cols = $4;
+ $$ = (Node *)n;
+ }
;
-opt_analyze: ANALYZE { $$ = TRUE; }
+analyze_keyword: ANALYZE { $$ = TRUE; }
| ANALYSE /* British */ { $$ = TRUE; }
- | /*EMPTY*/ { $$ = FALSE; }
;
-opt_va_list: '(' va_list ')' { $$ = $2; }
- | /*EMPTY*/ { $$ = NIL; }
+opt_verbose: VERBOSE { $$ = TRUE; }
+ | /*EMPTY*/ { $$ = FALSE; }
;
-va_list: name
- { $$ = makeList1($1); }
- | va_list ',' name
- { $$ = lappend($1, $3); }
+opt_name_list: '(' name_list ')' { $$ = $2; }
+ | /*EMPTY*/ { $$ = NIL; }
;
@@ -3160,12 +3190,12 @@ columnElem: ColId opt_indirection
*
*****************************************************************************/
-DeleteStmt: DELETE FROM opt_only relation_name where_clause
+DeleteStmt: DELETE FROM relation_expr where_clause
{
DeleteStmt *n = makeNode(DeleteStmt);
- n->inhOpt = $3;
- n->relname = $4;
- n->whereClause = $5;
+ n->relname = $3->relname;
+ n->inhOpt = $3->inhOpt;
+ n->whereClause = $4;
$$ = (Node *)n;
}
;
@@ -3202,17 +3232,17 @@ opt_lmode: SHARE { $$ = TRUE; }
*
*****************************************************************************/
-UpdateStmt: UPDATE opt_only relation_name
+UpdateStmt: UPDATE relation_expr
SET update_target_list
from_clause
where_clause
{
UpdateStmt *n = makeNode(UpdateStmt);
- n->inhOpt = $2;
- n->relname = $3;
- n->targetList = $5;
- n->fromClause = $6;
- n->whereClause = $7;
+ n->relname = $2->relname;
+ n->inhOpt = $2->inhOpt;
+ n->targetList = $4;
+ n->fromClause = $5;
+ n->whereClause = $6;
$$ = (Node *)n;
}
;
@@ -3545,10 +3575,6 @@ select_offset_value: Iconst
* ...however, recursive addattr and rename supported. make special
* cases for these.
*/
-opt_inh_star: '*' { $$ = INH_YES; }
- | /*EMPTY*/ { $$ = INH_DEFAULT; }
- ;
-
relation_name_list: name_list;
name_list: name
@@ -3576,7 +3602,7 @@ opt_for_update_clause: for_update_clause { $$ = $1; }
| /* EMPTY */ { $$ = NULL; }
;
-update_list: OF va_list { $$ = $2; }
+update_list: OF name_list { $$ = $2; }
| /* EMPTY */ { $$ = makeList1(NULL); }
;
@@ -5525,6 +5551,7 @@ TokenId: ABSOLUTE { $$ = "absolute"; }
| SHARE { $$ = "share"; }
| START { $$ = "start"; }
| STATEMENT { $$ = "statement"; }
+ | STATISTICS { $$ = "statistics"; }
| STDIN { $$ = "stdin"; }
| STDOUT { $$ = "stdout"; }
| SYSID { $$ = "sysid"; }
diff --git a/src/backend/parser/keywords.c b/src/backend/parser/keywords.c
index 402dbfd28ca..8ab19f86ae8 100644
--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.90 2001/03/22 03:59:40 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.91 2001/05/07 00:43:23 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
{"some", SOME},
{"start", START},
{"statement", STATEMENT},
+ {"statistics", STATISTICS},
{"stdin", STDIN},
{"stdout", STDOUT},
{"substring", SUBSTRING},
diff --git a/src/backend/parser/parse_relation.c b/src/backend/parser/parse_relation.c
index f5324cb3735..e1d49842fd2 100644
--- a/src/backend/parser/parse_relation.c
+++ b/src/backend/parser/parse_relation.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.54 2001/04/18 17:04:24 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.55 2001/05/07 00:43:23 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -75,7 +75,7 @@ static struct
}
};
-#define SPECIALS ((int) (sizeof(special_attr)/sizeof(special_attr[0])))
+#define SPECIALS ((int) lengthof(special_attr))
/*
@@ -670,7 +670,7 @@ isForUpdate(ParseState *pstate, char *relname)
foreach(l, pstate->p_forUpdate)
{
- char *rname = lfirst(l);
+ char *rname = strVal(lfirst(l));
if (strcmp(relname, rname) == 0)
return true;
@@ -1020,20 +1020,6 @@ attnameIsSet(Relation rd, char *name)
#endif
-#ifdef NOT_USED
-/*
- * This should only be used if the relation is already
- * heap_open()'ed. Use the cache version
- * for access to non-opened relations.
- */
-int
-attnumAttNelems(Relation rd, int attid)
-{
- return rd->rd_att->attrs[attid - 1]->attnelems;
-}
-
-#endif
-
/* given attribute id, return type of that attribute */
/*
* This should only be used if the relation is already
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index ae6cd20a5db..b616f7e68ef 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.109 2001/03/22 06:16:17 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.110 2001/05/07 00:43:23 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -427,13 +427,19 @@ ProcessUtility(Node *parsetree,
interpretInhOption(stmt->inhOpt),
(ColumnDef *) stmt->def);
break;
- case 'T': /* ALTER COLUMN */
- AlterTableAlterColumn(stmt->relname,
+ case 'T': /* ALTER COLUMN DEFAULT */
+ AlterTableAlterColumnDefault(stmt->relname,
interpretInhOption(stmt->inhOpt),
- stmt->name,
- stmt->def);
+ stmt->name,
+ stmt->def);
break;
- case 'D': /* ALTER DROP */
+ case 'S': /* ALTER COLUMN STATISTICS */
+ AlterTableAlterColumnStatistics(stmt->relname,
+ interpretInhOption(stmt->inhOpt),
+ stmt->name,
+ stmt->def);
+ break;
+ case 'D': /* DROP COLUMN */
AlterTableDropColumn(stmt->relname,
interpretInhOption(stmt->inhOpt),
stmt->name,
@@ -703,12 +709,13 @@ ProcessUtility(Node *parsetree,
break;
case T_VacuumStmt:
- set_ps_display(commandTag = "VACUUM");
+ if (((VacuumStmt *) parsetree)->vacuum)
+ commandTag = "VACUUM";
+ else
+ commandTag = "ANALYZE";
+ set_ps_display(commandTag);
- vacuum(((VacuumStmt *) parsetree)->vacrel,
- ((VacuumStmt *) parsetree)->verbose,
- ((VacuumStmt *) parsetree)->analyze,
- ((VacuumStmt *) parsetree)->va_spec);
+ vacuum((VacuumStmt *) parsetree);
break;
case T_ExplainStmt:
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 1fe0afb0a35..41ba82db7b5 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.87 2001/03/23 04:49:54 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.88 2001/05/07 00:43:23 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -57,9 +57,6 @@
/* default selectivity estimate for pattern-match operators such as LIKE */
#define DEFAULT_MATCH_SEL 0.01
-/* "fudge factor" for estimating frequency of not-most-common values */
-#define NOT_MOST_COMMON_RATIO 0.1
-
static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
Datum lobound, Datum hibound, Oid boundstypid,
double *scaledlobound, double *scaledhibound);
@@ -75,17 +72,9 @@ static double convert_one_string_to_scalar(unsigned char *value,
static unsigned char *convert_string_datum(Datum value, Oid typid);
static double convert_timevalue_to_scalar(Datum value, Oid typid);
static void getattproperties(Oid relid, AttrNumber attnum,
- Oid *typid,
- int *typlen,
- bool *typbyval,
- int32 *typmod);
-static bool getattstatistics(Oid relid, AttrNumber attnum,
- Oid typid, int32 typmod,
- double *nullfrac,
- double *commonfrac,
- Datum *commonval,
- Datum *loval,
- Datum *hival);
+ Oid *typid, int32 *typmod);
+static double get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+ Form_pg_statistic stats);
static Selectivity prefix_selectivity(char *prefix,
Oid relid,
AttrNumber attno,
@@ -115,134 +104,173 @@ eqsel(PG_FUNCTION_ARGS)
AttrNumber attno = PG_GETARG_INT16(2);
Datum value = PG_GETARG_DATUM(3);
int32 flag = PG_GETARG_INT32(4);
- float8 result;
-
- if (NONVALUE(attno) || NONVALUE(relid))
- result = DEFAULT_EQ_SEL;
- else
+ Oid typid;
+ int32 typmod;
+ HeapTuple statsTuple;
+ Datum *values;
+ int nvalues;
+ float4 *numbers;
+ int nnumbers;
+ double selec;
+
+ if (NONVALUE(relid) || NONVALUE(attno))
+ PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
+
+ /* get info about the attribute */
+ getattproperties(relid, attno, &typid, &typmod);
+
+ /* get stats for the attribute, if available */
+ statsTuple = SearchSysCache(STATRELATT,
+ ObjectIdGetDatum(relid),
+ Int16GetDatum(attno),
+ 0, 0);
+ if (HeapTupleIsValid(statsTuple))
{
- Oid typid;
- int typlen;
- bool typbyval;
- int32 typmod;
- double nullfrac;
- double commonfrac;
- Datum commonval;
- double selec;
-
- /* get info about the attribute */
- getattproperties(relid, attno,
- &typid, &typlen, &typbyval, &typmod);
-
- /* get stats for the attribute, if available */
- if (getattstatistics(relid, attno, typid, typmod,
- &nullfrac, &commonfrac, &commonval,
- NULL, NULL))
- {
- if (flag & SEL_CONSTANT)
- {
+ Form_pg_statistic stats;
- /*
- * Is the constant "=" to the column's most common value?
- * (Although the operator may not really be "=", we will
- * assume that seeing whether it returns TRUE for the most
- * common value is useful information. If you don't like
- * it, maybe you shouldn't be using eqsel for your
- * operator...)
- */
- RegProcedure eqproc = get_opcode(opid);
- bool mostcommon;
+ stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
- if (eqproc == (RegProcedure) NULL)
- elog(ERROR, "eqsel: no procedure for operator %u",
- opid);
+ if (flag & SEL_CONSTANT)
+ {
+ bool match = false;
+ int i;
- /* be careful to apply operator right way 'round */
- if (flag & SEL_RIGHT)
- mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
- commonval,
- value));
- else
- mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
- value,
- commonval));
+ /*
+ * Is the constant "=" to any of the column's most common
+ * values? (Although the given operator may not really be
+ * "=", we will assume that seeing whether it returns TRUE
+ * is an appropriate test. If you don't like this, maybe you
+ * shouldn't be using eqsel for your operator...)
+ */
+ if (get_attstatsslot(statsTuple, typid, typmod,
+ STATISTIC_KIND_MCV, InvalidOid,
+ &values, &nvalues,
+ &numbers, &nnumbers))
+ {
+ FmgrInfo eqproc;
- if (mostcommon)
- {
+ fmgr_info(get_opcode(opid), &eqproc);
- /*
- * Constant is "=" to the most common value. We know
- * selectivity exactly (or as exactly as VACUUM could
- * calculate it, anyway).
- */
- selec = commonfrac;
- }
- else
+ for (i = 0; i < nvalues; i++)
{
-
- /*
- * Comparison is against a constant that is neither
- * the most common value nor null. Its selectivity
- * cannot be more than this:
- */
- selec = 1.0 - commonfrac - nullfrac;
- if (selec > commonfrac)
- selec = commonfrac;
-
- /*
- * and in fact it's probably less, so we should apply
- * a fudge factor. The only case where we don't is
- * for a boolean column, where indeed we have
- * estimated the less-common value's frequency
- * exactly!
- */
- if (typid != BOOLOID)
- selec *= NOT_MOST_COMMON_RATIO;
+ /* be careful to apply operator right way 'round */
+ if (flag & SEL_RIGHT)
+ match = DatumGetBool(FunctionCall2(&eqproc,
+ values[i],
+ value));
+ else
+ match = DatumGetBool(FunctionCall2(&eqproc,
+ value,
+ values[i]));
+ if (match)
+ break;
}
}
else
{
+ /* no most-common-value info available */
+ values = NULL;
+ numbers = NULL;
+ i = nvalues = nnumbers = 0;
+ }
+ if (match)
+ {
+ /*
+ * Constant is "=" to this common value. We know
+ * selectivity exactly (or as exactly as VACUUM
+ * could calculate it, anyway).
+ */
+ selec = numbers[i];
+ }
+ else
+ {
/*
- * Search is for a value that we do not know a priori, but
- * we will assume it is not NULL. Selectivity cannot be
- * more than this:
+ * Comparison is against a constant that is neither
+ * NULL nor any of the common values. Its selectivity
+ * cannot be more than this:
*/
- selec = 1.0 - nullfrac;
- if (selec > commonfrac)
- selec = commonfrac;
+ double sumcommon = 0.0;
+ double otherdistinct;
+ for (i = 0; i < nnumbers; i++)
+ sumcommon += numbers[i];
+ selec = 1.0 - sumcommon - stats->stanullfrac;
+ /*
+ * and in fact it's probably a good deal less.
+ * We approximate that all the not-common values
+ * share this remaining fraction equally, so we
+ * divide by the number of other distinct values.
+ */
+ otherdistinct = get_att_numdistinct(relid, attno,
+ typid, stats)
+ - nnumbers;
+ if (otherdistinct > 1)
+ selec /= otherdistinct;
/*
- * and in fact it's probably less, so apply a fudge
- * factor.
+ * Another cross-check: selectivity shouldn't be
+ * estimated as more than the least common
+ * "most common value".
*/
- selec *= NOT_MOST_COMMON_RATIO;
+ if (nnumbers > 0 && selec > numbers[nnumbers-1])
+ selec = numbers[nnumbers-1];
}
- /* result should be in range, but make sure... */
- if (selec < 0.0)
- selec = 0.0;
- else if (selec > 1.0)
- selec = 1.0;
-
- if (!typbyval)
- pfree(DatumGetPointer(commonval));
+ free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
}
else
{
+ double ndistinct;
/*
- * No VACUUM ANALYZE stats available, so make a guess using
- * the dispersion stat (if we have that, which is unlikely for
- * a normal attribute; but for a system attribute we may be
- * able to estimate it).
+ * Search is for a value that we do not know a priori, but
+ * we will assume it is not NULL. Estimate the selectivity
+ * as non-null fraction divided by number of distinct values,
+ * so that we get a result averaged over all possible values
+ * whether common or uncommon. (Essentially, we are assuming
+ * that the not-yet-known comparison value is equally likely
+ * to be any of the possible values, regardless of their
+ * frequency in the table. Is that a good idea?)
+ */
+ selec = 1.0 - stats->stanullfrac;
+ ndistinct = get_att_numdistinct(relid, attno, typid, stats);
+ if (ndistinct > 1)
+ selec /= ndistinct;
+ /*
+ * Cross-check: selectivity should never be
+ * estimated as more than the most common value's.
*/
- selec = get_attdispersion(relid, attno, 0.01);
+ if (get_attstatsslot(statsTuple, typid, typmod,
+ STATISTIC_KIND_MCV, InvalidOid,
+ NULL, NULL,
+ &numbers, &nnumbers))
+ {
+ if (nnumbers > 0 && selec > numbers[0])
+ selec = numbers[0];
+ free_attstatsslot(typid, NULL, 0, numbers, nnumbers);
+ }
}
- result = (float8) selec;
+ ReleaseSysCache(statsTuple);
}
- PG_RETURN_FLOAT8(result);
+ else
+ {
+ /*
+ * No VACUUM ANALYZE stats available, so make a guess using
+ * estimated number of distinct values and assuming they are
+ * equally common. (The guess is unlikely to be very good,
+ * but we do know a few special cases.)
+ */
+ selec = 1.0 / get_att_numdistinct(relid, attno, typid, NULL);
+ }
+
+ /* result should be in range, but make sure... */
+ if (selec < 0.0)
+ selec = 0.0;
+ else if (selec > 1.0)
+ selec = 1.0;
+
+ PG_RETURN_FLOAT8((float8) selec);
}
/*
@@ -301,117 +329,263 @@ scalarltsel(PG_FUNCTION_ARGS)
AttrNumber attno = PG_GETARG_INT16(2);
Datum value = PG_GETARG_DATUM(3);
int32 flag = PG_GETARG_INT32(4);
- float8 result;
+ bool isgt;
+ HeapTuple oprTuple;
+ HeapTuple statsTuple;
+ Form_pg_statistic stats;
+ Oid contype;
+ FmgrInfo opproc;
+ Oid typid;
+ int32 typmod;
+ Datum *values;
+ int nvalues;
+ float4 *numbers;
+ int nnumbers;
+ double mcv_selec,
+ hist_selec,
+ sumcommon;
+ double selec;
+ int i;
+
+ if (NONVALUE(relid) || NONVALUE(attno))
+ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+
+ /* Can't do anything useful if no constant to compare against, either */
+ if (!(flag & SEL_CONSTANT))
+ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
- if (!(flag & SEL_CONSTANT) || NONVALUE(attno) || NONVALUE(relid))
- result = DEFAULT_INEQ_SEL;
+ /*
+ * Force the constant to be on the right to simplify later logic.
+ * This means that we may be dealing with either "<" or ">" cases.
+ */
+ if (flag & SEL_RIGHT)
+ {
+ /* we have x < const */
+ isgt = false;
+ }
else
{
- HeapTuple oprtuple;
- Oid ltype,
- rtype,
- contype;
- Oid typid;
- int typlen;
- bool typbyval;
- int32 typmod;
- Datum hival,
- loval;
- double val,
- high,
- low,
- numerator,
- denominator;
-
- /*
- * Get left and right datatypes of the operator so we know what
- * type the constant is.
- */
- oprtuple = SearchSysCache(OPEROID,
- ObjectIdGetDatum(opid),
- 0, 0, 0);
- if (!HeapTupleIsValid(oprtuple))
- elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
- ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
- rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
- contype = (flag & SEL_RIGHT) ? rtype : ltype;
- ReleaseSysCache(oprtuple);
-
- /* Now get info and stats about the attribute */
- getattproperties(relid, attno,
- &typid, &typlen, &typbyval, &typmod);
-
- if (!getattstatistics(relid, attno, typid, typmod,
- NULL, NULL, NULL,
- &loval, &hival))
+ /* we have const < x, commute to make x > const */
+ opid = get_commutator(opid);
+ if (!opid)
{
- /* no stats available, so default result */
+ /* Use default selectivity (should we raise an error instead?) */
PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
}
+ isgt = true;
+ }
- /* Convert the values to a uniform comparison scale. */
- if (!convert_to_scalar(value, contype, &val,
- loval, hival, typid,
- &low, &high))
- {
+ /*
+ * The constant might not be the same datatype as the column;
+ * look at the operator's input types to find out what it is.
+ * Also set up to be able to call the operator's execution proc.
+ */
+ oprTuple = SearchSysCache(OPEROID,
+ ObjectIdGetDatum(opid),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(oprTuple))
+ elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
+ contype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+ fmgr_info(((Form_pg_operator) GETSTRUCT(oprTuple))->oprcode, &opproc);
+ ReleaseSysCache(oprTuple);
+
+ /* Now get info and stats about the attribute */
+ getattproperties(relid, attno, &typid, &typmod);
+
+ statsTuple = SearchSysCache(STATRELATT,
+ ObjectIdGetDatum(relid),
+ Int16GetDatum(attno),
+ 0, 0);
+ if (!HeapTupleIsValid(statsTuple))
+ {
+ /* no stats available, so default result */
+ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+ }
+ stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
- /*
- * Ideally we'd produce an error here, on the grounds that the
- * given operator shouldn't have scalarltsel registered as its
- * selectivity func unless we can deal with its operand types.
- * But currently, all manner of stuff is invoking scalarltsel,
- * so give a default estimate until that can be fixed.
- */
- if (!typbyval)
- {
- pfree(DatumGetPointer(hival));
- pfree(DatumGetPointer(loval));
- }
- PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
- }
+ /*
+ * If we have most-common-values info, add up the fractions of the
+ * MCV entries that satisfy MCV OP CONST. These fractions contribute
+ * directly to the result selectivity. Also add up the total fraction
+ * represented by MCV entries.
+ */
+ mcv_selec = 0.0;
+ sumcommon = 0.0;
- /* release temp storage if needed */
- if (!typbyval)
+ if (get_attstatsslot(statsTuple, typid, typmod,
+ STATISTIC_KIND_MCV, InvalidOid,
+ &values, &nvalues,
+ &numbers, &nnumbers))
+ {
+ for (i = 0; i < nvalues; i++)
{
- pfree(DatumGetPointer(hival));
- pfree(DatumGetPointer(loval));
+ if (DatumGetBool(FunctionCall2(&opproc,
+ values[i],
+ value)))
+ mcv_selec += numbers[i];
+ sumcommon += numbers[i];
}
+ free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
+ }
+
+ /*
+ * If there is a histogram, determine which bin the constant falls in,
+ * and compute the resulting contribution to selectivity.
+ *
+ * Someday, VACUUM might store more than one histogram per rel/att,
+ * corresponding to more than one possible sort ordering defined for
+ * the column type. However, to make that work we will need to figure
+ * out which staop to search for --- it's not necessarily the one we
+ * have at hand! (For example, we might have a '<=' operator rather
+ * than the '<' operator that will appear in staop.) For now, assume
+ * that whatever appears in pg_statistic is sorted the same way our
+ * operator sorts.
+ */
+ hist_selec = 0.0;
- if (high <= low)
+ if (get_attstatsslot(statsTuple, typid, typmod,
+ STATISTIC_KIND_HISTOGRAM, InvalidOid,
+ &values, &nvalues,
+ NULL, NULL))
+ {
+ if (nvalues > 1)
{
+ double histfrac;
+ bool ltcmp;
+
+ ltcmp = DatumGetBool(FunctionCall2(&opproc,
+ values[0],
+ value));
+ if (isgt)
+ ltcmp = !ltcmp;
+ if (!ltcmp)
+ {
+ /* Constant is below lower histogram boundary. */
+ histfrac = 0.0;
+ }
+ else
+ {
+ /*
+ * Scan to find proper location. This could be made faster
+ * by using a binary-search method, but it's probably not
+ * worth the trouble for typical histogram sizes.
+ */
+ for (i = 1; i < nvalues; i++)
+ {
+ ltcmp = DatumGetBool(FunctionCall2(&opproc,
+ values[i],
+ value));
+ if (isgt)
+ ltcmp = !ltcmp;
+ if (!ltcmp)
+ break;
+ }
+ if (i >= nvalues)
+ {
+ /* Constant is above upper histogram boundary. */
+ histfrac = 1.0;
+ }
+ else
+ {
+ double val,
+ high,
+ low;
+ double binfrac;
+ /*
+ * We have values[i-1] < constant < values[i].
+ *
+ * Convert the constant and the two nearest bin boundary
+ * values to a uniform comparison scale, and do a linear
+ * interpolation within this bin.
+ */
+ if (convert_to_scalar(value, contype, &val,
+ values[i-1], values[i], typid,
+ &low, &high))
+ {
+ if (high <= low)
+ {
+ /* cope if bin boundaries appear identical */
+ binfrac = 0.5;
+ }
+ else if (val <= low)
+ binfrac = 0.0;
+ else if (val >= high)
+ binfrac = 1.0;
+ else
+ binfrac = (val - low) / (high - low);
+ }
+ else
+ {
+ /*
+ * Ideally we'd produce an error here, on the grounds
+ * that the given operator shouldn't have scalarltsel
+ * registered as its selectivity func unless we can
+ * deal with its operand types. But currently, all
+ * manner of stuff is invoking scalarltsel, so give a
+ * default estimate until that can be fixed.
+ */
+ binfrac = 0.5;
+ }
+ /*
+ * Now, compute the overall selectivity across the values
+ * represented by the histogram. We have i-1 full bins
+						 * and a partial bin (fraction binfrac) below the constant.
+ */
+ histfrac = (double) (i-1) + binfrac;
+ histfrac /= (double) (nvalues - 1);
+ }
+ }
/*
- * If we trusted the stats fully, we could return a small or
- * large selec depending on which side of the single data
- * point the constant is on. But it seems better to assume
- * that the stats are wrong and return a default...
+ * Now histfrac = fraction of histogram entries below the constant.
+ *
+ * Account for "<" vs ">"
*/
- result = DEFAULT_INEQ_SEL;
- }
- else if (val < low || val > high)
- {
-
+ hist_selec = isgt ? (1.0 - histfrac) : histfrac;
/*
- * If given value is outside the statistical range, return a
- * small or large value; but not 0.0/1.0 since there is a
- * chance the stats are out of date.
+ * The histogram boundaries are only approximate to begin
+ * with, and may well be out of date anyway. Therefore,
+ * don't believe extremely small or large selectivity
+ * estimates.
*/
- if (flag & SEL_RIGHT)
- result = (val < low) ? 0.001 : 0.999;
- else
- result = (val < low) ? 0.999 : 0.001;
- }
- else
- {
- denominator = high - low;
- if (flag & SEL_RIGHT)
- numerator = val - low;
- else
- numerator = high - val;
- result = numerator / denominator;
+ if (hist_selec < 0.001)
+ hist_selec = 0.001;
+ else if (hist_selec > 0.999)
+ hist_selec = 0.999;
}
+
+ free_attstatsslot(typid, values, nvalues, NULL, 0);
}
- PG_RETURN_FLOAT8(result);
+
+ /*
+ * Now merge the results from the MCV and histogram calculations,
+ * realizing that the histogram covers only the non-null values that
+ * are not listed in MCV.
+ */
+ selec = 1.0 - stats->stanullfrac - sumcommon;
+
+ if (hist_selec > 0.0)
+ selec *= hist_selec;
+ else
+ {
+ /*
+ * If no histogram but there are values not accounted for by MCV,
+ * arbitrarily assume half of them will match.
+ */
+ selec *= 0.5;
+ }
+
+ selec += mcv_selec;
+
+ ReleaseSysCache(statsTuple);
+
+ /* result should be in range, but make sure... */
+ if (selec < 0.0)
+ selec = 0.0;
+ else if (selec > 1.0)
+ selec = 1.0;
+
+ PG_RETURN_FLOAT8((float8) selec);
}
/*
@@ -428,34 +602,25 @@ scalargtsel(PG_FUNCTION_ARGS)
Datum value = PG_GETARG_DATUM(3);
int32 flag = PG_GETARG_INT32(4);
Oid ltopid;
- float8 result;
/*
- * Compute selectivity of "<", then invert --- but only if we were
- * able to produce a non-default estimate. Note that we get the
- * negator which strictly speaking means we are looking at "<=" for
- * ">" or "<" for ">=". We assume this won't matter.
+ * Commute so that we have a "<" or "<=" operator, then apply
+ * scalarltsel.
*/
- ltopid = get_negator(opid);
- if (ltopid)
- {
- result = DatumGetFloat8(DirectFunctionCall5(scalarltsel,
- ObjectIdGetDatum(ltopid),
- ObjectIdGetDatum(relid),
- Int16GetDatum(attno),
- value,
- Int32GetDatum(flag)));
- }
- else
+ ltopid = get_commutator(opid);
+ if (!ltopid)
{
/* Use default selectivity (should we raise an error instead?) */
- result = DEFAULT_INEQ_SEL;
+ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
}
- if (result != DEFAULT_INEQ_SEL)
- result = 1.0 - result;
-
- PG_RETURN_FLOAT8(result);
+ flag ^= SEL_RIGHT;
+ return DirectFunctionCall5(scalarltsel,
+ ObjectIdGetDatum(ltopid),
+ ObjectIdGetDatum(relid),
+ Int16GetDatum(attno),
+ value,
+ Int32GetDatum(flag));
}
/*
@@ -476,7 +641,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
result = DEFAULT_MATCH_SEL;
else
{
- HeapTuple oprtuple;
+ HeapTuple oprTuple;
Oid ltype,
rtype;
char *patt;
@@ -488,14 +653,14 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
* Get left and right datatypes of the operator so we know what
* type the attribute is.
*/
- oprtuple = SearchSysCache(OPEROID,
+ oprTuple = SearchSysCache(OPEROID,
ObjectIdGetDatum(opid),
0, 0, 0);
- if (!HeapTupleIsValid(oprtuple))
+ if (!HeapTupleIsValid(oprTuple))
elog(ERROR, "patternsel: no tuple for operator %u", opid);
- ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
- rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
- ReleaseSysCache(oprtuple);
+ ltype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprleft;
+ rtype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+ ReleaseSysCache(oprTuple);
/* the right-hand const is type text for all supported operators */
Assert(rtype == TEXTOID);
@@ -659,42 +824,88 @@ eqjoinsel(PG_FUNCTION_ARGS)
AttrNumber attno1 = PG_GETARG_INT16(2);
Oid relid2 = PG_GETARG_OID(3);
AttrNumber attno2 = PG_GETARG_INT16(4);
- float8 result;
- float8 num1,
- num2,
- min;
bool unknown1 = NONVALUE(relid1) || NONVALUE(attno1);
bool unknown2 = NONVALUE(relid2) || NONVALUE(attno2);
+ double selec;
if (unknown1 && unknown2)
- result = DEFAULT_EQ_SEL;
+ selec = DEFAULT_EQ_SEL;
else
{
- num1 = unknown1 ? 1.0 : get_attdispersion(relid1, attno1, 0.01);
- num2 = unknown2 ? 1.0 : get_attdispersion(relid2, attno2, 0.01);
+ Oid typid1;
+ Oid typid2;
+ int32 typmod1;
+ int32 typmod2;
+ HeapTuple statsTuple1 = NULL;
+ HeapTuple statsTuple2 = NULL;
+ Form_pg_statistic stats1 = NULL;
+ Form_pg_statistic stats2 = NULL;
+ double nd1,
+ nd2;
+
+ if (unknown1)
+ {
+ nd1 = 100.0;
+ }
+ else
+ {
+ /* get info about the attribute */
+ getattproperties(relid1, attno1, &typid1, &typmod1);
+
+ /* get stats for the attribute, if available */
+ statsTuple1 = SearchSysCache(STATRELATT,
+ ObjectIdGetDatum(relid1),
+ Int16GetDatum(attno1),
+ 0, 0);
+ if (HeapTupleIsValid(statsTuple1))
+ stats1 = (Form_pg_statistic) GETSTRUCT(statsTuple1);
+
+ nd1 = get_att_numdistinct(relid1, attno1, typid1, stats1);
+ }
+
+ if (unknown2)
+ {
+ nd2 = 100.0;
+ }
+ else
+ {
+ /* get info about the attribute */
+ getattproperties(relid2, attno2, &typid2, &typmod2);
+
+ /* get stats for the attribute, if available */
+ statsTuple2 = SearchSysCache(STATRELATT,
+ ObjectIdGetDatum(relid2),
+ Int16GetDatum(attno2),
+ 0, 0);
+ if (HeapTupleIsValid(statsTuple2))
+ stats2 = (Form_pg_statistic) GETSTRUCT(statsTuple2);
+
+ nd2 = get_att_numdistinct(relid2, attno2, typid2, stats2);
+ }
/*
- * The join selectivity cannot be more than num2, since each tuple
- * in table 1 could match no more than num2 fraction of tuples in
- * table 2 (and that's only if the table-1 tuple matches the most
- * common value in table 2, so probably it's less). By the same
- * reasoning it is not more than num1. The min is therefore an
- * upper bound.
+ * Estimate the join selectivity as 1 / sqrt(nd1*nd2)
+	 * (can we produce any theory for this?)
*
- * If we know the dispersion of only one side, use it; the reasoning
- * above still works.
+ * XXX possibility to do better: if both attributes have histograms
+ * then we could determine the exact join selectivity between the
+ * MCV sets, and only have to assume the join behavior of the non-MCV
+ * values. This could be a big win when the MCVs cover a large part
+ * of the population.
*
- * XXX can we make a better estimate here? Using the nullfrac
- * statistic might be helpful, for example. Assuming the operator
- * is strict (does not succeed for null inputs) then the
- * selectivity couldn't be more than (1-nullfrac1)*(1-nullfrac2),
- * which might be usefully small if there are many nulls. How
- * about applying the operator to the most common values?
+ * XXX what about nulls?
*/
- min = (num1 < num2) ? num1 : num2;
- result = min;
+ selec = 1.0 / sqrt(nd1 * nd2);
+ if (selec > 1.0)
+ selec = 1.0;
+
+ if (HeapTupleIsValid(statsTuple1))
+ ReleaseSysCache(statsTuple1);
+ if (HeapTupleIsValid(statsTuple2))
+ ReleaseSysCache(statsTuple2);
+
}
- PG_RETURN_FLOAT8(result);
+ PG_RETURN_FLOAT8((float8) selec);
}
/*
@@ -829,7 +1040,8 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
* Returns "true" if successful.
*
* All numeric datatypes are simply converted to their equivalent
- * "double" values.
+ * "double" values. XXX what about NUMERIC values that are outside
+ * the range of "double"?
*
* String datatypes are converted by convert_string_to_scalar(),
* which is explained below. The reason why this routine deals with
@@ -917,7 +1129,7 @@ convert_numeric_to_scalar(Datum value, Oid typid)
{
switch (typid)
{
- case BOOLOID:
+ case BOOLOID:
return (double) DatumGetBool(value);
case INT2OID:
return (double) DatumGetInt16(value);
@@ -963,6 +1175,8 @@ convert_numeric_to_scalar(Datum value, Oid typid)
* three strings before computing the scaled values. This allows us to
* "zoom in" when we encounter a narrow data range. An example is a phone
* number database where all the values begin with the same area code.
+ * (Actually, the bounds will be adjacent histogram-bin-boundary values,
+ * so this is more likely to happen than you might think.)
*/
static void
convert_string_to_scalar(unsigned char *value,
@@ -1208,11 +1422,11 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
/*
* getattproperties
* Retrieve pg_attribute properties for an attribute,
- * including type OID, type len, type byval flag, typmod.
+ * including type OID and typmod.
*/
static void
getattproperties(Oid relid, AttrNumber attnum,
- Oid *typid, int *typlen, bool *typbyval, int32 *typmod)
+ Oid *typid, int32 *typmod)
{
HeapTuple atp;
Form_pg_attribute att_tup;
@@ -1227,164 +1441,87 @@ getattproperties(Oid relid, AttrNumber attnum,
att_tup = (Form_pg_attribute) GETSTRUCT(atp);
*typid = att_tup->atttypid;
- *typlen = att_tup->attlen;
- *typbyval = att_tup->attbyval;
*typmod = att_tup->atttypmod;
ReleaseSysCache(atp);
}
/*
- * getattstatistics
- * Retrieve the pg_statistic data for an attribute.
- * Returns 'false' if no stats are available.
+ * get_att_numdistinct
*
- * Inputs:
- * 'relid' and 'attnum' are the relation and attribute number.
- * 'typid' and 'typmod' are the type and typmod of the column,
- * which the caller must already have looked up.
+ * Estimate the number of distinct values of an attribute.
*
- * Outputs:
- * The available stats are nullfrac, commonfrac, commonval, loval, hival.
- * The caller need not retrieve all five --- pass NULL pointers for the
- * unwanted values.
+ * relid, attnum: identify the attribute to examine.
+ * typid: type of attribute.
+ * stats: pg_statistic tuple for attribute, or NULL if not available.
*
- * commonval, loval, hival are returned as Datums holding the internal
- * representation of the values. (Note that these should be pfree'd
- * after use if the data type is not by-value.)
+ * XXX possible future improvement: look to see if there is a unique
+ * index on the attribute. If so, we can estimate ndistinct = ntuples.
+ * This should probably override any info from pg_statistic.
*/
-static bool
-getattstatistics(Oid relid,
- AttrNumber attnum,
- Oid typid,
- int32 typmod,
- double *nullfrac,
- double *commonfrac,
- Datum *commonval,
- Datum *loval,
- Datum *hival)
+static double
+get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+ Form_pg_statistic stats)
{
- HeapTuple tuple;
- HeapTuple typeTuple;
- FmgrInfo inputproc;
- Oid typelem;
- bool isnull;
+ HeapTuple reltup;
+ double ntuples;
/*
- * We assume that there will only be one entry in pg_statistic for the
- * given rel/att, so we search WITHOUT considering the staop column.
- * Someday, VACUUM might store more than one entry per rel/att,
- * corresponding to more than one possible sort ordering defined for
- * the column type. However, to make that work we will need to figure
- * out which staop to search for --- it's not necessarily the one we
- * have at hand! (For example, we might have a '>' operator rather
- * than the '<' operator that will appear in staop.)
+ * Special-case boolean columns: presumably, two distinct values.
+ *
+ * Are there any other cases we should wire in special estimates for?
*/
- tuple = SearchSysCache(STATRELID,
- ObjectIdGetDatum(relid),
- Int16GetDatum((int16) attnum),
- 0, 0);
- if (!HeapTupleIsValid(tuple))
- {
- /* no such stats entry */
- return false;
- }
+ if (typid == BOOLOID)
+ return 2.0;
- if (nullfrac)
- *nullfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stanullfrac;
- if (commonfrac)
- *commonfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stacommonfrac;
-
- /* Get the type input proc for the column datatype */
- typeTuple = SearchSysCache(TYPEOID,
- ObjectIdGetDatum(typid),
- 0, 0, 0);
- if (!HeapTupleIsValid(typeTuple))
- elog(ERROR, "getattstatistics: Cache lookup failed for type %u",
- typid);
- fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
- typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
- ReleaseSysCache(typeTuple);
+ /*
+ * If VACUUM ANALYZE determined a fixed estimate, use it.
+ */
+ if (stats && stats->stadistinct > 0.0)
+ return stats->stadistinct;
/*
- * Values are variable-length fields, so cannot access as struct
- * fields. Must do it the hard way with SysCacheGetAttr.
+ * Otherwise we need to get the relation size.
*/
- if (commonval)
- {
- Datum val = SysCacheGetAttr(STATRELID, tuple,
- Anum_pg_statistic_stacommonval,
- &isnull);
+ reltup = SearchSysCache(RELOID,
+ ObjectIdGetDatum(relid),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(reltup))
+ elog(ERROR, "get_att_numdistinct: no relation tuple %u", relid);
- if (isnull)
- {
- elog(DEBUG, "getattstatistics: stacommonval is null");
- *commonval = PointerGetDatum(NULL);
- }
- else
- {
- char *strval = DatumGetCString(DirectFunctionCall1(textout,
- val));
-
- *commonval = FunctionCall3(&inputproc,
- CStringGetDatum(strval),
- ObjectIdGetDatum(typelem),
- Int32GetDatum(typmod));
- pfree(strval);
- }
- }
+ ntuples = ((Form_pg_class) GETSTRUCT(reltup))->reltuples;
- if (loval)
- {
- Datum val = SysCacheGetAttr(STATRELID, tuple,
- Anum_pg_statistic_staloval,
- &isnull);
+ ReleaseSysCache(reltup);
- if (isnull)
- {
- elog(DEBUG, "getattstatistics: staloval is null");
- *loval = PointerGetDatum(NULL);
- }
- else
- {
- char *strval = DatumGetCString(DirectFunctionCall1(textout,
- val));
-
- *loval = FunctionCall3(&inputproc,
- CStringGetDatum(strval),
- ObjectIdGetDatum(typelem),
- Int32GetDatum(typmod));
- pfree(strval);
- }
- }
+ if (ntuples <= 0.0)
+ return 100.0; /* no data available; return a default */
- if (hival)
- {
- Datum val = SysCacheGetAttr(STATRELID, tuple,
- Anum_pg_statistic_stahival,
- &isnull);
+ /*
+ * If VACUUM ANALYZE determined a scaled estimate, use it.
+ */
+ if (stats && stats->stadistinct < 0.0)
+ return - stats->stadistinct * ntuples;
- if (isnull)
- {
- elog(DEBUG, "getattstatistics: stahival is null");
- *hival = PointerGetDatum(NULL);
- }
- else
- {
- char *strval = DatumGetCString(DirectFunctionCall1(textout,
- val));
-
- *hival = FunctionCall3(&inputproc,
- CStringGetDatum(strval),
- ObjectIdGetDatum(typelem),
- Int32GetDatum(typmod));
- pfree(strval);
- }
+ /*
+ * VACUUM ANALYZE does not compute stats for system attributes,
+ * but some of them can reasonably be assumed unique anyway.
+ */
+ switch (attnum)
+ {
+ case ObjectIdAttributeNumber:
+ case SelfItemPointerAttributeNumber:
+ return ntuples;
+ case TableOidAttributeNumber:
+ return 1.0;
}
- ReleaseSysCache(tuple);
+ /*
+ * Estimate ndistinct = ntuples if the table is small, else 100.
+ */
+ if (ntuples < 100.0)
+ return ntuples;
- return true;
+ return 100.0;
}
/*-------------------------------------------------------------------------
diff --git a/src/backend/utils/cache/lsyscache.c b/src/backend/utils/cache/lsyscache.c
index 82d55866215..3995de5d7a1 100644
--- a/src/backend/utils/cache/lsyscache.c
+++ b/src/backend/utils/cache/lsyscache.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.52 2001/03/23 04:49:55 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.53 2001/05/07 00:43:24 tgl Exp $
*
* NOTES
* Eventually, the index information should go through here, too.
@@ -18,7 +18,10 @@
#include "access/tupmacs.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
+#include "catalog/pg_statistic.h"
#include "catalog/pg_type.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
@@ -182,106 +185,6 @@ get_atttypmod(Oid relid, AttrNumber attnum)
return -1;
}
-/*
- * get_attdispersion
- *
- * Retrieve the dispersion statistic for an attribute,
- * or produce an estimate if no info is available.
- *
- * min_estimate is the minimum estimate to return if insufficient data
- * is available to produce a reliable value. This value may vary
- * depending on context. (For example, when deciding whether it is
- * safe to use a hashjoin, we want to be more conservative than when
- * estimating the number of tuples produced by an equijoin.)
- */
-double
-get_attdispersion(Oid relid, AttrNumber attnum, double min_estimate)
-{
- HeapTuple atp;
- Form_pg_attribute att_tup;
- double dispersion;
- Oid atttypid;
- int32 ntuples;
-
- atp = SearchSysCache(ATTNUM,
- ObjectIdGetDatum(relid),
- Int16GetDatum(attnum),
- 0, 0);
- if (!HeapTupleIsValid(atp))
- {
- /* this should not happen */
- elog(ERROR, "get_attdispersion: no attribute tuple %u %d",
- relid, attnum);
- return min_estimate;
- }
-
- att_tup = (Form_pg_attribute) GETSTRUCT(atp);
-
- dispersion = att_tup->attdispersion;
- atttypid = att_tup->atttypid;
-
- ReleaseSysCache(atp);
-
- if (dispersion > 0.0)
- return dispersion; /* we have a specific estimate from VACUUM */
-
- /*
- * Special-case boolean columns: the dispersion of a boolean is highly
- * unlikely to be anywhere near 1/numtuples, instead it's probably
- * more like 0.5.
- *
- * Are there any other cases we should wire in special estimates for?
- */
- if (atttypid == BOOLOID)
- return 0.5;
-
- /*
- * Dispersion is either 0 (no data available) or -1 (dispersion is
- * 1/numtuples). Either way, we need the relation size.
- */
-
- atp = SearchSysCache(RELOID,
- ObjectIdGetDatum(relid),
- 0, 0, 0);
- if (!HeapTupleIsValid(atp))
- {
- /* this should not happen */
- elog(ERROR, "get_attdispersion: no relation tuple %u", relid);
- return min_estimate;
- }
-
- ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples;
-
- ReleaseSysCache(atp);
-
- if (ntuples == 0)
- return min_estimate; /* no data available */
-
- if (dispersion < 0.0) /* VACUUM thinks there are no duplicates */
- return 1.0 / (double) ntuples;
-
- /*
- * VACUUM ANALYZE does not compute dispersion for system attributes,
- * but some of them can reasonably be assumed unique anyway.
- */
- if (attnum == ObjectIdAttributeNumber ||
- attnum == SelfItemPointerAttributeNumber)
- return 1.0 / (double) ntuples;
- if (attnum == TableOidAttributeNumber)
- return 1.0;
-
- /*
- * VACUUM ANALYZE has not been run for this table. Produce an estimate
- * of 1/numtuples. This may produce unreasonably small estimates for
- * large tables, so limit the estimate to no less than min_estimate.
- */
- dispersion = 1.0 / (double) ntuples;
- if (dispersion < min_estimate)
- dispersion = min_estimate;
-
- return dispersion;
-}
-
/* ---------- INDEX CACHE ---------- */
/* watch this space...
@@ -876,3 +779,157 @@ get_typtype(Oid typid)
}
#endif
+
+/* ---------- STATISTICS CACHE ---------- */
+
+/*
+ * get_attstatsslot
+ *
+ * Extract the contents of a "slot" of a pg_statistic tuple.
+ * Returns TRUE if requested slot type was found, else FALSE.
+ *
+ * Unlike other routines in this file, this takes a pointer to an
+ * already-looked-up tuple in the pg_statistic cache. We do this since
+ * most callers will want to extract more than one value from the cache
+ * entry, and we don't want to repeat the cache lookup unnecessarily.
+ *
+ * statstuple: pg_statistic tuple to be examined.
+ * atttype: type OID of attribute.
+ * atttypmod: typmod of attribute.
+ * reqkind: STAKIND code for desired statistics slot kind.
+ * reqop: STAOP value wanted, or InvalidOid if don't care.
+ * values, nvalues: if not NULL, the slot's stavalues are extracted.
+ * numbers, nnumbers: if not NULL, the slot's stanumbers are extracted.
+ *
+ * If assigned, values and numbers are set to point to palloc'd arrays.
+ * If the attribute type is pass-by-reference, the values referenced by
+ * the values array are themselves palloc'd. The palloc'd stuff can be
+ * freed by calling free_attstatsslot.
+ */
+bool
+get_attstatsslot(HeapTuple statstuple,
+ Oid atttype, int32 atttypmod,
+ int reqkind, Oid reqop,
+ Datum **values, int *nvalues,
+ float4 **numbers, int *nnumbers)
+{
+ Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple);
+ int i,
+ j;
+ Datum val;
+ bool isnull;
+ ArrayType *statarray;
+ int narrayelem;
+ HeapTuple typeTuple;
+ FmgrInfo inputproc;
+ Oid typelem;
+
+ for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+ {
+ if ((&stats->stakind1)[i] == reqkind &&
+ (reqop == InvalidOid || (&stats->staop1)[i] == reqop))
+ break;
+ }
+ if (i >= STATISTIC_NUM_SLOTS)
+ return false; /* not there */
+
+ if (values)
+ {
+ val = SysCacheGetAttr(STATRELATT, statstuple,
+ Anum_pg_statistic_stavalues1 + i,
+ &isnull);
+ if (isnull)
+ elog(ERROR, "get_attstatsslot: stavalues is null");
+ statarray = DatumGetArrayTypeP(val);
+ /*
+ * Do initial examination of the array. This produces a list
+ * of text Datums --- ie, pointers into the text array value.
+ */
+ deconstruct_array(statarray, false, -1, 'i', values, nvalues);
+ narrayelem = *nvalues;
+ /*
+ * We now need to replace each text Datum by its internal equivalent.
+ *
+ * Get the type input proc and typelem for the column datatype.
+ */
+ typeTuple = SearchSysCache(TYPEOID,
+ ObjectIdGetDatum(atttype),
+ 0, 0, 0);
+ if (!HeapTupleIsValid(typeTuple))
+ elog(ERROR, "get_attstatsslot: Cache lookup failed for type %u",
+ atttype);
+ fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
+ typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
+ ReleaseSysCache(typeTuple);
+ /*
+ * Do the conversions. The palloc'd array of Datums is reused
+ * in place.
+ */
+ for (j = 0; j < narrayelem; j++)
+ {
+ char *strval;
+
+ strval = DatumGetCString(DirectFunctionCall1(textout,
+ (*values)[j]));
+ (*values)[j] = FunctionCall3(&inputproc,
+ CStringGetDatum(strval),
+ ObjectIdGetDatum(typelem),
+ Int32GetDatum(atttypmod));
+ pfree(strval);
+ }
+ /*
+ * Free statarray if it's a detoasted copy.
+ */
+ if ((Pointer) statarray != DatumGetPointer(val))
+ pfree(statarray);
+ }
+
+ if (numbers)
+ {
+ val = SysCacheGetAttr(STATRELATT, statstuple,
+ Anum_pg_statistic_stanumbers1 + i,
+ &isnull);
+ if (isnull)
+ elog(ERROR, "get_attstatsslot: stanumbers is null");
+ statarray = DatumGetArrayTypeP(val);
+ /*
+ * We expect the array to be a 1-D float4 array; verify that.
+ * We don't need to use deconstruct_array() since the array
+ * data is just going to look like a C array of float4 values.
+ */
+ narrayelem = ARR_DIMS(statarray)[0];
+ if (ARR_NDIM(statarray) != 1 || narrayelem <= 0 ||
+ ARR_SIZE(statarray) != (ARR_OVERHEAD(1) + narrayelem * sizeof(float4)))
+ elog(ERROR, "get_attstatsslot: stanumbers is bogus");
+ *numbers = (float4 *) palloc(narrayelem * sizeof(float4));
+ memcpy(*numbers, ARR_DATA_PTR(statarray), narrayelem * sizeof(float4));
+ *nnumbers = narrayelem;
+ /*
+ * Free statarray if it's a detoasted copy.
+ */
+ if ((Pointer) statarray != DatumGetPointer(val))
+ pfree(statarray);
+ }
+
+ return true;
+}
+
+void
+free_attstatsslot(Oid atttype,
+ Datum *values, int nvalues,
+ float4 *numbers, int nnumbers)
+{
+ if (values)
+ {
+ if (! get_typbyval(atttype))
+ {
+ int i;
+
+ for (i = 0; i < nvalues; i++)
+ pfree(DatumGetPointer(values[i]));
+ }
+ pfree(values);
+ }
+ if (numbers)
+ pfree(numbers);
+}
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 75ef3179202..4e35b3fb35b 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.60 2001/03/22 03:59:57 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.61 2001/05/07 00:43:24 tgl Exp $
*
* NOTES
* These routines allow the parser/planner/executor to perform
@@ -313,7 +313,7 @@ static struct cachedesc cacheinfo[] = {
0,
0
}},
- {StatisticRelationName, /* STATRELID */
+ {StatisticRelationName, /* STATRELATT */
StatisticRelidAttnumIndex,
2,
{
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index d27bfb29668..5a77c47c200 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -78,7 +78,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.15 2001/03/23 04:49:55 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.16 2001/05/07 00:43:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -87,7 +87,11 @@
#include "access/heapam.h"
#include "access/nbtree.h"
+#include "catalog/catname.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
#include "miscadmin.h"
+#include "utils/fmgroids.h"
#include "utils/logtape.h"
#include "utils/lsyscache.h"
#include "utils/tuplesort.h"
@@ -263,6 +267,7 @@ struct Tuplesortstate
TupleDesc tupDesc;
int nKeys;
ScanKey scanKeys;
+ SortFunctionKind *sortFnKinds;
/*
* These variables are specific to the IndexTuple case; they are set
@@ -279,6 +284,7 @@ struct Tuplesortstate
Oid datumType;
Oid sortOperator;
FmgrInfo sortOpFn; /* cached lookup data for sortOperator */
+ SortFunctionKind sortFnKind;
/* we need typelen and byval in order to know how to copy the Datums. */
int datumTypeLen;
bool datumTypeByVal;
@@ -458,14 +464,14 @@ tuplesort_begin_common(bool randomAccess)
Tuplesortstate *
tuplesort_begin_heap(TupleDesc tupDesc,
- int nkeys, ScanKey keys,
+ int nkeys,
+ Oid *sortOperators, AttrNumber *attNums,
bool randomAccess)
{
Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+ int i;
- AssertArg(nkeys >= 1);
- AssertArg(keys[0].sk_attno != 0);
- AssertArg(keys[0].sk_procedure != 0);
+ AssertArg(nkeys > 0);
state->comparetup = comparetup_heap;
state->copytup = copytup_heap;
@@ -475,7 +481,29 @@ tuplesort_begin_heap(TupleDesc tupDesc,
state->tupDesc = tupDesc;
state->nKeys = nkeys;
- state->scanKeys = keys;
+ state->scanKeys = (ScanKey) palloc(nkeys * sizeof(ScanKeyData));
+ MemSet(state->scanKeys, 0, nkeys * sizeof(ScanKeyData));
+ state->sortFnKinds = (SortFunctionKind *)
+ palloc(nkeys * sizeof(SortFunctionKind));
+ MemSet(state->sortFnKinds, 0, nkeys * sizeof(SortFunctionKind));
+
+ for (i = 0; i < nkeys; i++)
+ {
+ RegProcedure sortFunction;
+
+ AssertArg(sortOperators[i] != 0);
+ AssertArg(attNums[i] != 0);
+
+ /* select a function that implements the sort operator */
+ SelectSortFunction(sortOperators[i], &sortFunction,
+ &state->sortFnKinds[i]);
+
+ ScanKeyEntryInitialize(&state->scanKeys[i],
+ 0x0,
+ attNums[i],
+ sortFunction,
+ (Datum) 0);
+ }
return state;
}
@@ -507,6 +535,7 @@ tuplesort_begin_datum(Oid datumType,
bool randomAccess)
{
Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+ RegProcedure sortFunction;
int16 typlen;
bool typbyval;
@@ -518,8 +547,12 @@ tuplesort_begin_datum(Oid datumType,
state->datumType = datumType;
state->sortOperator = sortOperator;
- /* lookup the function that implements the sort operator */
- fmgr_info(get_opcode(sortOperator), &state->sortOpFn);
+
+ /* select a function that implements the sort operator */
+ SelectSortFunction(sortOperator, &sortFunction, &state->sortFnKind);
+ /* and look up the function */
+ fmgr_info(sortFunction, &state->sortOpFn);
+
/* lookup necessary attributes of the datum type */
get_typlenbyval(datumType, &typlen, &typbyval);
state->datumTypeLen = typlen;
@@ -548,6 +581,13 @@ tuplesort_end(Tuplesortstate *state)
}
if (state->memtupindex)
pfree(state->memtupindex);
+
+ /* this stuff might better belong in a variant-specific shutdown routine */
+ if (state->scanKeys)
+ pfree(state->scanKeys);
+ if (state->sortFnKinds)
+ pfree(state->sortFnKinds);
+
pfree(state);
}
@@ -1692,6 +1732,7 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
for (nkey = 0; nkey < state->nKeys; nkey++)
{
ScanKey scanKey = state->scanKeys + nkey;
+ SortFunctionKind fnKind = state->sortFnKinds[nkey];
AttrNumber attno = scanKey->sk_attno;
Datum lattr,
rattr;
@@ -1708,23 +1749,36 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
}
else if (isnull2)
return -1;
- else if (scanKey->sk_flags & SK_COMMUTE)
- {
- if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
- rattr, lattr)))
- return -1; /* a < b after commute */
- if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
- lattr, rattr)))
- return 1; /* a > b after commute */
- }
else
{
- if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
- lattr, rattr)))
- return -1; /* a < b */
- if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
- rattr, lattr)))
- return 1; /* a > b */
+ int32 compare;
+
+ if (fnKind == SORTFUNC_LT)
+ {
+ if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+ lattr, rattr)))
+ compare = -1; /* a < b */
+ else if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+ rattr, lattr)))
+ compare = 1; /* a > b */
+ else
+ compare = 0;
+ }
+ else
+ {
+ /* sort function is CMP or REVCMP */
+ compare = DatumGetInt32(FunctionCall2(&scanKey->sk_func,
+ lattr, rattr));
+ if (fnKind == SORTFUNC_REVCMP)
+ compare = -compare;
+ }
+
+ if (compare != 0)
+ {
+ if (scanKey->sk_flags & SK_COMMUTE)
+ compare = -compare;
+ return compare;
+ }
}
}
@@ -1852,8 +1906,10 @@ comparetup_index(Tuplesortstate *state, const void *a, const void *b)
}
else
{
+ /* the comparison function is always of CMP type */
compare = DatumGetInt32(FunctionCall2(&entry->sk_func,
- attrDatum1, attrDatum2));
+ attrDatum1,
+ attrDatum2));
}
if (compare != 0)
@@ -1954,7 +2010,7 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
}
else if (rtup->isNull)
return -1;
- else
+ else if (state->sortFnKind == SORTFUNC_LT)
{
if (DatumGetBool(FunctionCall2(&state->sortOpFn,
ltup->val, rtup->val)))
@@ -1964,6 +2020,17 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
return 1; /* a > b */
return 0;
}
+ else
+ {
+ /* sort function is CMP or REVCMP */
+ int32 compare;
+
+ compare = DatumGetInt32(FunctionCall2(&state->sortOpFn,
+ ltup->val, rtup->val));
+ if (state->sortFnKind == SORTFUNC_REVCMP)
+ compare = -compare;
+ return compare;
+ }
}
static void *
@@ -2032,3 +2099,119 @@ tuplesize_datum(Tuplesortstate *state, void *tup)
return (unsigned int) tuplelen;
}
}
+
+
+/*
+ * This routine selects an appropriate sorting function to implement
+ * a sort operator as efficiently as possible. The straightforward
+ * method is to use the operator's implementation proc --- ie, "<"
+ * comparison. However, that way often requires two calls of the function
+ * per comparison. If we can find a btree three-way comparator function
+ * associated with the operator, we can use it to do the comparisons
+ * more efficiently. We also support the possibility that the operator
+ * is ">" (descending sort), in which case we have to reverse the output
+ * of the btree comparator.
+ *
+ * Possibly this should live somewhere else (backend/catalog/, maybe?).
+ */
+void
+SelectSortFunction(Oid sortOperator,
+ RegProcedure *sortFunction,
+ SortFunctionKind *kind)
+{
+ Relation relation;
+ HeapScanDesc scan;
+ ScanKeyData skey[3];
+ HeapTuple tuple;
+ Oid opclass = InvalidOid;
+
+ /*
+ * Scan pg_amop to see if the target operator is registered as the
+ * "<" or ">" operator of any btree opclass. It's possible that it
+ * might be registered both ways (eg, if someone were to build a
+ * "reverse sort" opclass for some reason); prefer the "<" case if so.
+ * If the operator is registered the same way in multiple opclasses,
+ * assume we can use the associated comparator function from any one.
+ */
+ relation = heap_openr(AccessMethodOperatorRelationName,
+ AccessShareLock);
+
+ ScanKeyEntryInitialize(&skey[0], 0,
+ Anum_pg_amop_amopid,
+ F_OIDEQ,
+ ObjectIdGetDatum(BTREE_AM_OID));
+
+ ScanKeyEntryInitialize(&skey[1], 0,
+ Anum_pg_amop_amopopr,
+ F_OIDEQ,
+ ObjectIdGetDatum(sortOperator));
+
+ scan = heap_beginscan(relation, false, SnapshotNow, 2, skey);
+
+ while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+ {
+ Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple);
+
+ if (aform->amopstrategy == BTLessStrategyNumber)
+ {
+ opclass = aform->amopclaid;
+ *kind = SORTFUNC_CMP;
+ break; /* done looking */
+ }
+ else if (aform->amopstrategy == BTGreaterStrategyNumber)
+ {
+ opclass = aform->amopclaid;
+ *kind = SORTFUNC_REVCMP;
+ /* keep scanning in hopes of finding a BTLess entry */
+ }
+ }
+
+ heap_endscan(scan);
+ heap_close(relation, AccessShareLock);
+
+ if (OidIsValid(opclass))
+ {
+ /* Found a suitable opclass, get its comparator support function */
+ relation = heap_openr(AccessMethodProcedureRelationName,
+ AccessShareLock);
+
+ ScanKeyEntryInitialize(&skey[0], 0,
+ Anum_pg_amproc_amid,
+ F_OIDEQ,
+ ObjectIdGetDatum(BTREE_AM_OID));
+
+ ScanKeyEntryInitialize(&skey[1], 0,
+ Anum_pg_amproc_amopclaid,
+ F_OIDEQ,
+ ObjectIdGetDatum(opclass));
+
+ ScanKeyEntryInitialize(&skey[2], 0,
+ Anum_pg_amproc_amprocnum,
+ F_INT2EQ,
+ Int16GetDatum(BTORDER_PROC));
+
+ scan = heap_beginscan(relation, false, SnapshotNow, 3, skey);
+
+ *sortFunction = InvalidOid;
+
+ if (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+ {
+ Form_pg_amproc aform = (Form_pg_amproc) GETSTRUCT(tuple);
+ *sortFunction = aform->amproc;
+ }
+
+ heap_endscan(scan);
+ heap_close(relation, AccessShareLock);
+
+ if (RegProcedureIsValid(*sortFunction))
+ return;
+ }
+
+ /* Can't find a comparator, so use the operator as-is */
+
+ *kind = SORTFUNC_LT;
+ *sortFunction = get_opcode(sortOperator);
+ if (!RegProcedureIsValid(*sortFunction))
+ elog(ERROR, "SelectSortFunction: operator %u has no implementation",
+ sortOperator);
+}
diff --git a/src/include/access/tuptoaster.h b/src/include/access/tuptoaster.h
index 759ab3d39e2..6e38529204d 100644
--- a/src/include/access/tuptoaster.h
+++ b/src/include/access/tuptoaster.h
@@ -6,15 +6,13 @@
*
* Copyright (c) 2000, PostgreSQL Development Team
*
- * $Id: tuptoaster.h,v 1.10 2001/03/22 04:00:32 momjian Exp $
+ * $Id: tuptoaster.h,v 1.11 2001/05/07 00:43:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef TUPTOASTER_H
#define TUPTOASTER_H
-#ifdef TUPLE_TOASTER_ACTIVE
-
#include "access/heapam.h"
#include "access/htup.h"
#include "access/tupmacs.h"
@@ -109,7 +107,13 @@ extern varattrib *heap_tuple_untoast_attr(varattrib *attr);
*/
extern Datum toast_compress_datum(Datum value);
-#endif /* TUPLE_TOASTER_ACTIVE */
+/* ----------
+ * toast_raw_datum_size -
+ *
+ * Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+extern Size toast_raw_datum_size(Datum value);
#endif /* TUPTOASTER_H */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 963b11c1d38..832f91fb09f 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: catversion.h,v 1.70 2001/03/22 04:00:35 momjian Exp $
+ * $Id: catversion.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 200101061
+#define CATALOG_VERSION_NO 200105051
#endif
diff --git a/src/include/catalog/heap.h b/src/include/catalog/heap.h
index a7248f6c6dc..7ab04b05fb2 100644
--- a/src/include/catalog/heap.h
+++ b/src/include/catalog/heap.h
@@ -7,13 +7,14 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: heap.h,v 1.34 2001/03/22 04:00:35 momjian Exp $
+ * $Id: heap.h,v 1.35 2001/05/07 00:43:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef HEAP_H
#define HEAP_H
+#include "catalog/pg_attribute.h"
#include "utils/rel.h"
typedef struct RawColumnDefault
@@ -44,4 +45,6 @@ extern void AddRelationRawConstraints(Relation rel,
List *rawColDefaults,
List *rawConstraints);
+extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno);
+
#endif /* HEAP_H */
diff --git a/src/include/catalog/index.h b/src/include/catalog/index.h
index 1dac0bb1c31..07aaad61c79 100644
--- a/src/include/catalog/index.h
+++ b/src/include/catalog/index.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: index.h,v 1.33 2001/03/22 04:00:35 momjian Exp $
+ * $Id: index.h,v 1.34 2001/05/07 00:43:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -46,7 +46,7 @@ extern void FormIndexDatum(IndexInfo *indexInfo,
Datum *datum,
char *nullv);
-extern void UpdateStats(Oid relid, long reltuples);
+extern void UpdateStats(Oid relid, double reltuples);
extern bool IndexesAreActive(Oid relid, bool comfirmCommitted);
extern void setRelhasindex(Oid relid, bool hasindex);
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index 41a580a3777..cc155cf1bbb 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: indexing.h,v 1.48 2001/03/22 04:00:36 momjian Exp $
+ * $Id: indexing.h,v 1.49 2001/05/07 00:43:24 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -171,7 +171,7 @@ DECLARE_UNIQUE_INDEX(pg_rewrite_rulename_index on pg_rewrite using btree(rulenam
xDECLARE_UNIQUE_INDEX(pg_shadow_name_index on pg_shadow using btree(usename name_ops));
xDECLARE_UNIQUE_INDEX(pg_shadow_sysid_index on pg_shadow using btree(usesysid int4_ops));
*/
-DECLARE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
+DECLARE_UNIQUE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
DECLARE_INDEX(pg_trigger_tgconstrname_index on pg_trigger using btree(tgconstrname name_ops));
DECLARE_INDEX(pg_trigger_tgconstrrelid_index on pg_trigger using btree(tgconstrrelid oid_ops));
DECLARE_INDEX(pg_trigger_tgrelid_index on pg_trigger using btree(tgrelid oid_ops));
diff --git a/src/include/catalog/pg_attribute.h b/src/include/catalog/pg_attribute.h
index 58724e94dc9..6e11aa6d530 100644
--- a/src/include/catalog/pg_attribute.h
+++ b/src/include/catalog/pg_attribute.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: pg_attribute.h,v 1.70 2001/03/22 04:00:37 momjian Exp $
+ * $Id: pg_attribute.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
@@ -36,15 +36,14 @@
* typedef struct FormData_pg_attribute
*
* If you change the following, make sure you change the structs for
- * system attributes in heap.c and index.c also.
+ * system attributes in catalog/heap.c also.
* ----------------
*/
CATALOG(pg_attribute) BOOTSTRAP
{
Oid attrelid; /* OID of relation containing this
* attribute */
- NameData attname;
- Oid atttypid;
+ NameData attname; /* name of attribute */
/*
* atttypid is the OID of the instance in Catalog Class pg_type that
@@ -53,30 +52,20 @@ CATALOG(pg_attribute) BOOTSTRAP
* attalign attributes of this instance, so they had better match or
* Postgres will fail.
*/
-
- float4 attdispersion;
+ Oid atttypid;
/*
- * attdispersion is the dispersion statistic of the column (0.0 to
- * 1.0), or zero if the statistic has not been calculated, or -1.0 if
- * VACUUM found that the column contains no duplicate entries (in
- * which case the dispersion should be taken as 1.0/numberOfRows for
- * the current table size). The -1.0 hack is useful because the
- * number of rows may be updated more often than attdispersion is. We
- * assume that the column will retain its no-duplicate-entry property.
- * (Perhaps this should be driven off the existence of a UNIQUE index
- * for the column, instead of being a statistical guess?)
+ * attstattarget is the target number of statistics datapoints to collect
+ * during VACUUM ANALYZE of this column. A zero here indicates that we
+ * do not wish to collect any stats about this column.
*/
-
- int2 attlen;
+ int4 attstattarget;
/*
* attlen is a copy of the typlen field from pg_type for this
- * attribute. See atttypid above. See struct Form_pg_type for
- * definition.
+ * attribute. See atttypid comments above.
*/
-
- int2 attnum;
+ int2 attlen;
/*
* attnum is the "attribute number" for the attribute: A value that
@@ -91,10 +80,13 @@ CATALOG(pg_attribute) BOOTSTRAP
*
* Note that (attnum - 1) is often used as the index to an array.
*/
+ int2 attnum;
- int4 attnelems; /* number of dimensions, if an array type */
-
- int4 attcacheoff;
+ /*
+ * attndims is the declared number of dimensions, if an array type,
+ * otherwise zero.
+ */
+ int4 attndims;
/*
* fastgetattr() uses attcacheoff to cache byte offsets of attributes
@@ -103,8 +95,7 @@ CATALOG(pg_attribute) BOOTSTRAP
* tuple descriptor, we may then update attcacheoff in the copies.
* This speeds up the attribute walking process.
*/
-
- int4 atttypmod;
+ int4 attcacheoff;
/*
* atttypmod records type-specific data supplied at table creation
@@ -113,16 +104,13 @@ CATALOG(pg_attribute) BOOTSTRAP
* argument. The value will generally be -1 for types that do not need
* typmod.
*/
-
- bool attbyval;
+ int4 atttypmod;
/*
* attbyval is a copy of the typbyval field from pg_type for this
- * attribute. See atttypid above. See struct Form_pg_type for
- * definition.
+ * attribute. See atttypid comments above.
*/
-
- char attstorage;
+ bool attbyval;
/*----------
* attstorage tells for VARLENA attributes, what the heap access
@@ -137,30 +125,31 @@ CATALOG(pg_attribute) BOOTSTRAP
* but only as a last resort ('e' and 'x' fields are moved first).
*----------
*/
+ char attstorage;
+ /* This flag indicates that the attribute is really a set */
bool attisset;
- char attalign;
/*
* attalign is a copy of the typalign field from pg_type for this
- * attribute. See atttypid above. See struct Form_pg_type for
- * definition.
+ * attribute. See atttypid comments above.
*/
-
- bool attnotnull;
+ char attalign;
/* This flag represents the "NOT NULL" constraint */
- bool atthasdef;
+ bool attnotnull;
/* Has DEFAULT value or not */
+ bool atthasdef;
} FormData_pg_attribute;
/*
* someone should figure out how to do this properly. (The problem is
- * the size of the C struct is not the same as the size of the tuple.)
+ * the size of the C struct is not the same as the size of the tuple
+ * because of alignment padding at the end of the struct.)
*/
#define ATTRIBUTE_TUPLE_SIZE \
- (offsetof(FormData_pg_attribute,atthasdef) + sizeof(char))
+ (offsetof(FormData_pg_attribute,atthasdef) + sizeof(bool))
/* ----------------
* Form_pg_attribute corresponds to a pointer to a tuple with
@@ -178,10 +167,10 @@ typedef FormData_pg_attribute *Form_pg_attribute;
#define Anum_pg_attribute_attrelid 1
#define Anum_pg_attribute_attname 2
#define Anum_pg_attribute_atttypid 3
-#define Anum_pg_attribute_attdispersion 4
+#define Anum_pg_attribute_attstattarget 4
#define Anum_pg_attribute_attlen 5
#define Anum_pg_attribute_attnum 6
-#define Anum_pg_attribute_attnelems 7
+#define Anum_pg_attribute_attndims 7
#define Anum_pg_attribute_attcacheoff 8
#define Anum_pg_attribute_atttypmod 9
#define Anum_pg_attribute_attbyval 10
@@ -206,6 +195,7 @@ typedef FormData_pg_attribute *Form_pg_attribute;
(attribute)->attnotnull = false; \
(attribute)->atthasdef = false;
#endif /* _DROP_COLUMN_HACK__ */
+
/* ----------------
* SCHEMA_ macros for declaring hardcoded tuple descriptors.
* these are used in utils/cache/relcache.c
@@ -231,25 +221,25 @@ typedef FormData_pg_attribute *Form_pg_attribute;
* ----------------
*/
#define Schema_pg_type \
-{ 1247, {"typname"}, 19, 0, NAMEDATALEN, 1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typowner"}, 23, 0, 4, 2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typlen"}, 21, 0, 2, 3, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typprtlen"}, 21, 0, 2, 4, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typbyval"}, 16, 0, 1, 5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typtype"}, 18, 0, 1, 6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typisdefined"}, 16, 0, 1, 7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdelim"}, 18, 0, 1, 8, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typrelid"}, 26, 0, 4, 9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typelem"}, 26, 0, 4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typinput"}, 24, 0, 4, 11, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typoutput"}, 24, 0, 4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typreceive"}, 24, 0, 4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typsend"}, 24, 0, 4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typalign"}, 18, 0, 1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typstorage"}, 18, 0, 1, 16, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdefault"}, 25, 0, -1, 17, 0, -1, -1, '\0' , 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1247 typname 19 0 NAMEDATALEN 1 0 -1 -1 f p f i f f));
+{ 1247, {"typname"}, 19, DEFAULT_ATTSTATTARGET, NAMEDATALEN, 1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1247, {"typowner"}, 23, 0, 4, 2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typlen"}, 21, 0, 2, 3, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typprtlen"}, 21, 0, 2, 4, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typbyval"}, 16, 0, 1, 5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typtype"}, 18, 0, 1, 6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typisdefined"}, 16, 0, 1, 7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdelim"}, 18, 0, 1, 8, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typrelid"}, 26, 0, 4, 9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typelem"}, 26, 0, 4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typinput"}, 24, 0, 4, 11, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typoutput"}, 24, 0, 4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typreceive"}, 24, 0, 4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typsend"}, 24, 0, 4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typalign"}, 18, 0, 1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typstorage"}, 18, 0, 1, 16, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdefault"}, 25, 0, -1, 17, 0, -1, -1, false , 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1247 typname 19 DEFAULT_ATTSTATTARGET NAMEDATALEN 1 0 -1 -1 f p f i f f));
DATA(insert OID = 0 ( 1247 typowner 23 0 4 2 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1247 typlen 21 0 2 3 0 -1 -1 t p f s f f));
DATA(insert OID = 0 ( 1247 typprtlen 21 0 2 4 0 -1 -1 t p f s f f));
@@ -299,25 +289,25 @@ DATA(insert OID = 0 ( 1262 tableoid 26 0 4 -7 0 -1 -1 t p f i f f));
* ----------------
*/
#define Schema_pg_proc \
-{ 1255, {"proname"}, 19, 0, NAMEDATALEN, 1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proowner"}, 23, 0, 4, 2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prolang"}, 26, 0, 4, 3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proisinh"}, 16, 0, 1, 4, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proistrusted"}, 16, 0, 1, 5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proiscachable"}, 16, 0, 1, 6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proisstrict"}, 16, 0, 1, 7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"pronargs"}, 21, 0, 2, 8, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1255, {"proretset"}, 16, 0, 1, 9, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"prorettype"}, 26, 0, 4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proargtypes"}, 30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probyte_pct"}, 23, 0, 4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"properbyte_cpu"}, 23, 0, 4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"propercall_cpu"}, 23, 0, 4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prooutin_ratio"}, 23, 0, 4, 15, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prosrc"}, 25, 0, -1, 16, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probin"}, 17, 0, -1, 17, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1255 proname 19 0 NAMEDATALEN 1 0 -1 -1 f p f i f f));
+{ 1255, {"proname"}, 19, DEFAULT_ATTSTATTARGET, NAMEDATALEN, 1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"proowner"}, 23, 0, 4, 2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prolang"}, 26, 0, 4, 3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proisinh"}, 16, 0, 1, 4, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proistrusted"}, 16, 0, 1, 5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proiscachable"}, 16, 0, 1, 6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proisstrict"}, 16, 0, 1, 7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"pronargs"}, 21, 0, 2, 8, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1255, {"proretset"}, 16, 0, 1, 9, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"prorettype"}, 26, 0, 4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proargtypes"}, 30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"probyte_pct"}, 23, 0, 4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"properbyte_cpu"}, 23, 0, 4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"propercall_cpu"}, 23, 0, 4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prooutin_ratio"}, 23, 0, 4, 15, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prosrc"}, 25, 0, -1, 16, 0, -1, -1, false, 'x', false, 'i', false, false }, \
+{ 1255, {"probin"}, 17, 0, -1, 17, 0, -1, -1, false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1255 proname 19 DEFAULT_ATTSTATTARGET NAMEDATALEN 1 0 -1 -1 f p f i f f));
DATA(insert OID = 0 ( 1255 proowner 23 0 4 2 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1255 prolang 26 0 4 3 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1255 proisinh 16 0 1 4 0 -1 -1 t p f c f f));
@@ -346,8 +336,8 @@ DATA(insert OID = 0 ( 1255 tableoid 26 0 4 -7 0 -1 -1 t p f i f f));
* pg_shadow
* ----------------
*/
-DATA(insert OID = 0 ( 1260 usename 19 0 NAMEDATALEN 1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1260 usesysid 23 0 4 2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1260 usename 19 DEFAULT_ATTSTATTARGET NAMEDATALEN 1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1260 usesysid 23 DEFAULT_ATTSTATTARGET 4 2 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1260 usecreatedb 16 0 1 3 0 -1 -1 t p f c f f));
DATA(insert OID = 0 ( 1260 usetrace 16 0 1 4 0 -1 -1 t p f c f f));
DATA(insert OID = 0 ( 1260 usesuper 16 0 1 5 0 -1 -1 t p f c f f));
@@ -366,8 +356,8 @@ DATA(insert OID = 0 ( 1260 tableoid 26 0 4 -7 0 -1 -1 t p f i f f));
* pg_group
* ----------------
*/
-DATA(insert OID = 0 ( 1261 groname 19 0 NAMEDATALEN 1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1261 grosysid 23 0 4 2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1261 groname 19 DEFAULT_ATTSTATTARGET NAMEDATALEN 1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1261 grosysid 23 DEFAULT_ATTSTATTARGET 4 2 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1261 grolist 1007 0 -1 3 0 -1 -1 f x f i f f));
DATA(insert OID = 0 ( 1261 ctid 27 0 6 -1 0 -1 -1 f p f i f f));
DATA(insert OID = 0 ( 1261 oid 26 0 4 -2 0 -1 -1 t p f i f f));
@@ -382,29 +372,29 @@ DATA(insert OID = 0 ( 1261 tableoid 26 0 4 -7 0 -1 -1 t p f i f f));
* ----------------
*/
#define Schema_pg_attribute \
-{ 1249, {"attrelid"}, 26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attname"}, 19, 0, NAMEDATALEN, 2, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypid"}, 26, 0, 4, 3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attdispersion"}, 700, 0, 4, 4, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attlen"}, 21, 0, 2, 5, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnum"}, 21, 0, 2, 6, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnelems"}, 23, 0, 4, 7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attcacheoff"}, 23, 0, 4, 8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypmod"}, 23, 0, 4, 9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attbyval"}, 16, 0, 1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attstorage"}, 18, 0, 1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attisset"}, 16, 0, 1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attalign"}, 18, 0, 1, 13, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attnotnull"}, 16, 0, 1, 14, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"atthasdef"}, 16, 0, 1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1249 attrelid 26 0 4 1 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attname 19 0 NAMEDATALEN 2 0 -1 -1 f p f i f f));
+{ 1249, {"attrelid"}, 26, DEFAULT_ATTSTATTARGET, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attname"}, 19, DEFAULT_ATTSTATTARGET, NAMEDATALEN, 2, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypid"}, 26, 0, 4, 3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attstattarget"}, 23, 0, 4, 4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attlen"}, 21, 0, 2, 5, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attnum"}, 21, 0, 2, 6, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attndims"}, 23, 0, 4, 7, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attcacheoff"}, 23, 0, 4, 8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypmod"}, 23, 0, 4, 9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attbyval"}, 16, 0, 1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attstorage"}, 18, 0, 1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attisset"}, 16, 0, 1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attalign"}, 18, 0, 1, 13, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attnotnull"}, 16, 0, 1, 14, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"atthasdef"}, 16, 0, 1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }
+
+DATA(insert OID = 0 ( 1249 attrelid 26 DEFAULT_ATTSTATTARGET 4 1 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attname 19 DEFAULT_ATTSTATTARGET NAMEDATALEN 2 0 -1 -1 f p f i f f));
DATA(insert OID = 0 ( 1249 atttypid 26 0 4 3 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attdispersion 700 0 4 4 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1249 attstattarget 23 0 4 4 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1249 attlen 21 0 2 5 0 -1 -1 t p f s f f));
DATA(insert OID = 0 ( 1249 attnum 21 0 2 6 0 -1 -1 t p f s f f));
-DATA(insert OID = 0 ( 1249 attnelems 23 0 4 7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attndims 23 0 4 7 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1249 attcacheoff 23 0 4 8 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1249 atttypmod 23 0 4 9 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1249 attbyval 16 0 1 10 0 -1 -1 t p f c f f));
@@ -426,36 +416,36 @@ DATA(insert OID = 0 ( 1249 tableoid 26 0 4 -7 0 -1 -1 t p f i f f));
* ----------------
*/
#define Schema_pg_class \
-{ 1259, {"relname"}, 19, 0, NAMEDATALEN, 1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltype"}, 26, 0, 4, 2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relowner"}, 23, 0, 4, 3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relam"}, 26, 0, 4, 4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relfilenode"}, 26, 0, 4, 5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relpages"}, 23, 0, 4, 6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltuples"}, 23, 0, 4, 7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastrelid"}, 26, 0, 4, 8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastidxid"}, 26, 0, 4, 9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relhasindex"}, 16, 0, 1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relisshared"}, 16, 0, 1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relkind"}, 18, 0, 1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relnatts"}, 21, 0, 2, 13, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relchecks"}, 21, 0, 2, 14, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"reltriggers"}, 21, 0, 2, 15, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relukeys"}, 21, 0, 2, 16, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relfkeys"}, 21, 0, 2, 17, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relrefs"}, 21, 0, 2, 18, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relhaspkey"}, 16, 0, 1, 19, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhasrules"}, 16, 0, 1, 20, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhassubclass"},16, 0, 1, 21, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relacl"}, 1034, 0, -1, 22, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1259 relname 19 0 NAMEDATALEN 1 0 -1 -1 f p f i f f));
+{ 1259, {"relname"}, 19, DEFAULT_ATTSTATTARGET, NAMEDATALEN, 1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltype"}, 26, 0, 4, 2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relowner"}, 23, 0, 4, 3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relam"}, 26, 0, 4, 4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relfilenode"}, 26, 0, 4, 5, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relpages"}, 23, 0, 4, 6, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltuples"}, 700, 0, 4, 7, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastrelid"}, 26, 0, 4, 8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastidxid"}, 26, 0, 4, 9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relhasindex"}, 16, 0, 1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relisshared"}, 16, 0, 1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relkind"}, 18, 0, 1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relnatts"}, 21, 0, 2, 13, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relchecks"}, 21, 0, 2, 14, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"reltriggers"}, 21, 0, 2, 15, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relukeys"}, 21, 0, 2, 16, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relfkeys"}, 21, 0, 2, 17, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relrefs"}, 21, 0, 2, 18, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relhaspkey"}, 16, 0, 1, 19, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhasrules"}, 16, 0, 1, 20, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhassubclass"},16, 0, 1, 21, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relacl"}, 1034, 0, -1, 22, 0, -1, -1, false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1259 relname 19 DEFAULT_ATTSTATTARGET NAMEDATALEN 1 0 -1 -1 f p f i f f));
DATA(insert OID = 0 ( 1259 reltype 26 0 4 2 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1259 relowner 23 0 4 3 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1259 relam 26 0 4 4 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1259 relfilenode 26 0 4 5 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1259 relpages 23 0 4 6 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1259 reltuples 23 0 4 7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1259 reltuples 700 0 4 7 0 -1 -1 f p f i f f));
DATA(insert OID = 0 ( 1259 reltoastrelid 26 0 4 8 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1259 reltoastidxid 26 0 4 9 0 -1 -1 t p f i f f));
DATA(insert OID = 0 ( 1259 relhasindex 16 0 1 10 0 -1 -1 t p f c f f));
@@ -544,7 +534,7 @@ DATA(insert OID = 0 ( 1219 tableoid 26 0 4 -7 0 -1 -1 t p f i f f));
* ----------------
*/
#define Schema_pg_variable \
-{ 1264, {"varfoo"}, 26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1264, {"varfoo"}, 26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
DATA(insert OID = 0 ( 1264 varfoo 26 0 4 1 0 -1 -1 t p f i f f));
@@ -555,7 +545,7 @@ DATA(insert OID = 0 ( 1264 varfoo 26 0 4 1 0 -1 -1 t p f i f f));
* ----------------
*/
#define Schema_pg_log \
-{ 1269, {"logfoo"}, 26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1269, {"logfoo"}, 26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
DATA(insert OID = 0 ( 1269 logfoo 26 0 4 1 0 -1 -1 t p f i f f));
@@ -566,7 +556,7 @@ DATA(insert OID = 0 ( 1269 logfoo 26 0 4 1 0 -1 -1 t p f i f f));
* ----------------
*/
#define Schema_pg_xactlock \
-{ 376, {"xactlockfoo"}, 26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 376, {"xactlockfoo"}, 26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
DATA(insert OID = 0 ( 376 xactlockfoo 26 0 4 1 0 -1 -1 t p f i f f));
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index 81e75e14b6a..86de88cc9b6 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: pg_class.h,v 1.47 2001/03/22 04:00:38 momjian Exp $
+ * $Id: pg_class.h,v 1.48 2001/05/07 00:43:25 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
@@ -52,7 +52,7 @@ CATALOG(pg_class) BOOTSTRAP
Oid relam;
Oid relfilenode;
int4 relpages;
- int4 reltuples;
+ float4 reltuples;
Oid reltoastrelid;
Oid reltoastidxid;
bool relhasindex;
diff --git a/src/include/catalog/pg_statistic.h b/src/include/catalog/pg_statistic.h
index 2f39bea3245..8d6a6b37c16 100644
--- a/src/include/catalog/pg_statistic.h
+++ b/src/include/catalog/pg_statistic.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: pg_statistic.h,v 1.10 2001/01/24 19:43:22 momjian Exp $
+ * $Id: pg_statistic.h,v 1.11 2001/05/07 00:43:25 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
@@ -36,40 +36,91 @@ CATALOG(pg_statistic)
/* These fields form the unique key for the entry: */
Oid starelid; /* relation containing attribute */
int2 staattnum; /* attribute (column) stats are for */
- Oid staop; /* '<' comparison op used for lo/hi vals */
+
+ /* the fraction of the column's entries that are NULL: */
+ float4 stanullfrac;
/*
- * Note: the current VACUUM code will never produce more than one
- * entry per column, but in theory there could be multiple entries if
- * a datatype has more than one useful ordering operator. Also, the
- * current code will not write an entry unless it found at least one
- * non-NULL value in the column; so the remaining fields will never be
- * NULL.
+ * stawidth is the average width in bytes of non-null entries. For
+ * fixed-width datatypes this is of course the same as the typlen, but
+ * for varlena types it is more useful. Note that this is the average
+ * width of the data as actually stored, post-TOASTing (eg, for a
+ * moved-out-of-line value, only the size of the pointer object is
+ * counted). This is the appropriate definition for the primary use of
+ * the statistic, which is to estimate sizes of in-memory hash tables of
+ * tuples.
+ */
+ int4 stawidth;
+
+ /* ----------------
+ * stadistinct indicates the (approximate) number of distinct non-null
+ * data values in the column. The interpretation is:
+ * 0 unknown or not computed
+ * > 0 actual number of distinct values
+ * < 0 negative of multiplier for number of rows
+ * The special negative case allows us to cope with columns that are
+ * unique (stadistinct = -1) or nearly so (for example, a column in
+ * which values appear about twice on average could be represented
+ * by stadistinct = -0.5). Because the number-of-rows statistic in
+ * pg_class may be updated more frequently than pg_statistic is, it's
+ * important to be able to describe such situations as a multiple of
+ * the number of rows, rather than a fixed number of distinct values.
+ * But in other cases a fixed number is correct (eg, a boolean column).
+ * ----------------
+ */
+ float4 stadistinct;
+
+ /* ----------------
+ * To allow keeping statistics on different kinds of datatypes,
+ * we do not hard-wire any particular meaning for the remaining
+ * statistical fields. Instead, we provide several "slots" in which
+ * statistical data can be placed. Each slot includes:
+ * kind integer code identifying kind of data
+ * op OID of associated operator, if needed
+ * numbers float4 array (for statistical values)
+ * values text array (for representations of data values)
+ * The ID and operator fields are never NULL; they are zeroes in an
+ * unused slot. The numbers and values fields are NULL in an unused
+ * slot, and might also be NULL in a used slot if the slot kind has
+ * no need for one or the other.
+ * ----------------
*/
+ int2 stakind1;
+ int2 stakind2;
+ int2 stakind3;
+ int2 stakind4;
+
+ Oid staop1;
+ Oid staop2;
+ Oid staop3;
+ Oid staop4;
+
/*
- * These fields contain the stats about the column indicated by the
- * key
+ * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent
+ * (NULL). They cannot be accessed as C struct entries; you have to use
+ * the full field access machinery (heap_getattr) for them. We declare
+ * them here for the catalog machinery.
*/
- float4 stanullfrac; /* the fraction of the entries that are
- * NULL */
- float4 stacommonfrac; /* the fraction that are the most common
- * val */
+
+ float4 stanumbers1[1];
+ float4 stanumbers2[1];
+ float4 stanumbers3[1];
+ float4 stanumbers4[1];
/*
- * THE REST OF THESE ARE VARIABLE LENGTH FIELDS. They cannot be
- * accessed as C struct entries; you have to use the full field access
- * machinery (heap_getattr) for them.
- *
- * All three of these are text representations of data values of the
- * column's data type. To re-create the actual Datum, do
- * datatypein(textout(givenvalue)).
+ * Values in these text arrays are external representations of values
+ * of the column's data type. To re-create the actual Datum, do
+ * datatypein(textout(arrayelement)).
*/
- text stacommonval; /* most common non-null value in column */
- text staloval; /* smallest non-null value in column */
- text stahival; /* largest non-null value in column */
+ text stavalues1[1];
+ text stavalues2[1];
+ text stavalues3[1];
+ text stavalues4[1];
} FormData_pg_statistic;
+#define STATISTIC_NUM_SLOTS 4
+
/* ----------------
* Form_pg_statistic corresponds to a pointer to a tuple with
* the format of pg_statistic relation.
@@ -81,14 +132,78 @@ typedef FormData_pg_statistic *Form_pg_statistic;
* compiler constants for pg_statistic
* ----------------
*/
-#define Natts_pg_statistic 8
+#define Natts_pg_statistic 21
#define Anum_pg_statistic_starelid 1
#define Anum_pg_statistic_staattnum 2
-#define Anum_pg_statistic_staop 3
-#define Anum_pg_statistic_stanullfrac 4
-#define Anum_pg_statistic_stacommonfrac 5
-#define Anum_pg_statistic_stacommonval 6
-#define Anum_pg_statistic_staloval 7
-#define Anum_pg_statistic_stahival 8
+#define Anum_pg_statistic_stanullfrac 3
+#define Anum_pg_statistic_stawidth 4
+#define Anum_pg_statistic_stadistinct 5
+#define Anum_pg_statistic_stakind1 6
+#define Anum_pg_statistic_stakind2 7
+#define Anum_pg_statistic_stakind3 8
+#define Anum_pg_statistic_stakind4 9
+#define Anum_pg_statistic_staop1 10
+#define Anum_pg_statistic_staop2 11
+#define Anum_pg_statistic_staop3 12
+#define Anum_pg_statistic_staop4 13
+#define Anum_pg_statistic_stanumbers1 14
+#define Anum_pg_statistic_stanumbers2 15
+#define Anum_pg_statistic_stanumbers3 16
+#define Anum_pg_statistic_stanumbers4 17
+#define Anum_pg_statistic_stavalues1 18
+#define Anum_pg_statistic_stavalues2 19
+#define Anum_pg_statistic_stavalues3 20
+#define Anum_pg_statistic_stavalues4 21
+
+/*
+ * Currently, three statistical slot "kinds" are defined: most common values,
+ * histogram, and correlation. Additional "kinds" will probably appear in
+ * the future to help cope with non-scalar datatypes.
+ *
+ * Code reading the pg_statistic relation should not assume that a particular
+ * data "kind" will appear in any particular slot. Instead, search the
+ * stakind fields to see if the desired data is available.
+ */
+
+/*
+ * In a "most common values" slot, staop is the OID of the "=" operator
+ * used to decide whether values are the same or not. stavalues contains
+ * the K most common non-null values appearing in the column, and stanumbers
+ * contains their frequencies (fractions of total row count). The values
+ * shall be ordered in decreasing frequency. Note that since the arrays are
+ * variable-size, K may be chosen by the statistics collector. Values should
+ * not appear in MCV unless they have been observed to occur more than once;
+ * a unique column will have no MCV slot.
+ */
+#define STATISTIC_KIND_MCV 1
+
+/*
+ * A "histogram" slot describes the distribution of scalar data. staop is
+ * the OID of the "<" operator that describes the sort ordering. (In theory,
+ * more than one histogram could appear, if a datatype has more than one
+ * useful sort operator.) stavalues contains M (>=2) non-null values that
+ * divide the non-null column data values into M-1 bins of approximately equal
+ * population. The first stavalues item is the MIN and the last is the MAX.
+ * stanumbers is not used and should be NULL. IMPORTANT POINT: if an MCV
+ * slot is also provided, then the histogram describes the data distribution
+ * *after removing the values listed in MCV* (thus, it's a "compressed
+ * histogram" in the technical parlance). This allows a more accurate
+ * representation of the distribution of a column with some very-common
+ * values. In a column with only a few distinct values, it's possible that
+ * the MCV list describes the entire data population; in this case the
+ * histogram reduces to empty and should be omitted.
+ */
+#define STATISTIC_KIND_HISTOGRAM 2
+
+/*
+ * A "correlation" slot describes the correlation between the physical order
+ * of table tuples and the ordering of data values of this column, as seen
+ * by the "<" operator identified by staop. (As with the histogram, more
+ * than one entry could theoretically appear.) stavalues is not used and
+ * should be NULL. stanumbers contains a single entry, the correlation
+ * coefficient between the sequence of data values and the sequence of
+ * their actual tuple positions. The coefficient ranges from +1 to -1.
+ */
+#define STATISTIC_KIND_CORRELATION 3
#endif /* PG_STATISTIC_H */
diff --git a/src/include/commands/command.h b/src/include/commands/command.h
index 8b108451d2a..7eb1a4fab84 100644
--- a/src/include/commands/command.h
+++ b/src/include/commands/command.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: command.h,v 1.26 2001/03/22 04:00:41 momjian Exp $
+ * $Id: command.h,v 1.27 2001/05/07 00:43:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -43,9 +43,13 @@ extern void PortalCleanup(Portal portal);
extern void AlterTableAddColumn(const char *relationName,
bool inh, ColumnDef *colDef);
-extern void AlterTableAlterColumn(const char *relationName,
- bool inh, const char *colName,
- Node *newDefault);
+extern void AlterTableAlterColumnDefault(const char *relationName,
+ bool inh, const char *colName,
+ Node *newDefault);
+
+extern void AlterTableAlterColumnStatistics(const char *relationName,
+ bool inh, const char *colName,
+ Node *statsTarget);
extern void AlterTableDropColumn(const char *relationName,
bool inh, const char *colName,
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index d82d22fcdfc..87bb0007aa0 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -1,129 +1,27 @@
/*-------------------------------------------------------------------------
*
* vacuum.h
- * header file for postgres vacuum cleaner
+ * header file for postgres vacuum cleaner and statistics analyzer
*
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: vacuum.h,v 1.34 2001/03/22 04:00:43 momjian Exp $
+ * $Id: vacuum.h,v 1.35 2001/05/07 00:43:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef VACUUM_H
#define VACUUM_H
-#include "catalog/pg_attribute.h"
-#include "catalog/pg_index.h"
-#include "fmgr.h"
-#include "nodes/pg_list.h"
-#include "storage/itemptr.h"
+#include "nodes/parsenodes.h"
-typedef struct VAttListData
-{
- int val_dummy;
- struct VAttListData *val_next;
-} VAttListData;
-
-typedef VAttListData *VAttList;
-
-typedef struct VacPageData
-{
- BlockNumber blkno; /* BlockNumber of this Page */
- Size free; /* FreeSpace on this Page */
- uint16 offsets_used; /* Number of OffNums used by vacuum */
- uint16 offsets_free; /* Number of OffNums free or to be free */
- OffsetNumber offsets[1]; /* Array of its OffNums */
-} VacPageData;
-
-typedef VacPageData *VacPage;
-
-typedef struct VacPageListData
-{
- int empty_end_pages;/* Number of "empty" end-pages */
- int num_pages; /* Number of pages in pagedesc */
- int num_allocated_pages; /* Number of allocated pages in
- * pagedesc */
- VacPage *pagedesc; /* Descriptions of pages */
-} VacPageListData;
-
-typedef VacPageListData *VacPageList;
-
-typedef struct
-{
- Form_pg_attribute attr;
- Datum best,
- guess1,
- guess2,
- max,
- min;
- int best_len,
- guess1_len,
- guess2_len,
- max_len,
- min_len;
- long best_cnt,
- guess1_cnt,
- guess1_hits,
- guess2_hits,
- null_cnt,
- nonnull_cnt,
- max_cnt,
- min_cnt;
- FmgrInfo f_cmpeq,
- f_cmplt,
- f_cmpgt;
- Oid op_cmplt;
- regproc outfunc;
- Oid typelem;
- bool initialized;
-} VacAttrStats;
-
-typedef struct VRelListData
-{
- Oid vrl_relid;
- struct VRelListData *vrl_next;
-} VRelListData;
-
-typedef VRelListData *VRelList;
-
-typedef struct VTupleLinkData
-{
- ItemPointerData new_tid;
- ItemPointerData this_tid;
-} VTupleLinkData;
-
-typedef VTupleLinkData *VTupleLink;
-
-typedef struct VTupleMoveData
-{
- ItemPointerData tid; /* tuple ID */
- VacPage vacpage; /* where to move */
- bool cleanVpd; /* clean vacpage before using */
-} VTupleMoveData;
-
-typedef VTupleMoveData *VTupleMove;
-
-typedef struct VRelStats
-{
- Oid relid;
- int num_tuples;
- int num_pages;
- Size min_tlen;
- Size max_tlen;
- bool hasindex;
- int num_vtlinks;
- VTupleLink vtlinks;
-} VRelStats;
-
-extern bool VacuumRunning;
-
-extern void vc_abort(void);
-extern void vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols);
-extern void analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL);
-
-#define ATTNVALS_SCALE 1000000000 /* XXX so it can act as a float4 */
+/* in commands/vacuum.c */
+extern void vacuum(VacuumStmt *vacstmt);
+extern void vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+ bool hasindex);
+/* in commands/analyze.c */
+extern void analyze_rel(Oid relid, VacuumStmt *vacstmt);
#endif /* VACUUM_H */
diff --git a/src/include/config.h.in b/src/include/config.h.in
index 0d989dbbb31..01593a4ce96 100644
--- a/src/include/config.h.in
+++ b/src/include/config.h.in
@@ -8,7 +8,7 @@
* or in config.h afterwards. Of course, if you edit config.h, then your
* changes will be overwritten the next time you run configure.
*
- * $Id: config.h.in,v 1.162 2001/04/14 22:55:02 petere Exp $
+ * $Id: config.h.in,v 1.163 2001/05/07 00:43:25 tgl Exp $
*/
#ifndef CONFIG_H
@@ -157,6 +157,11 @@
#define FUNC_MAX_ARGS INDEX_MAX_KEYS
/*
+ * System default value for pg_attribute.attstattarget
+ */
+#define DEFAULT_ATTSTATTARGET 10
+
+/*
* Define this to make libpgtcl's "pg_result -assign" command process C-style
* backslash sequences in returned tuple data and convert Postgres array
* attributes into Tcl lists. CAUTION: this conversion is *wrong* unless
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index 2cf9378cf11..0967bef24ba 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: execnodes.h,v 1.57 2001/03/22 04:00:50 momjian Exp $
+ * $Id: execnodes.h,v 1.58 2001/05/07 00:43:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -628,7 +628,6 @@ typedef struct GroupState
* SortState information
*
* sort_Done indicates whether sort has been performed yet
- * sort_Keys scan key structures describing the sort keys
* tuplesortstate private state of tuplesort.c
* ----------------
*/
@@ -636,7 +635,6 @@ typedef struct SortState
{
CommonScanState csstate; /* its first field is NodeTag */
bool sort_Done;
- ScanKey sort_Keys;
void *tuplesortstate;
} SortState;
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 1614d787bcb..63b1b1046a8 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: parsenodes.h,v 1.126 2001/03/23 04:49:56 momjian Exp $
+ * $Id: parsenodes.h,v 1.127 2001/05/07 00:43:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -118,11 +118,12 @@ typedef struct AlterTableStmt
NodeTag type;
char subtype; /*------------
* A = add column
- * T = alter column
+ * T = alter column default
+ * S = alter column statistics
* D = drop column
* C = add constraint
* X = drop constraint
- * E = add toast table,
+ * E = create toast table
* U = change owner
*------------
*/
@@ -690,16 +691,20 @@ typedef struct ClusterStmt
} ClusterStmt;
/* ----------------------
- * Vacuum Statement
+ * Vacuum and Analyze Statements
+ *
+ * Even though these are nominally two statements, it's convenient to use
+ * just one node type for both.
* ----------------------
*/
typedef struct VacuumStmt
{
NodeTag type;
- bool verbose; /* print status info */
- bool analyze; /* analyze data */
- char *vacrel; /* table to vacuum */
- List *va_spec; /* columns to analyse */
+ bool vacuum; /* do VACUUM step */
+ bool analyze; /* do ANALYZE step */
+ bool verbose; /* print progress info */
+ char *vacrel; /* name of single table to process, or NULL */
+ List *va_cols; /* list of column names, or NIL for all */
} VacuumStmt;
/* ----------------------
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 3ae8e09f57a..9e69ed60992 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -10,7 +10,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: primnodes.h,v 1.53 2001/03/22 04:00:52 momjian Exp $
+ * $Id: primnodes.h,v 1.54 2001/05/07 00:43:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -45,8 +45,8 @@ typedef struct FunctionCache *FunctionCachePtr;
* reskey and reskeyop are the execution-time representation of sorting.
* reskey must be zero in any non-sort-key item. The reskey of sort key
* targetlist items for a sort plan node is 1,2,...,n for the n sort keys.
- * The reskeyop of each such targetlist item is the sort operator's
- * regproc OID. reskeyop will be zero in non-sort-key items.
+ * The reskeyop of each such targetlist item is the sort operator's OID.
+ * reskeyop will be zero in non-sort-key items.
*
* Both reskey and reskeyop are typically zero during parse/plan stages.
* The executor does not pay any attention to ressortgroupref.
@@ -62,7 +62,7 @@ typedef struct Resdom
Index ressortgroupref;
/* nonzero if referenced by a sort/group clause */
Index reskey; /* order of key in a sort (for those > 0) */
- Oid reskeyop; /* sort operator's regproc Oid */
+ Oid reskeyop; /* sort operator's Oid */
bool resjunk; /* set to true to eliminate the attribute
* from final target list */
} Resdom;
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index f643ef87968..c76d9b4af71 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: relation.h,v 1.54 2001/03/22 04:00:53 momjian Exp $
+ * $Id: relation.h,v 1.55 2001/05/07 00:43:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -512,8 +512,8 @@ typedef struct RestrictInfo
Oid hashjoinoperator; /* copy of clause operator */
/* cache space for hashclause processing; -1 if not yet set */
- Selectivity left_dispersion;/* dispersion of left side */
- Selectivity right_dispersion; /* dispersion of right side */
+ Selectivity left_bucketsize; /* avg bucketsize of left side */
+ Selectivity right_bucketsize; /* avg bucketsize of right side */
} RestrictInfo;
/*
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 5caa576f0c0..cbf6df063a3 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: cost.h,v 1.38 2001/02/16 00:03:05 tgl Exp $
+ * $Id: cost.h,v 1.39 2001/05/07 00:43:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -64,7 +64,8 @@ extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path,
List *restrictlist,
List *outersortkeys, List *innersortkeys);
extern void cost_hashjoin(Path *path, Path *outer_path, Path *inner_path,
- List *restrictlist, Selectivity innerdispersion);
+ List *restrictlist, Selectivity innerbucketsize);
+extern Selectivity estimate_hash_bucketsize(Query *root, Var *var);
extern Cost cost_qual_eval(List *quals);
extern void set_baserel_size_estimates(Query *root, RelOptInfo *rel);
extern void set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 5b71eded86f..0839feb4b2f 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: pathnode.h,v 1.35 2001/03/22 04:00:54 momjian Exp $
+ * $Id: pathnode.h,v 1.36 2001/05/07 00:43:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -59,7 +59,7 @@ extern HashPath *create_hashjoin_path(RelOptInfo *joinrel,
Path *inner_path,
List *restrict_clauses,
List *hashclauses,
- Selectivity innerdispersion);
+ Selectivity innerbucketsize);
/*
* prototypes for relnode.c
diff --git a/src/include/utils/lsyscache.h b/src/include/utils/lsyscache.h
index f1c4aff1c80..6b35deed286 100644
--- a/src/include/utils/lsyscache.h
+++ b/src/include/utils/lsyscache.h
@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: lsyscache.h,v 1.30 2001/03/22 04:01:13 momjian Exp $
+ * $Id: lsyscache.h,v 1.31 2001/05/07 00:43:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -21,8 +21,6 @@ extern AttrNumber get_attnum(Oid relid, char *attname);
extern Oid get_atttype(Oid relid, AttrNumber attnum);
extern bool get_attisset(Oid relid, char *attname);
extern int32 get_atttypmod(Oid relid, AttrNumber attnum);
-extern double get_attdispersion(Oid relid, AttrNumber attnum,
- double min_estimate);
extern RegProcedure get_opcode(Oid opno);
extern char *get_opname(Oid opno);
extern bool op_mergejoinable(Oid opno, Oid ltype, Oid rtype,
@@ -41,6 +39,14 @@ extern bool get_typbyval(Oid typid);
extern void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval);
extern char get_typstorage(Oid typid);
extern Datum get_typdefault(Oid typid);
+extern bool get_attstatsslot(HeapTuple statstuple,
+ Oid atttype, int32 atttypmod,
+ int reqkind, Oid reqop,
+ Datum **values, int *nvalues,
+ float4 **numbers, int *nnumbers);
+extern void free_attstatsslot(Oid atttype,
+ Datum *values, int nvalues,
+ float4 *numbers, int nnumbers);
#define TypeIsToastable(typid) (get_typstorage(typid) != 'p')
diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h
index 8d4e2ae62c4..342f7bf8a56 100644
--- a/src/include/utils/syscache.h
+++ b/src/include/utils/syscache.h
@@ -9,7 +9,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: syscache.h,v 1.29 2001/03/22 04:01:14 momjian Exp $
+ * $Id: syscache.h,v 1.30 2001/05/07 00:43:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,7 +53,7 @@
#define RULEOID 22
#define SHADOWNAME 23
#define SHADOWSYSID 24
-#define STATRELID 25
+#define STATRELATT 25
#define TYPENAME 26
#define TYPEOID 27
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index 7f273776c36..001761796e2 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -13,7 +13,7 @@
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: tuplesort.h,v 1.6 2001/01/24 19:43:29 momjian Exp $
+ * $Id: tuplesort.h,v 1.7 2001/05/07 00:43:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -36,8 +36,9 @@ typedef struct Tuplesortstate Tuplesortstate;
*/
extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
- int nkeys, ScanKey keys,
- bool randomAccess);
+ int nkeys,
+ Oid *sortOperators, AttrNumber *attNums,
+ bool randomAccess);
extern Tuplesortstate *tuplesort_begin_index(Relation indexRel,
bool enforceUnique,
bool randomAccess);
@@ -75,4 +76,19 @@ extern void tuplesort_rescan(Tuplesortstate *state);
extern void tuplesort_markpos(Tuplesortstate *state);
extern void tuplesort_restorepos(Tuplesortstate *state);
+/*
+ * This routine selects an appropriate sorting function to implement
+ * a sort operator as efficiently as possible.
+ */
+typedef enum
+{
+ SORTFUNC_LT, /* raw "<" operator */
+ SORTFUNC_CMP, /* -1 / 0 / 1 three-way comparator */
+ SORTFUNC_REVCMP /* 1 / 0 / -1 (reversed) 3-way comparator */
+} SortFunctionKind;
+
+extern void SelectSortFunction(Oid sortOperator,
+ RegProcedure *sortFunction,
+ SortFunctionKind *kind);
+
#endif /* TUPLESORT_H */
diff --git a/src/interfaces/ecpg/preproc/keywords.c b/src/interfaces/ecpg/preproc/keywords.c
index 5614a34b0fe..c03880f497d 100644
--- a/src/interfaces/ecpg/preproc/keywords.c
+++ b/src/interfaces/ecpg/preproc/keywords.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.39 2001/03/22 04:01:21 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.40 2001/05/07 00:43:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
{"some", SOME},
{"start", START},
{"statement", STATEMENT},
+ {"statistics", STATISTICS},
{"stdin", STDIN},
{"stdout", STDOUT},
{"substring", SUBSTRING},
diff --git a/src/interfaces/ecpg/preproc/preproc.y b/src/interfaces/ecpg/preproc/preproc.y
index 345efb6576e..91708bd91fa 100644
--- a/src/interfaces/ecpg/preproc/preproc.y
+++ b/src/interfaces/ecpg/preproc/preproc.y
@@ -134,7 +134,7 @@ make_name(void)
%union {
double dval;
- int ival;
+ int ival;
char * str;
struct when action;
struct index index;
@@ -224,7 +224,7 @@ make_name(void)
NONE, NOTHING, NOTIFY, NOTNULL, OFFSET, OIDS,
OPERATOR, OWNER, PASSWORD, PROCEDURAL, REINDEX, RENAME, RESET,
RETURNS, ROW, RULE, SEQUENCE, SERIAL, SETOF, SHARE,
- SHOW, START, STATEMENT, STDIN, STDOUT, SYSID TEMP,
+ SHOW, START, STATEMENT, STATISTICS, STDIN, STDOUT, SYSID TEMP,
TEMPLATE, TOAST, TRUNCATE, TRUSTED, UNLISTEN, UNTIL, VACUUM,
VALID, VERBOSE, VERSION
@@ -285,7 +285,7 @@ make_name(void)
%type <str> file_name AexprConst ParamNo c_expr ConstTypename
%type <str> in_expr_nodes a_expr b_expr TruncateStmt CommentStmt
%type <str> opt_indirection expr_list extract_list extract_arg
-%type <str> position_list substr_list substr_from alter_column_action
+%type <str> position_list substr_list substr_from alter_column_default
%type <str> trim_list in_expr substr_for attr attrs drop_behavior
%type <str> Typename SimpleTypename Generic Numeric generic opt_float opt_numeric
%type <str> opt_decimal Character character opt_varying opt_charset
@@ -293,7 +293,7 @@ make_name(void)
%type <str> row_expr row_descriptor row_list ConstDatetime opt_chain
%type <str> SelectStmt into_clause OptTemp ConstraintAttributeSpec
%type <str> opt_table opt_all sort_clause sortby_list ConstraintAttr
-%type <str> sortby OptUseOp opt_inh_star relation_name_list name_list
+%type <str> sortby OptUseOp relation_name_list name_list
%type <str> group_clause having_clause from_clause opt_distinct
%type <str> join_outer where_clause relation_expr sub_type opt_arg
%type <str> opt_column_list insert_rest InsertStmt OptimizableStmt
@@ -301,8 +301,8 @@ make_name(void)
%type <str> NotifyStmt columnElem copy_dirn UnlistenStmt copy_null
%type <str> copy_delimiter ListenStmt CopyStmt copy_file_name opt_binary
%type <str> opt_with_copy FetchStmt direction fetch_how_many from_in
-%type <str> ClosePortalStmt DropStmt VacuumStmt opt_verbose func_arg
-%type <str> opt_analyze opt_va_list va_list ExplainStmt index_params
+%type <str> ClosePortalStmt DropStmt VacuumStmt AnalyzeStmt opt_verbose func_arg
+%type <str> analyze_keyword opt_name_list ExplainStmt index_params
%type <str> index_list func_index index_elem opt_class access_method_clause
%type <str> index_opt_unique IndexStmt func_return ConstInterval
%type <str> func_args_list func_args opt_with ProcedureStmt def_arg
@@ -329,7 +329,7 @@ make_name(void)
%type <str> opt_cursor opt_lmode ConstraintsSetStmt comment_tg AllConst
%type <str> case_expr when_clause_list case_default case_arg when_clause
%type <str> select_clause opt_select_limit select_limit_value ConstraintTimeSpec
-%type <str> select_offset_value ReindexStmt join_type opt_only opt_boolean
+%type <str> select_offset_value ReindexStmt join_type opt_boolean
%type <str> join_qual update_list AlterSchemaStmt joined_table
%type <str> opt_level opt_lock lock_type users_in_new_group_clause
%type <str> OptConstrFromTable comment_op OptTempTableName StringConst
@@ -447,6 +447,7 @@ stmt: AlterSchemaStmt { output_statement($1, 0, NULL, connection); }
| CreatedbStmt { output_statement($1, 0, NULL, connection); }
| DropdbStmt { output_statement($1, 0, NULL, connection); }
| VacuumStmt { output_statement($1, 0, NULL, connection); }
+ | AnalyzeStmt { output_statement($1, 0, NULL, connection); }
| VariableSetStmt { output_statement($1, 0, NULL, connection); }
| VariableShowStmt { output_statement($1, 0, NULL, connection); }
| VariableResetStmt { output_statement($1, 0, NULL, connection); }
@@ -909,39 +910,40 @@ CheckPointStmt: CHECKPOINT { $$= make_str("checkpoint"); }
/*****************************************************************************
*
- * QUERY :
- *
* ALTER TABLE variations
*
*****************************************************************************/
AlterTableStmt:
-/* ALTER TABLE <name> ADD [COLUMN] <coldef> */
- ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+ ALTER TABLE relation_expr ADD opt_column columnDef
+ {
+ $$ = cat_str(5, make_str("alter table"), $3, make_str("add"), $5, $6);
+ }
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+ | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
{
- $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("add"), $6, $7);
+ $$ = cat_str(6, make_str("alter table"), $3, make_str("alter"), $5, $6, $7);
}
-/* ALTER TABLE <name> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP
-DEFAULT} */
- | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId
- alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+ | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
{
- $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("alter"), $6, $7, $8);
+ $$ = cat_str(7, make_str("alter table"), $3, make_str("alter"), $5, $6, make_str("set statistics"), $9);
}
-/* ALTER TABLE <name> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
- | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */
+ | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
{
- $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("drop"), $6, $7, $8);
+ $$ = cat_str(6, make_str("alter table"), $3, make_str("drop"), $5, $6, $7);
}
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
- | ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+ | ALTER TABLE relation_expr ADD TableConstraint
{
- $$ = cat_str(5, make_str("alter table"), $3, $4, make_str("add"), $6);
+ $$ = cat_str(4, make_str("alter table"), $3, make_str("add"), $5);
}
-/* ALTER TABLE <name> DROP CONSTRAINT ... */
- | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT ... */
+ | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
{
- $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("drop constraint"), $7, $8);
+ $$ = cat_str(5, make_str("alter table"), $3, make_str("drop constraint"), $6, $7);
}
/* ALTER TABLE <name> OWNER TO UserId */
| ALTER TABLE relation_name OWNER TO UserId
@@ -950,7 +952,7 @@ DEFAULT} */
}
;
-alter_column_action:
+alter_column_default:
SET DEFAULT a_expr { $$ = cat2_str(make_str("set default"), $3); }
| DROP DEFAULT { $$ = make_str("drop default"); }
;
@@ -1234,10 +1236,6 @@ key_reference: NO ACTION { $$ = make_str("no action"); }
| SET NULL_P { $$ = make_str("set null"); }
;
-opt_only: ONLY { $$ = make_str("only"); }
- | /*EMPTY*/ { $$ = EMPTY; }
- ;
-
OptInherit: INHERITS '(' relation_name_list ')' { $$ = cat_str(3, make_str("inherits ("), $3, make_str(")")); }
| /*EMPTY*/ { $$ = EMPTY; }
;
@@ -2013,10 +2011,9 @@ opt_force: FORCE { $$ = make_str("force"); }
*
*****************************************************************************/
-RenameStmt: ALTER TABLE relation_name opt_inh_star
- RENAME opt_column opt_name TO name
+RenameStmt: ALTER TABLE relation_expr RENAME opt_column opt_name TO name
{
- $$ = cat_str(8, make_str("alter table"), $3, $4, make_str("rename"), $6, $7, make_str("to"), $9);
+ $$ = cat_str(7, make_str("alter table"), $3, make_str("rename"), $5, $6, make_str("to"), $8);
}
;
@@ -2250,38 +2247,44 @@ ClusterStmt: CLUSTER index_name ON relation_name
*
* QUERY:
* vacuum
+ * analyze
*
*****************************************************************************/
-VacuumStmt: VACUUM opt_verbose opt_analyze
+VacuumStmt: VACUUM opt_verbose
+ {
+ $$ = cat_str(2, make_str("vacuum"), $2);
+ }
+ | VACUUM opt_verbose relation_name
{
$$ = cat_str(3, make_str("vacuum"), $2, $3);
}
- | VACUUM opt_verbose opt_analyze relation_name opt_va_list
+ | VACUUM opt_verbose AnalyzeStmt
{
- if ( strlen($5) > 0 && strlen($4) == 0 )
- mmerror(ET_ERROR, "VACUUM syntax error at or near \"(\"\n\tRelations name must be specified");
- $$ = cat_str(5, make_str("vacuum"), $2, $3, $4, $5);
+ $$ = cat_str(3, make_str("vacuum"), $2, $3);
}
;
-opt_verbose: VERBOSE { $$ = make_str("verbose"); }
- | /*EMPTY*/ { $$ = EMPTY; }
+AnalyzeStmt: analyze_keyword opt_verbose
+ {
+ $$ = cat_str(2, $1, $2);
+ }
+ | analyze_keyword opt_verbose relation_name opt_name_list
+ {
+ $$ = cat_str(4, $1, $2, $3, $4);
+ }
;
-opt_analyze: ANALYZE { $$ = make_str("analyze"); }
- | ANALYSE { $$ = make_str("analyse"); }
- | /*EMPTY*/ { $$ = EMPTY; }
+analyze_keyword: ANALYZE { $$ = make_str("analyze"); }
+ | ANALYSE { $$ = make_str("analyse"); }
;
-opt_va_list: '(' va_list ')' { $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+opt_verbose: VERBOSE { $$ = make_str("verbose"); }
| /*EMPTY*/ { $$ = EMPTY; }
;
-va_list: name
- { $$=$1; }
- | va_list ',' name
- { $$=cat_str(3, $1, make_str(","), $3); }
+opt_name_list: '(' name_list ')' { $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+ | /*EMPTY*/ { $$ = EMPTY; }
;
@@ -2383,9 +2386,9 @@ columnElem: ColId opt_indirection
*
*****************************************************************************/
-DeleteStmt: DELETE FROM opt_only name where_clause
+DeleteStmt: DELETE FROM relation_expr where_clause
{
- $$ = cat_str(4, make_str("delete from"), $3, $4, $5);
+ $$ = cat_str(3, make_str("delete from"), $3, $4);
}
;
@@ -2416,12 +2419,12 @@ opt_lmode: SHARE { $$ = make_str("share"); }
*
*****************************************************************************/
-UpdateStmt: UPDATE opt_only relation_name
+UpdateStmt: UPDATE relation_expr
SET update_target_list
from_clause
where_clause
{
- $$ = cat_str(7, make_str("update"), $2, $3, make_str("set"), $5, $6, $7);
+ $$ = cat_str(6, make_str("update"), $2, make_str("set"), $4, $5, $6);
}
;
@@ -2667,10 +2670,6 @@ select_offset_value: PosIntConst {
* ...however, recursive addattr and rename supported. make special
* cases for these.
*/
-opt_inh_star: '*' { $$ = make_str("*"); }
- | /*EMPTY*/ { $$ = EMPTY; }
- ;
-
relation_name_list: name_list { $$ = $1; };
name_list: name
@@ -2704,7 +2703,7 @@ opt_for_update_clause: for_update_clause { $$ = $1; }
| /* EMPTY */ { $$ = EMPTY; }
;
-update_list: OF va_list
+update_list: OF name_list
{
$$ = cat2_str(make_str("of"), $2);
}
@@ -5028,6 +5027,7 @@ TokenId: ABSOLUTE { $$ = make_str("absolute"); }
| SHARE { $$ = make_str("share"); }
| START { $$ = make_str("start"); }
| STATEMENT { $$ = make_str("statement"); }
+ | STATISTICS { $$ = make_str("statistics"); }
| STDIN { $$ = make_str("stdin"); }
| STDOUT { $$ = make_str("stdout"); }
| SYSID { $$ = make_str("sysid"); }
diff --git a/src/test/regress/expected/oidjoins.out b/src/test/regress/expected/oidjoins.out
index a2b0ad9e3e7..46bc60f6955 100644
--- a/src/test/regress/expected/oidjoins.out
+++ b/src/test/regress/expected/oidjoins.out
@@ -353,12 +353,28 @@ WHERE pg_statistic.starelid != 0 AND
-----+----------
(0 rows)
-SELECT oid, pg_statistic.staop
+SELECT oid, pg_statistic.staop1
FROM pg_statistic
-WHERE pg_statistic.staop != 0 AND
- NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
- oid | staop
------+-------
+WHERE pg_statistic.staop1 != 0 AND
+ NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+ oid | staop1
+-----+--------
+(0 rows)
+
+SELECT oid, pg_statistic.staop2
+FROM pg_statistic
+WHERE pg_statistic.staop2 != 0 AND
+ NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+ oid | staop2
+-----+--------
+(0 rows)
+
+SELECT oid, pg_statistic.staop3
+FROM pg_statistic
+WHERE pg_statistic.staop3 != 0 AND
+ NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
+ oid | staop3
+-----+--------
(0 rows)
SELECT oid, pg_trigger.tgrelid
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index 9d4ff1b3985..1b094a6e3bf 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -482,8 +482,8 @@ WHERE p1.aggtransfn = p2.oid AND
(p2.pronargs = 1 AND p1.aggbasetype = 0)));
oid | aggname | oid | proname
-------+---------+-----+-------------
- 16997 | max | 768 | int4larger
- 17011 | min | 769 | int4smaller
+ 17010 | max | 768 | int4larger
+ 17024 | min | 769 | int4smaller
(2 rows)
-- Cross-check finalfn (if present) against its entry in pg_proc.
diff --git a/src/test/regress/sql/oidjoins.sql b/src/test/regress/sql/oidjoins.sql
index b7ea1f63eaa..88727a6c76e 100644
--- a/src/test/regress/sql/oidjoins.sql
+++ b/src/test/regress/sql/oidjoins.sql
@@ -177,10 +177,18 @@ SELECT oid, pg_statistic.starelid
FROM pg_statistic
WHERE pg_statistic.starelid != 0 AND
NOT EXISTS(SELECT * FROM pg_class AS t1 WHERE t1.oid = pg_statistic.starelid);
-SELECT oid, pg_statistic.staop
+SELECT oid, pg_statistic.staop1
FROM pg_statistic
-WHERE pg_statistic.staop != 0 AND
- NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
+WHERE pg_statistic.staop1 != 0 AND
+ NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+SELECT oid, pg_statistic.staop2
+FROM pg_statistic
+WHERE pg_statistic.staop2 != 0 AND
+ NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+SELECT oid, pg_statistic.staop3
+FROM pg_statistic
+WHERE pg_statistic.staop3 != 0 AND
+ NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
SELECT oid, pg_trigger.tgrelid
FROM pg_trigger
WHERE pg_trigger.tgrelid != 0 AND